summaryrefslogtreecommitdiffstats
path: root/public/.local/bin/ocrthis.sh
diff options
context:
space:
mode:
Diffstat (limited to 'public/.local/bin/ocrthis.sh')
-rwxr-xr-xpublic/.local/bin/ocrthis.sh20
1 files changed, 20 insertions, 0 deletions
diff --git a/public/.local/bin/ocrthis.sh b/public/.local/bin/ocrthis.sh
new file mode 100755
index 0000000..3010a6a
--- /dev/null
+++ b/public/.local/bin/ocrthis.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Creates an OCR PDF out of an image.
+# (OCR is limited to printed characters; handwriting or photograph OCR is bad,
+# if it works at all.)
+#
+# Alternatively use `tesseract FILE text`
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $(basename "$0") input_file"
+ # (`basename` gives the file-name part of a path; `dirname` gives the directory part)
+ exit 1
+fi
+
+b="$(basename "$1")"
+convert "$1" "${b}.pdf"
+# TODO: some contrast enhancement step would help. If text has low contrast
+# with background (e.g., blue on black, green on black), then OCR fails.
+ocrmypdf "${b}.pdf" "${b}.ocr.pdf"
+mv -f "${b}.ocr.pdf" "${b}.pdf"