summaryrefslogtreecommitdiffstats
path: root/public/.local/bin/ocrthis.sh
diff options
context:
space:
mode:
authorMitsuo Tokumori <[email protected]>2024-09-11 04:25:38 +0900
committerMitsuo Tokumori <[email protected]>2024-09-11 04:25:38 +0900
commit37ff7062874b72bafa99afe661f42653565ed511 (patch)
tree60372bf7fdb94c9034c7d1cc5a17cd7df526c4fc /public/.local/bin/ocrthis.sh
parentdca319b8d9d221f1aaf3a00ee457d202096c66ac (diff)
downloaddotfiles-37ff7062874b72bafa99afe661f42653565ed511.tar.gz
dotfiles-37ff7062874b72bafa99afe661f42653565ed511.tar.bz2
dotfiles-37ff7062874b72bafa99afe661f42653565ed511.zip
RESTRUCTURE. Replicate relative paths in public/
Diffstat (limited to 'public/.local/bin/ocrthis.sh')
-rwxr-xr-xpublic/.local/bin/ocrthis.sh20
1 files changed, 20 insertions, 0 deletions
diff --git a/public/.local/bin/ocrthis.sh b/public/.local/bin/ocrthis.sh
new file mode 100755
index 0000000..3010a6a
--- /dev/null
+++ b/public/.local/bin/ocrthis.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Creates an OCR'd (searchable) PDF out of an image.
+# (OCR is limited to printed characters; handwriting or photograph OCR is
+# poor, if it works at all.)
+#
+# Alternatively use `tesseract FILE text`
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $(basename "$0") input_file" >&2
+ # Exactly one argument is required; usage goes to stderr, exit status 1.
+ exit 1
+fi
+# Outputs are written to the current directory, named after the input's basename.
+b="$(basename "$1")"
+convert "$1" "${b}.pdf" || exit # stop here: nothing to OCR if conversion fails
+# TODO: some contrast enhancement step would help. If text has low contrast
+# with background (e.g., blue on black, green on black), then OCR fails.
+ocrmypdf "${b}.pdf" "${b}.ocr.pdf" || exit # on failure keep the un-OCRed PDF
+mv -f "${b}.ocr.pdf" "${b}.pdf"