#!/bin/bash
#
# ocrthis.sh - create a searchable (OCR'd) PDF out of an image.
#
# Origin: public/.local/bin/ocrthis.sh, added in commit
# 37ff7062874b72bafa99afe661f42653565ed511 (Mitsuo Tokumori, 2024-09-11,
# "RESTRUCTURE. Replicate relative paths in public/").
#
# Usage: ocrthis.sh input_file
#
# The result is written to the current directory as
# "<basename of input_file>.pdf" (e.g. scan.png -> scan.png.pdf).
#
# OCR is limited to printed characters; handwriting or photograph OCR gives
# poor results, if any.
#
# Alternatively use `tesseract FILE text`.
#
# Exit status: 0 on success, 1 on usage/precondition errors, otherwise the
# status of the failing external tool.

# Abort on the first failing step so a failed conversion is never fed to the
# next tool and the script's exit status reflects the real failure.
set -euo pipefail

# Print an error message prefixed with the script name to stderr and exit 1.
die() {
  printf '%s: %s\n' "$(basename -- "$0")" "$*" >&2
  exit 1
}

if [[ $# -ne 1 ]]; then
  # (there is `basename` and `dirname`)
  echo "Usage: $(basename -- "$0") input_file" >&2
  exit 1
fi

input=$1

[[ -f "$input" && -r "$input" ]] || die "cannot read input file: $input"

# ImageMagick has no "--" end-of-options marker, so a relative path starting
# with "-" would be parsed as an option; anchor it to the current directory.
case "$input" in
  -*) input="./$input" ;;
esac

# Prefer the classic `convert`; fall back to `magick`, which is the only
# entry point on some ImageMagick 7 installations.
if command -v convert >/dev/null 2>&1; then
  im_cmd=(convert)
elif command -v magick >/dev/null 2>&1; then
  im_cmd=(magick)
else
  die "ImageMagick (convert or magick) is required but was not found"
fi

command -v ocrmypdf >/dev/null 2>&1 \
  || die "ocrmypdf is required but was not found"

name="$(basename -- "$input")"
# The explicit "./" keeps the output in the current directory (as before)
# while preventing names that begin with "-" from being read as options.
pdf="./${name}.pdf"
ocr_pdf="./${name}.ocr.pdf"

"${im_cmd[@]}" "$input" "$pdf"

# Remove the intermediate OCR output on any exit path; after a successful
# `mv` below the file no longer exists and this is a no-op.
trap 'rm -f -- "$ocr_pdf"' EXIT

# TODO: some contrast enhancement step would help. If text has low contrast
# with background (e.g., blue on black, green on black), then OCR fails.
ocrmypdf "$pdf" "$ocr_pdf"
mv -f -- "$ocr_pdf" "$pdf"