diff options
Diffstat (limited to 'local/bin/ocrthis.sh')
| -rwxr-xr-x | local/bin/ocrthis.sh | 20 | 
1 files changed, 20 insertions, 0 deletions
#!/bin/bash
#
# Creates an OCR'd PDF out of an image.
#
# Usage: ocrthis.sh input_file
#
# The result is written to "<basename of input_file>.pdf" in the current
# directory. The output file only appears if every step succeeded; all
# intermediate files live in a private temporary directory that is removed
# on exit.
#
# OCR is limited to printed characters (handwriting or photograph OCR is bad,
# if it works at all).
#
# Alternatively use `tesseract FILE text`
#
# Exits with status 1 on a usage error or a missing tool; otherwise exits
# with the status of the first failing command.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a failed conversion never feeds into the OCR step.
set -euo pipefail

if [ $# -ne 1 ]; then
    # Diagnostics go to stderr so they never mix with regular output.
    echo "Usage: $(basename "$0") input_file" >&2
    # (there is `basename` and `dirname`)
    exit 1
fi

# Fail early with a clear message if a required tool is not installed.
for tool in convert ocrmypdf; do
    if ! command -v "$tool" >/dev/null 2>&1; then
        echo "$(basename "$0"): required tool not found: $tool" >&2
        exit 1
    fi
done

# `--` keeps a file name starting with `-` from being parsed as an option.
b="$(basename -- "$1")"
# The `./` prefix keeps the output name from being mistaken for an option
# by the tools below when the input name starts with `-`.
out="./${b}.pdf"

# Work in a private temporary directory so that a failure leaves no
# half-finished (non-OCR'd) PDF behind under the final output name, and so
# that no unrelated "<name>.ocr.pdf" in the current directory is clobbered.
tmpdir="$(mktemp -d)"
trap 'rm -rf -- "$tmpdir"' EXIT

convert "$1" "${tmpdir}/image.pdf"
# TODO: some contrast enhancement step would help.  If text has low contrast
# with background (e.g., blue on black, green on black), then OCR fails.
ocrmypdf "${tmpdir}/image.pdf" "${tmpdir}/ocr.pdf"
mv -f -- "${tmpdir}/ocr.pdf" "$out"
