#!/bin/bash

# Creates an OCR'd (text-searchable) PDF out of an image.
# (OCR is limited to printed characters; OCR of handwriting or photographs is
# poor, if it works at all.)
#
# Alternatively use `tesseract FILE text`

# Usage: <script> input_file
#
# Converts input_file to a PDF with ImageMagick `convert`, runs `ocrmypdf` on
# it, and writes the result to "<basename of input_file>.pdf" in the current
# directory (overwriting any existing file of that name).
#
# Exit status: 0 on success; non-zero if the arguments are wrong, a required
# tool is missing, or any processing step fails.  On failure no output file is
# created or replaced.

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so a failed conversion never feeds the next step.
set -euo pipefail

# Print a message to stderr and terminate with status 1.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

if [[ $# -ne 1 ]]; then
    # (there is `basename` and `dirname`)
    die "Usage: $(basename -- "$0") input_file"
fi

# Fail early, with a clear message, when an external tool is not installed.
for tool in convert ocrmypdf; do
    command -v "${tool}" >/dev/null || die "Required tool not found: ${tool}"
done

# NOTE: the existence of "$1" is deliberately not tested here; `convert`
# reports unreadable input itself and `set -e` then stops the script.
b="$(basename -- "$1")"

# Intermediate files live in a private temporary directory so that nothing in
# the current directory is touched (or clobbered) until OCR has succeeded.
work_dir="$(mktemp -d)"
trap 'rm -rf -- "${work_dir}"' EXIT

convert "$1" "${work_dir}/image.pdf"
# TODO: some contrast enhancement step would help.  If text has low contrast
# with background (e.g., blue on black, green on black), then OCR fails.
ocrmypdf -- "${work_dir}/image.pdf" "${work_dir}/ocr.pdf"

# Only now publish the result under its final name.
mv -f -- "${work_dir}/ocr.pdf" "${b}.pdf"