#!/bin/bash
#
# Creates an OCR'ed PDF out of an image.
# (OCR is limited to printed characters; handwriting or photograph OCR is
# bad, if it works at all.)
#
# Alternatively use `tesseract FILE text`.
#
# Usage:
#   <this script> input_file
#
# Arguments:
#   input_file - image to convert; passed as-is to `convert`.
#
# Outputs:
#   "<basename of input_file>.pdf" in the current directory. The original
#   file name (including its extension) is kept and ".pdf" is appended.
#   "<basename>.ocr.pdf" is used as an intermediate file and is renamed over
#   the final PDF once `ocrmypdf` succeeds.
#
# Exit status:
#   1 on wrong argument count; otherwise the status of the first failing
#   step, or 0 on success.

# Stop at the first failing step. Without this, a failed `convert` would
# still let `ocrmypdf` run, possibly against a stale PDF left over from an
# earlier run, which `mv -f` would then silently overwrite.
set -euo pipefail

if [[ $# -ne 1 ]]; then
  # Diagnostics belong on stderr so they never mix with captured output.
  echo "Usage: $(basename -- "$0") input_file" >&2
  # (there is `basename` and `dirname`)
  exit 1
fi

# `--` keeps an input name starting with '-' from being parsed as an option.
b="$(basename -- "$1")"

# Step 1: wrap the image in a (not yet searchable) PDF.
convert "$1" "${b}.pdf"

# TODO: some contrast enhancement step would help. If text has low contrast
# with background (e.g., blue on black, green on black), then OCR fails.

# Step 2: run OCR into a separate file, then replace the plain PDF with it.
ocrmypdf "${b}.pdf" "${b}.ocr.pdf"
mv -f -- "${b}.ocr.pdf" "${b}.pdf"