blob: 3010a6ac9a9c376ddb0be3713c75fb1bdaad60cc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
#!/bin/bash
# Creates an OCR'd PDF out of an image.
# (OCR is limited to printed characters; handwriting or photograph OCR is bad,
# if it works at all.)
#
# Alternatively use `tesseract FILE text`
# Usage: <script> input_file
#
# Converts input_file (an image) to "<basename of input_file>.pdf" in the
# current directory, then replaces that PDF with an OCR'd version of itself.
# Exit status: 1 on wrong argument count; otherwise the status of the first
# step that failed, or 0 when every step succeeded.
if [[ $# -ne 1 ]]; then
  # The usage text is a diagnostic here, so it goes to stderr.
  printf 'Usage: %s input_file\n' "$(basename -- "$0")" >&2
  # (there is `basename` and `dirname`)
  exit 1
fi

src=$1
# A path starting with '-' would be parsed as an option by the tools below;
# anchoring it to the current directory keeps it a file name.
if [[ "$src" == -* ]]; then
  src="./${src}"
fi

# Output files are named after the input's base name (extension included).
# The "./" prefix protects against base names that begin with '-'.
b="$(basename -- "$1")" || exit
pdf="./${b}.pdf"
ocr_pdf="./${b}.ocr.pdf"

# Step 1: image -> plain (non-searchable) PDF. Stop on failure so that
# ocrmypdf is never run on a missing or stale file.
convert "$src" "$pdf" || {
  status=$?
  printf '%s: convert failed for %s\n' "$(basename -- "$0")" "$1" >&2
  exit "$status"
}

# TODO: some contrast enhancement step would help. If text has low contrast
# with background (e.g., blue on black, green on black), then OCR fails.

# Step 2: OCR into a separate file. On failure the plain PDF from step 1 is
# left in place and the script exits with ocrmypdf's status.
ocrmypdf "$pdf" "$ocr_pdf" || {
  status=$?
  printf '%s: ocrmypdf failed for %s\n' "$(basename -- "$0")" "$pdf" >&2
  exit "$status"
}

# Step 3: replace the plain PDF with the OCR'd one.
mv -f -- "$ocr_pdf" "$pdf"
|