diff options
| author | Mitsuo Tokumori <[email protected]> | 2023-11-24 00:44:34 -0500 |
|---|---|---|
| committer | Mitsuo Tokumori <[email protected]> | 2023-11-24 00:44:34 -0500 |
| commit | dbf6639677796336bdb345efb70736d29c44f60f (patch) | |
| tree | e8427d394fe51939b43b28f9cb01c863451406df /local/bin/ocrthis.sh | |
| parent | 6e515b6902c537e742c2408783459e6a0ad10fa0 (diff) | |
| download | dotfiles-dbf6639677796336bdb345efb70736d29c44f60f.tar.gz dotfiles-dbf6639677796336bdb345efb70736d29c44f60f.tar.bz2 dotfiles-dbf6639677796336bdb345efb70736d29c44f60f.zip | |
Add my bash scripts
Diffstat (limited to 'local/bin/ocrthis.sh')
| -rwxr-xr-x | local/bin/ocrthis.sh | 20 |
1 files changed, 20 insertions, 0 deletions
#!/bin/bash
#
# ocrthis.sh (local/bin/ocrthis.sh)
#
# Creates an OCR'd PDF out of an image.
# OCR is limited to printed characters; results for handwriting or
# photographs are poor, if any.
#
# Usage: ocrthis.sh input_file
#
# Writes "<basename of input_file>.pdf" into the current directory: the image
# is first converted to a plain PDF, then an OCR pass is run over it, and the
# OCR'd result replaces the plain PDF.
#
# Alternatively use `tesseract FILE text`.

# Stop at the first failing step, so a failed conversion is never fed into the
# OCR pass and a failed OCR pass never reaches the final rename. The script's
# exit status is then the status of the step that failed.
set -euo pipefail

if [ $# -ne 1 ]; then
  # (there is `basename` and `dirname`)
  # Diagnostics go to stderr so they do not mix with regular output.
  echo "Usage: $(basename -- "$0") input_file" >&2
  exit 1
fi

# `--` keeps an input path that starts with `-` from being read as an option.
b="$(basename -- "$1")"

# Anchor the output names with "./": the basename of an input such as
# "./-scan.png" starts with a dash and would otherwise be parsed as an option
# by the tools below. The files are still written to the current directory.
pdf="./${b}.pdf"
ocr_pdf="./${b}.ocr.pdf"

convert "$1" "${pdf}"
# TODO: some contrast enhancement step would help. If text has low contrast
# with background (e.g., blue on black, green on black), then OCR fails.
ocrmypdf "${pdf}" "${ocr_pdf}"
# Replace the plain PDF with the OCR'd one; only reached when OCR succeeded.
mv -f -- "${ocr_pdf}" "${pdf}"
