diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/scan b/scan
index 8aa052d..6a1d9a7 100755
--- a/scan
+++ b/scan
@@ -2,16 +2,11 @@
 
 SCANLINE=/usr/local/bin/scanline
 
-# ANSI stuff
-BOLD="\033[1m"
-PLAIN="\033[0m"
-UNDERLINE="\033[4m"
-RED="\033[31m"
-GREEN="\033[32m"
-BLUE="\033[34m"
-CYAN="\033[36m"
-LINK="$BLUE$UNDERLINE"
-
+. "${HOME}/.bashtools/bashtools.sh"  # quoted so a HOME containing spaces still resolves
+if [[ -z $HAVE_BASHTOOLS ]]; then
+  echo "ERROR: bashtools not installed, download from https://git.nethack.net/rob/bashtools" >&2
+  exit 1
+fi
 
 function autotags() {
   local file idx
@@ -25,32 +20,12 @@ function autotags() {
       else
         TAGS="$TAGS ${ADDTAG[$idx]}"
       fi
-      action "* Inferred tag '${BOLD}${ADDTAG[$idx]}${PLAIN}${CYAN}' from filename."
+      inform "* Inferred tag '^b${ADDTAG[$idx]}^p' from filename."
     fi
     idx=$((idx+1))
   done
 }
 
-function cecho() {
-  local COL
-  COL="$1"
-  shift
-  echo -en "$COL"
-  echo -e "$*${PLAIN}"
-}
-
-function info() {
-  cecho "$BLUE" "$*"
-}
-
-function action() {
-  cecho "$CYAN" "$*"
-}
-
-function error() {
-  cecho "$RED" "ERROR: $*" >/dev/stderr
-}
-
 function mount_local() { # $1=mountpoint
   local mydir
   mydir=$2
@@ -72,9 +47,9 @@ function mount_local() { # $1=mountpoint
   # check again...
   is_mounted "$mydir"
   if [ $? -ne 0 ]; then
-        error "$mydir could not be mounted."
-        sudo rmdir ${mydir}
-        return 1
+    error "$mydir could not be mounted."
+    sudo rmdir ${mydir}
+    return 1
   fi
   return 0
 }
@@ -94,9 +69,9 @@ function mount_samba() { # $1=share $2=mountpoint
   # check again...
   is_mounted "$mydir"
   if [ $? -ne 0 ]; then
-        error "$mydir could not be mounted."
-        sudo rmdir ${mydir}
-        return 1
+    error "$mydir could not be mounted."
+    sudo rmdir ${mydir}
+    return 1
   fi
   return 0
 }
@@ -113,32 +88,38 @@ function is_mounted() {
 }
 
 function usage() {
-    echo "usage: $0 [OPTIONS] filename tag1 [tag2] [tag3] ... [tagX]"
-    echo ""
-    echo " Scans to: \$PDFDIR/tag1//filename"
-    echo " Creates symlinks in:"
-    echo " \$PDFDIR/tag2//filename"
-    echo " \$PDFDIR/tag3//filename"
-    echo " ...etc..."
-    echo ""
-    echo " -d scan in duplex mode"
-    echo " -f xxx for given tag, use financial year in path rather than calendar year"
-    echo " -h show this text"
-    echo " -l list all available scanners"
-    echo " -m multi-page mode (prompts to load new pages each time)"
-    echo " -p preview document after scanning"
-    echo " -s xxx select scanner to use"
-    echo " -T Temporary mode - scan to /tmp/a.pdf"
-    echo " -v verbose mode"
-    echo ""
+  echo "usage: $0 [OPTIONS] filename tag1 [tag2] [tag3] ... [tagX]"
+  echo ""
+  echo " Scans to: \$PDFDIR/tag1//filename"
+  echo " Creates symlinks in:"
+  echo " \$PDFDIR/tag2//filename"
+  echo " \$PDFDIR/tag3//filename"
+  echo " ...etc..."
+  echo ""
+  echo " -d scan in duplex mode"
+  echo " -f xxx for given tag, use financial year in path rather than calendar year"
+  echo " -h show this text"
+  echo " -l list all available scanners"
+  echo " -m multi-page mode (prompts to load new pages each time)"
+  echo " -o use ocrmypdf to straighten document, extract text into pdf and use tags as keyword metadata"
+  echo " -O xxx specify path to ocrmypdf"
+  echo " -p preview document after scanning"
+  echo " -s xxx select scanner to use"
+  echo " -T Temporary mode - scan to /tmp/a.pdf"
+  echo " -v verbose mode"
+  echo ""
 }
 
+OCRPROG_DEF="/opt/homebrew/bin/ocrmypdf"
+OCRPROG="$OCRPROG_DEF"
+P2TPROG="pdftotext"
 DUPLEXOPTS=""
 MODE="scan"
 DIR=${PDFDIR:-"~/Documents"}
 SHARE=${PDFSHARE:-"//rob@nas.nethack.net:/pdfs"}
 SCANNEROPTS=""
 MULTIPAGE=0
+OCR=0
 PREVIEW=0
 RCFILE=${HOME}/.scanrc
 TAGS=""
@@ -147,9 +128,9 @@ TEST=0
 FINYEARTAGS=""
 VERBOSE=0
 
-ALLARGS="$*"
-if [[ -e $RCFILE ]]; then
-    info "[processing $RCFILE]"
+ALLARGS="$@"
+if [[ -e $RCFILE && " $ALLARGS " != *" -h "* ]]; then  # whole-word -h only; a bare *-h* also matched names like water-heater
+  notify "processing $RCFILE"
   while read -r f ; do
     if [[ $f =~ ^auto\  || $f =~ ^at\  || $f =~ ^autotag\  ]]; then
 
@@ -162,155 +143,170 @@ if [[ -e $RCFILE ]]; then
       ALLARGS="$f $ALLARGS"
     fi
   done < <(egrep -v "(^#|^$)" $RCFILE)
+  ok
 fi
-
-ARGS=$(getopt cdf:hlmMps:Ttv $ALLARGS)
-eval set -- $ARGS
-for i do
-    case "$i" in
-        -v)
-            VERBOSE=1; shift 1;
-            info "[verbose mode]"
-        ;;
-        -d)
-            DUPLEXOPTS="-duplex"; shift 1;
-            info "[duplex mode]"
-        ;;
-        -h)
-            usage; exit 1;
-        ;;
-        -l)
-            ${SCANLINE} -list; exit 1;
-        ;;
-        -m)
-            MULTIPAGE=1; shift 1;
-            info "[multi-page mode enabled]"
-        ;;
-        -M)
-            shift 1;
-            echo -ne "${CYAN}Manually mounting ${DIR}...$PLAIN"
-            mount_samba "${SHARE}" "${DIR}"
-            if [ $? -ne 0 ]; then
-                cecho $RED "failed"
-                exit 1
-            fi
-            cecho $GREEN "done"
-            exit 0
-        ;;
-        -p)
-            PREVIEW=1; shift 1;
-            info "[preview enabled]"
-        ;;
-        -s)
-            SCANNEROPTS="-scanner \"$2\""; shift 2;
-        ;;
-        -T)
-            info "[temp mode]"
-            TEMP=1; shift 1;
-        ;;
-        -t)
-            info "[test mode]"
-            TEST=1; shift 1;
-        ;;
-        -f)
-            info "[tag ${BOLD}$2${PLAIN}${BLUE} will use financial year paths]"
-            FINYEARTAGS="$FINYEARTAGS $2"
-            shift 2
-        ;;
-        --)
-            shift
-        ;;
-    esac
+validargs="cdf:hlmMoO:ps:Ttv"  # O: added - the documented -O <path> option was missing from the optstring
+while getopts "$validargs" i $ALLARGS; do
+  case "$i" in
+    v)
+      VERBOSE=1;
+      info "verbose mode"
+      ;;
+    d)
+      DUPLEXOPTS="-duplex";
+      info "duplex mode"
+      ;;
+    h)
+      usage; exit 1;
+      ;;
+    l)
+      ${SCANLINE} -list; exit 1;
+      ;;
+    m)
+      MULTIPAGE=1;
+      info "multi-page mode enabled"
+      ;;
+    M)
+      inform "${CYAN}Manually mounting ^b${DIR}^p, enter password if prompted"
+      mount_samba "${SHARE}" "${DIR}"
+      if [ $? -ne 0 ]; then
+        error "Mount failed"
+        exit 1
+      fi
+      inform "Mount complete"
+      exit 0
+      ;;
+    o)
+      OCR=1;
+      ;;
+    O)
+      OCRPROG="$OPTARG"
+      ;;
+    p)
+      PREVIEW=1;
+      info "preview enabled"
+      ;;
+    s)
+      SCANNEROPTS="-scanner \"$OPTARG\"";
+      ;;
+    T)
+      info "temp mode"
+      TEMP=1;
+      ;;
+    t)
+      info "test mode"
+      TEST=1;
+      ;;
+    f)  # getopts sets $i to the bare option letter, so the old "-f)" pattern could never match
+      info "tag ^b$OPTARG^p will use financial year paths"
+      FINYEARTAGS="$FINYEARTAGS $OPTARG"
+      ;;
+    *)
+      error "invalid argument: $i";
+      usage; exit 1;  # stop here rather than scanning with a half-understood command line
+      ;;
+  esac
 done
+set -- $ALLARGS; shift $((OPTIND - 1))  # getopts walked $ALLARGS (rc-file options included), so shift that same list
+
+if [[ $OCR -eq 1 ]]; then
+  if [[ -x $OCRPROG ]]; then
+    info "OCR mode enabled"
+  else
+    OCR=0  # binary unavailable: disable the later OCR pass instead of letting it fail
+    wtext="OCR requested but ^b$OCRPROG^p not found."
+    if [[ $OCRPROG == "$OCRPROG_DEF" ]]; then  # quoted RHS = literal comparison, not a glob
+      wtext="${wtext} Use ^b-O^p to specify alternate binary."
+    fi
+    warn "$wtext"
+  fi
+fi
 
 if [[ $TEMP -eq 1 ]]; then
-    DIR=/tmp
-    FILENAME=a
+  DIR=/tmp
+  FILENAME=a
 else
-    if [ $# -lt 1 ]; then
-        usage
-        exit 1
-    fi
+  if [ $# -lt 1 ]; then
+    usage
+    exit 1
+  fi
 
-    # first arg is filename, rest are tags
-    FILENAME=$1
-    shift 1
+  # first arg is filename, rest are tags
+  FILENAME=$1
+  shift 1
 
-
-
-    autotags "$FILENAME" # determine tags from filename
-    while [[ $# -ge 1 ]]; do
-        action "* Got tag '${BOLD}$1${PLAIN}${CYAN}' on command line."
-        if [[ -z $TAGS ]]; then
-            TAGS="$1"
-        else
-            TAGS="$TAGS $1"
-        fi
-        shift 1
-    done
+  autotags "$FILENAME" # determine tags from filename
+  while [[ $# -ge 1 ]]; do
+    inform "Got tag '^b$1^p' on command line."
     if [[ -z $TAGS ]]; then
-        error "No tags specified or inferred from filename."
-        exit 1
-    fi
-
-    # This will be the directory which scanline writes the pdf to
-    FIRSTTAG=`echo $TAGS | awk '{ print $1 }'`
-
-    # Remove duplicate tags
-    TAGS=`echo $TAGS | tr ' ' '\n' | sort -u | tr '\n' ' ' | sed -e 's/ $//'`
-
-    if [[ $TEST -eq 1 ]]; then
-        echo -e "Tags found: ${GREEN}${TAGS}${PLAIN}"
-        echo -e "PDF will be written to ${BOLD}${DIR}/${GREEN}${FIRSTTAG}${PLAIN}${BOLD}/${FILENAME}.pdf${PLAIN}."
-        echo -e "Symlinks will be created in:"
-        for t in $TAGS; do
-            if [[ $t != $FIRSTTAG ]]; then
-                echo -e " - ${DIR}/${GREEN}${t}${PLAIN}/${FILENAME}.pdf${PLAIN}"
-            fi
-        done
-        exit 0
+      TAGS="$1"
+    else
+      TAGS="$TAGS $1"
     fi
+    shift 1
+  done
+  if [[ -z $TAGS ]]; then
+    error "No tags specified or inferred from filename."
+    exit 1
+  fi
+
+  # This will be the directory which scanline writes the pdf to
+  FIRSTTAG=`echo $TAGS | awk '{ print $1 }'`
+
+  # Remove duplicate tags
+  TAGS=`echo $TAGS | tr ' ' '\n' | sort -u | tr '\n' ' ' | sed -e 's/ $//'`
+
+  if [[ $TEST -eq 1 ]]; then
+    inform "Tags found: ${GREEN}${BOLD}${TAGS}^p"
+    inform "PDF will be written to ^b${DIR}/${GREEN}${BOLD}${FIRSTTAG}^p^b/${FILENAME}.pdf^p."  # close the trailing bold span
+    inform "Symlinks will be created in:"
+    for t in $TAGS; do
+      if [[ $t != $FIRSTTAG ]]; then
+        inform " - ${DIR}/${GREEN}${t}^p/${FILENAME}.pdf"  # restore the "/" between tag dir and filename
+      fi
+    done
+    exit 0
+  fi
 fi
 
 if [[ $TEMP -eq 0 ]]; then
-    # Check that target pdfs share is mounted
-    if [[ $DIR =~ mnt|pdf ]]; then
-        df -h ${DIR} 2>&1 | grep @ >/dev/null 2>&1
-        is_mounted "$DIR"
-        if [ $? -ne 0 ]; then
-            info "$DIR not mounted - trying to mount it..."
-            #mount_samba "${SHARE}" "${DIR}"
-            mount_local "${DIR}"
-            if [ $? -ne 0 ]; then
-                exit 1
-            fi
-        fi
-    fi
-
-    # Check that we didn't mix up the filename and tags
-    if ! [ -d ${DIR}/${FIRSTTAG} ] ; then
-        error "$DIR/$FIRSTTAG doesn't exist, did you mix up filename and tags?"
-        exit 1
-    fi
-    if [[ $FILENAME == */* ]]; then
-        error "Filename ${BOLD}$FILENAME${PLAIN}${RED} contains a slash - did you mix up filename and tags?"
-        exit 1
+  # Check that target pdfs share is mounted
+  if [[ $DIR =~ mnt|pdf ]]; then
+    df -h ${DIR} 2>&1 | grep @ >/dev/null 2>&1
+    is_mounted "$DIR"
+    if [ $? -ne 0 ]; then
+      info "$DIR not mounted - trying to mount it..."
+      #mount_samba "${SHARE}" "${DIR}"
+      mount_local "${DIR}"
+      if [ $? -ne 0 ]; then
+        exit 1
+      fi
     fi
+  fi
 
-    if [[ $FILENAME == *,* ]]; then
-        error "Filename ${BOLD}$FILENAME${PLAIN}${RED} is illegal - commas not allowed."
-        exit 1
-    fi
+  # Check that we didn't mix up the filename and tags
+  if ! [ -d ${DIR}/${FIRSTTAG} ] ; then
+    error "$DIR/$FIRSTTAG doesn't exist, did you mix up filename and tags?"
+    exit 1
+  fi
+  if [[ $FILENAME == */* ]]; then
+    error "Filename ^b$FILENAME^p contains a slash - did you mix up filename and tags?"
+    exit 1
+  fi
+
+  if [[ $FILENAME == *,* ]]; then
+    error "Filename ^b$FILENAME^p is illegal - commas not allowed."
+    exit 1
+  fi
 fi
 
 # do the scan - capture output
 #${SCANLINE} -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $*
-
 #exec 5>&1
 #OUTPUT=$( ${SCANLINE} -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $* 2>&1 |tee /dev/fd/5; exit ${PIPESTATUS[0]})
 #rv=$?
-
 finished=0
 NUMPAGES=0
 TEMPFILE=`mktemp /tmp/scan.XXXXXX`
@@ -327,22 +323,21 @@ while [[ $finished -eq 0 ]]; do
   fi
 
   # scan new file
-
   if [[ $VERBOSE -eq 1 ]]; then
     info "will run: ${SCANLINE} -verbose -dir \"${DIR}\" -name \"${FILENAME}\" \"$DUPLEXOPTS\" \"$SCANNEROPTS\" $TAGS 2>&1"
   fi
 
-  echo -e -n "${CYAN}Scanning..."
+  notify "Scanning..."
   OUTPUT=$( ${SCANLINE} -verbose -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $TAGS 2>&1)
   rv=$?
   if [ $rv -eq 0 ]; then
-    cecho $GREEN "done"
+    ok "done"
   else
-        cecho $RED "failed"
-        echo ""
-        echo -e "${BOLD}${UNDERLINE}Full output:${PLAIN}"
-        echo "$OUTPUT" | sed -e 's/^/ /'
-        exit 1
+    fail
+    echo ""
+    csecho "$RED" "^bFull output:^p"
+    csecho -e "$RED" "$OUTPUT" | sed -e 's/^/ /'
+    exit 1
   fi
   thisnpages=`printf %d $( echo "$OUTPUT" | egrep '^(Scan complete|didScanTo)' | wc -l )`
   NUMPAGES=$(( $NUMPAGES + $thisnpages ))
@@ -358,7 +353,7 @@ while [[ $finished -eq 0 ]]; do
   else
     # multi-line mode
 
-    cecho $GREEN "Scanned + $thisnpages page(s) -> $NUMPAGES total."
+    csecho "$GREEN" "Scanned +^b$thisnpages^p page(s) -> ^b$NUMPAGES^p total."
     if [[ $gotexisting -eq 1 ]]; then
       # append newly-scanned page file on to rest of the pdf.
       pdftk ${TEMPFILE} ${PDFFILE} cat output ${TEMPFILE2}
@@ -367,8 +362,7 @@ while [[ $finished -eq 0 ]]; do
       rm -rf ${TEMPFILE}
     fi
 
-    echo -en "Insert next pages and press ${BOLD}ENTER${PLAIN}, or type '${BOLD}n${PLAIN}':${PLAIN}"
-    read -p " " yn
+    ask "Insert next pages and press ^bENTER^p, or type '^bn^p':" "y" yn
     if [[ $yn == "n" ]]; then
       finished=1
     fi
@@ -391,7 +385,7 @@ if [[ $curmonth -ge 7 ]]; then
     for t in $FINYEARTAGS; do
       if [[ $x == */${t}/* ]]; then
         newname=`echo "$x" | sed -e "s,/${t}/$curyear/,/${t}/$nextyear/,"`
-        action "* Adjusting path for financial year: $x -> ${BOLD}$newname${PLAIN}"
+        inform "Adjusting path for financial year: $x -> ^b$newname^p"
         mv -f "$x" "$newname"
         PDFFILE=$(echo "$PDFFILE" | sed -e "s,${x},${newname},")
       fi
@@ -401,29 +395,46 @@ fi
 
 PREVIEWFILE=""
 if [[ $PDFFILE == *" "* || $PDFFILE == *$'\n'* ]]; then
-  echo -e -n "${GREEN}Scanned $NUMPAGES page(s) to "
+  itext="Scanned $NUMPAGES page(s) to "
   count=1
   for x in $PDFFILE; do
     if [[ $count -eq 1 ]]; then
       PREVIEWFILE="$x"
-      echo -e -n "${BOLD}${GREEN}${x}${PLAIN}"
+      itext="${itext}^b${x}^p"
     else
-      echo -e -n "${GREEN} + ${BOLD}${x}${PLAIN}"
+      itext="${itext} + ^b${x}^p"
     fi
     count=$((count + 1))
   done
-  echo -e "${PLAIN}"
+  inform "$itext"
 else
   PREVIEWFILE="$PDFFILE"
-  cecho $GREEN "Scanned $NUMPAGES page(s) to ${BOLD}${PDFFILE}${PLAIN}"
+  inform "Scanned $NUMPAGES page(s) to ^b${PDFFILE}^p"
+fi
+
+if [[ $OCR -eq 1 ]]; then
+  OCROPTS=(--output-type pdfa -r -d -q --author scan.sh --keywords "$TAGS")  # array: all tags reach --keywords as one argument, no literal quotes
+
+  "$OCRPROG" "${OCROPTS[@]}" "${PREVIEWFILE}" "${PREVIEWFILE}" >/dev/null 2>&1
+  if [[ $? -eq 0 ]]; then
+    if command -v "$P2TPROG" >/dev/null 2>&1; then  # look pdftotext up on PATH; -x only tested ./pdftotext
+      words=$("$P2TPROG" "${PREVIEWFILE}" /dev/stdout | wc -w | tr -d ' ')  # -w counts words (was -c = bytes); tr drops wc padding
+      [[ $words -eq 1 ]] && ess="" || ess="s"
+      inform "OCR complete. Found ^b${words}^p word${ess}"
+    else
+      inform "OCR complete. Install pdftotext for word counts."
+    fi
+  else
+    error "OCR failed"
+  fi
 fi
 
 # Put the full path onto the copy buffer
 echo -n "${PREVIEWFILE}" | pbcopy
 
 if [ $PREVIEW -eq 1 ]; then
-    action "Showing preview..."
-    open "${PREVIEWFILE}"
+  inform "Showing preview..."
+  open "${PREVIEWFILE}"
 fi