Initial checkin

This commit is contained in:
Rob Pearce 2021-06-03 19:13:43 +10:00
commit c24bf0c96a
2 changed files with 486 additions and 0 deletions

71
README.md Normal file
View File

@ -0,0 +1,71 @@
# Overview
Bash wrapper for [scanline](https://github.com/klep/scanline) to allow for fast document scanning.
# Features
- Automatically tag PDFs based on filename
- Duplex scanning
- "multipage" scanning to allow for scanner adjustment between pages (for example if pages are different sizes)
- Store documents in directories based on financial year instead of calendar year for defined tags
# Usage
rpearce@crom:scan$ ./scan
[processing /Users/rpearce/.scanrc]
[tag tax will use financial year paths]
[preview enabled]
usage: ./scan [OPTIONS] filename tag1 [tag2] [tag3] ... [tagX]
Scans to: $PDFDIR/tag1/<year>/filename
Creates symlinks in:
$PDFDIR/tag2/<year>/filename
$PDFDIR/tag3/<year>/filename
...etc...
-d scan in duplex mode
-f xxx for given tag, use financial year in path rather than calendar year
-h show this text
-l list all available scanners
-m multi-page mode (prompts to load new pages each time)
-p preview document after scanning
-s xxx select scanner to use
-T Temporary mode - scan to /tmp/a.pdf
# Examples
## .scanrc example
bash$ cat ~/.scanrc
# Always preview documents
-p
# If "tax" tag is found, file based on financial year
-f tax
# AutoTag based on certain filenames
at nrma car/mazda
at rego car/mazda
at super super
at homeloan house
at visa bank
at youthsaver bank
at rcpt receipts
at receipt receipts
at elec utilities
at water utilities
at gas utilities
at ticket tickets
at quote quotes
## Usage examples
# Scan a double sided document
bash$ ./scan -d my_duplex_bank_statement.pdf bank financial
# Make a combined PDF of an A4 document and a small form factor receipt
bash$ ./scan -m combined.pdf misc
# Scan something without filing it to $PDFDIR/<tag>/<year>
bash$ ./scan -T
bash$ mv /tmp/a.pdf wherever

415
scan Executable file
View File

@ -0,0 +1,415 @@
#!/bin/bash
# Path of the scanline binary that this wrapper drives.
SCANLINE=/usr/local/bin/scanline

# ANSI escape sequences. They are stored un-interpreted (a literal backslash
# followed by 033) and only take effect when printed through `echo -e`.
CSI="\033["
PLAIN="${CSI}0m"
BOLD="${CSI}1m"
UNDERLINE="${CSI}4m"
RED="${CSI}31m"
GREEN="${CSI}32m"
BLUE="${CSI}34m"
CYAN="${CSI}36m"
# Blue combined with underline.
LINK="${BLUE}${UNDERLINE}"
#######################################
# Add a tag to TAGS for every auto-tag rule whose trigger text occurs in the
# given filename.
# Globals:   NAUTOTAGS, LOOKFOR[], ADDTAG[] (read); TAGS (appended to)
# Arguments: $1 - filename to inspect
# Outputs:   one "Inferred tag" message (via action) per matching rule
# Returns:   0
#######################################
autotags() {
  local name i
  name="$1"
  for (( i = 0; i < NAUTOTAGS; i++ )); do
    # LOOKFOR is deliberately unquoted: it is matched as a glob fragment.
    [[ $name == *${LOOKFOR[$i]}* ]] || continue
    # Space-separated list; no leading space on the first entry.
    TAGS="${TAGS:+$TAGS }${ADDTAG[$i]}"
    action "* Inferred tag '${BOLD}${ADDTAG[$i]}${PLAIN}${CYAN}' from filename."
  done
  return 0
}
#######################################
# Print a message in a given colour, then reset the terminal attributes.
# Globals:   PLAIN (read)
# Arguments: $1 - colour escape sequence; remaining args - message words
# Outputs:   the coloured message and a trailing newline on stdout
#######################################
cecho() {
  local colour=$1
  shift
  # Colour first (no newline), then the message followed by the reset code.
  echo -en "${colour}"
  echo -e "${*}${PLAIN}"
}
# Print an informational message in blue (all arguments joined by spaces).
info() {
  cecho "${BLUE}" "${*}"
}
# Print a progress/action message in cyan (all arguments joined by spaces).
action() {
  cecho "${CYAN}" "${*}"
}
#######################################
# Print an error message in red, prefixed with "ERROR: ", on stderr.
# Globals:   RED (read)
# Arguments: message words
#######################################
error() {
  # Duplicate the existing stderr descriptor rather than re-opening
  # /dev/stderr: re-opening truncates a log file that stderr was redirected
  # to, and can fail outright when the device node is not writable.
  cecho "$RED" "ERROR: $*" >&2
}
#######################################
# Make sure a mountpoint is mounted, mounting it by path if it is not.
# Arguments: $1 - mountpoint directory
# Returns:   0 if the directory is (now) a mount point, 1 otherwise
#######################################
mount_local() {
  local mydir
  # The mountpoint is the FIRST argument (the original read $2, which the
  # single-argument caller never supplies).
  mydir=$1
  if [[ -z $mydir ]]; then
    error "mount_local: no mountpoint given."
    return 1
  fi
  # Try an ls first in case an automounter is set up for this path.
  ls "${mydir}/" >/dev/null 2>&1
  if is_mounted "$mydir"; then
    return 0
  fi
  # Otherwise, make sure the mountpoint exists before mounting.
  if [[ ! -d $mydir ]]; then
    sudo mkdir "$mydir"
    sudo chown "$USER:staff" "$mydir"
  fi
  # Only the mountpoint is given to mount.
  # NOTE(review): presumably relies on an fstab entry for it - confirm.
  mount "$mydir"
  # Check again...
  if ! is_mounted "$mydir"; then
    error "$mydir could not be mounted."
    sudo rmdir "$mydir"
    return 1
  fi
  return 0
}
#######################################
# Mount an SMB share on a mountpoint, creating the mountpoint first.
# Globals:   CYAN, PLAIN, USER (read)
# Arguments: $1 - share to mount, $2 - mountpoint directory
# Returns:   0 if the mountpoint is mounted afterwards, 1 otherwise
#######################################
mount_samba() {
  local myshare mydir
  myshare=$1
  mydir=$2
  # -p: do not complain when the mountpoint already exists.
  sudo mkdir -p "$mydir"
  sudo chown "$USER:staff" "$mydir"
  # Make the password prompt cyan.
  echo -e -n "$CYAN"
  # Quoted so shares/mountpoints containing spaces stay single arguments.
  mount_smbfs "$myshare" "$mydir"
  echo -e -n "$PLAIN"
  # Verify that the mount actually happened.
  if ! is_mounted "$mydir"; then
    error "$mydir could not be mounted."
    sudo rmdir "$mydir"
    return 1
  fi
  return 0
}
#######################################
# Report whether a directory is a mount point by comparing what
# `stat -f %d` prints for the directory and for its parent.
# NOTE(review): with BSD/macOS stat this is the device number - confirm the
# script is only run where `stat -f` has that meaning.
# Arguments: $1 - directory to test
# Returns:   0 if the two values differ (mounted), 1 if they are equal
#######################################
is_mounted() {
  local target dev_self dev_parent
  target="$1"
  dev_self=$(stat -f %d "${target}/.")
  dev_parent=$(stat -f %d "${target}/..")
  # Same value as the parent means this is just an ordinary directory.
  [[ $dev_self != $dev_parent ]]
}
#######################################
# Print the command-line help text on stdout.
# Outputs: usage line (with the script name from $0), the path layout and the
#          list of options.
#######################################
usage() {
  # One printf, one argument per output line; \$ keeps $PDFDIR literal.
  printf '%s\n' \
    "usage: $0 [OPTIONS] filename tag1 [tag2] [tag3] ... [tagX]" \
    "" \
    " Scans to: \$PDFDIR/tag1/<year>/filename" \
    " Creates symlinks in:" \
    " \$PDFDIR/tag2/<year>/filename" \
    " \$PDFDIR/tag3/<year>/filename" \
    " ...etc..." \
    "" \
    " -d scan in duplex mode" \
    " -f xxx for given tag, use financial year in path rather than calendar year" \
    " -h show this text" \
    " -l list all available scanners" \
    " -m multi-page mode (prompts to load new pages each time)" \
    " -p preview document after scanning" \
    " -s xxx select scanner to use" \
    " -T Temporary mode - scan to /tmp/a.pdf" \
    ""
}
# --- Defaults; options from the rc file and the command line change these ---
DUPLEXOPTS=""   # extra scanline argument, set to "-duplex" by -d
MODE="scan"
# Base directory for scans. A quoted "~" is never tilde-expanded by the shell,
# so the default is spelled with $HOME to make later -d tests on it work.
DIR=${PDFDIR:-"${HOME}/Documents"}
# Share used by the manual-mount option (-M).
SHARE=${PDFSHARE:-"//rob@nas.nethack.net:/pdfs"}
SCANNEROPTS=""  # scanner selection, set by -s
MULTIPAGE=0     # 1 = prompt for more pages after each scan (-m)
PREVIEW=0       # 1 = open the PDF after scanning (-p)
TEMP=0          # 1 = scan to /tmp/a.pdf (-T); explicit so tests never see ""
RCFILE=${HOME}/.scanrc
TAGS=""         # space-separated tag list
NAUTOTAGS=0     # number of auto-tag rules loaded from the rc file
TEST=0          # 1 = only print what would be done (-t)
FINYEARTAGS=""  # tags filed by financial year (-f)
# Flat copy of the command line; rc-file option lines get prepended to it.
ALLARGS="$*"
#######################################
# Read the rc file, if it exists.
#   - Lines starting with "auto ", "at " or "autotag " define an auto-tag
#     rule: the 2nd word is the text to look for in a filename, the 3rd word
#     is the tag to add.
#   - Every other non-blank, non-comment line is prepended to ALLARGS so it is
#     parsed as if given on the command line.
# Globals: RCFILE (read); LOOKFOR[], ADDTAG[], NAUTOTAGS, ALLARGS (written)
#######################################
load_rcfile() {
  local line lookfor addtag
  local rule_re='^(auto|at|autotag) '
  [[ -e $RCFILE ]] || return 0
  info "[processing $RCFILE]"
  while read -r line; do
    if [[ $line =~ $rule_re ]]; then
      # Split on whitespace with the builtin instead of forking awk twice.
      read -r _ lookfor addtag _ <<<"$line"
      LOOKFOR[$NAUTOTAGS]="$lookfor"
      ADDTAG[$NAUTOTAGS]="$addtag"
      NAUTOTAGS=$((NAUTOTAGS + 1))
    else
      ALLARGS="$line $ALLARGS"
    fi
    # grep -E replaces the obsolescent egrep; comment and empty lines dropped.
  done < <(grep -Ev '(^#|^$)' -- "$RCFILE")
  return 0
}
load_rcfile
#######################################
# Parse leading options and record what is left (filename and tags).
# Uses the getopts builtin, so option arguments that look like options
# (e.g. "-f -p") and the accepted-but-unused -c flag can no longer throw the
# positional parameters out of step, and bad options stop the script.
# Globals:   DUPLEXOPTS, MULTIPAGE, PREVIEW, SCANNEROPTS, TEMP, TEST,
#            FINYEARTAGS, REMAINING_ARGS[] (written); SHARE, DIR, SCANLINE (read)
# Arguments: the option words followed by filename and tags
# Returns:   0; exits the script for -h, -l, -M and for invalid options
#######################################
parse_options() {
  local opt OPTIND=1 OPTARG
  while getopts ':cdf:hlmMps:Tt' opt; do
    case "$opt" in
      c)
        # Accepted for backward compatibility; has no effect.
        ;;
      d)
        DUPLEXOPTS="-duplex"
        info "[duplex mode]"
        ;;
      h)
        usage
        exit 1
        ;;
      l)
        "${SCANLINE}" -list
        exit 1
        ;;
      m)
        MULTIPAGE=1
        info "[multi-page mode enabled]"
        ;;
      M)
        echo -ne "${CYAN}Manually mounting ${DIR}...$PLAIN"
        if ! mount_samba "${SHARE}" "${DIR}"; then
          cecho "$RED" "failed"
          exit 1
        fi
        cecho "$GREEN" "done"
        exit 0
        ;;
      p)
        PREVIEW=1
        info "[preview enabled]"
        ;;
      s)
        # Stored in the same form as before for the scan step.
        SCANNEROPTS="-scanner \"$OPTARG\""
        ;;
      T)
        info "[temp mode]"
        TEMP=1
        ;;
      t)
        info "[test mode]"
        TEST=1
        ;;
      f)
        info "[tag ${BOLD}${OPTARG}${PLAIN}${BLUE} will use financial year paths]"
        FINYEARTAGS="$FINYEARTAGS $OPTARG"
        ;;
      :)
        error "option -$OPTARG requires an argument"
        usage
        exit 1
        ;;
      \?)
        error "unknown option -$OPTARG"
        usage
        exit 1
        ;;
    esac
  done
  shift $((OPTIND - 1))
  REMAINING_ARGS=("$@")
}
# ALLARGS is a flat string (rc-file lines + command line); splitting it into
# words here is intentional.
# shellcheck disable=SC2086
parse_options $ALLARGS
# Leave only the filename and tags in the positional parameters.
set -- "${REMAINING_ARGS[@]}"
# Decide the output filename and the full tag list.
if [[ $TEMP -eq 1 ]]; then
  # Temp mode: fixed name under /tmp, no tags involved.
  DIR=/tmp
  FILENAME=a
else
  if (( $# < 1 )); then
    usage
    exit 1
  fi

  # First argument is the filename, the rest are tags.
  FILENAME=$1
  shift 1

  # Tags inferred from the filename come first, then the explicit ones.
  autotags "$FILENAME"
  for tag in "$@"; do
    action "* Got tag '${BOLD}${tag}${PLAIN}${CYAN}' on command line."
    TAGS="${TAGS:+$TAGS }${tag}"
  done
  # All positional parameters have been consumed.
  set --

  if [[ -z $TAGS ]]; then
    error "No tags specified or inferred from filename."
    exit 1
  fi

  # The first tag names the directory which scanline writes the pdf to.
  # It is captured before de-duplication, because sort -u reorders the list.
  FIRSTTAG=$(echo $TAGS | awk '{ print $1 }')
  # Remove duplicate tags.
  TAGS=$(echo $TAGS | tr ' ' '\n' | sort -u | tr '\n' ' ' | sed -e 's/ $//')

  # Test mode: report what would happen, then stop.
  if [[ $TEST -eq 1 ]]; then
    echo -e "Tags found: ${GREEN}${TAGS}${PLAIN}"
    echo -e "PDF will be written to ${BOLD}${DIR}/${GREEN}${FIRSTTAG}${PLAIN}${BOLD}/${FILENAME}.pdf${PLAIN}."
    echo -e "Symlinks will be created in:"
    for t in $TAGS; do
      if [[ $t == $FIRSTTAG ]]; then
        continue
      fi
      echo -e " - ${DIR}/${GREEN}${t}${PLAIN}/${FILENAME}.pdf${PLAIN}"
    done
    exit 0
  fi
fi
#######################################
# Sanity-check the scan target before scanning (skipped in temp mode):
# mount the target directory if needed, then validate first tag and filename.
# Globals: TEMP, DIR, FIRSTTAG, FILENAME (read)
# Returns: 0 when everything is fine; exits the script with 1 otherwise
#######################################
check_target() {
  [[ $TEMP -eq 0 ]] || return 0

  # Check that the target pdfs share is mounted.
  if [[ $DIR =~ mnt|pdf ]]; then
    # Output is discarded.
    # NOTE(review): presumably this only pokes an automounter - confirm.
    df -h "${DIR}" >/dev/null 2>&1
    if ! is_mounted "$DIR"; then
      info "$DIR not mounted - trying to mount it..."
      #mount_samba "${SHARE}" "${DIR}"
      mount_local "${DIR}" || exit 1
    fi
  fi

  # Check that we didn't mix up the filename and tags. The path is quoted so
  # that a directory containing spaces is tested as one word.
  if [[ ! -d "${DIR}/${FIRSTTAG}" ]]; then
    error "$DIR/$FIRSTTAG doesn't exist, did you mix up filename and tags?"
    exit 1
  fi
  if [[ $FILENAME == */* ]]; then
    error "Filename ${BOLD}$FILENAME${PLAIN}${RED} contains a slash - did you mix up filename and tags?"
    exit 1
  fi
  if [[ $FILENAME == *,* ]]; then
    error "Filename ${BOLD}$FILENAME${PLAIN}${RED} is illegal - commas not allowed."
    exit 1
  fi
  return 0
}
check_target
# Do the scan and capture scanline's output. In multi-page mode the loop
# repeats, merging each new scan onto the pages collected so far, until the
# user answers 'n'.
# Earlier single-shot variants, kept for reference:
#${SCANLINE} -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $*
#exec 5>&1
#OUTPUT=$( ${SCANLINE} -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $* 2>&1 |tee /dev/fd/5; exit ${PIPESTATUS[0]})
#rv=$?
finished=0
NUMPAGES=0
# Scratch files for multi-page merging: TEMPFILE holds the pages scanned so
# far, TEMPFILE2 receives the merged result from pdftk.
TEMPFILE=`mktemp /tmp/scan.XXXXXX`
TEMPFILE2=`mktemp /tmp/scan.XXXXXX`
# Path(s) reported by the most recent scanline run; empty before the first.
PDFFILE=""
while [[ $finished -eq 0 ]]; do
gotexisting=0
if [[ $MULTIPAGE -eq 1 ]]; then
if ! [[ -z $PDFFILE ]]; then
# move already scanned pages out of the way
# NOTE(review): PDFFILE is unquoted; if it holds several paths mv receives
# several sources and its error is hidden by 2>/dev/null - confirm intent.
mv ${PDFFILE} ${TEMPFILE} 2>/dev/null
gotexisting=1
fi
fi
# scan new file
echo -e -n "${CYAN}Scanning..."
# NOTE(review): "$DUPLEXOPTS" and "$SCANNEROPTS" are quoted, so an empty value
# is passed as an empty argument and the -s value arrives as the single word
# '-scanner "name"' - confirm scanline accepts both.
OUTPUT=$( ${SCANLINE} -verbose -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $TAGS 2>&1)
rv=$?
if [ $rv -eq 0 ]; then
cecho $GREEN "done"
else
# Scan failed: show everything scanline printed, indented, and give up.
cecho $RED "failed"
echo ""
echo -e "${BOLD}${UNDERLINE}Full output:${PLAIN}"
echo "$OUTPUT" | sed -e 's/^/ /'
exit 1
fi
# Pages in this pass = number of 'Scan complete' lines in the output
# (printf %d turns wc's possibly padded count into a plain integer).
thisnpages=`printf %d $( echo "$OUTPUT" | grep 'Scan complete' | wc -l )`
NUMPAGES=$(( $NUMPAGES + $thisnpages ))
# Output path(s): the last field of every output line containing "to:".
PDFFILE=`echo "$OUTPUT" | grep to: | awk '{ print $NF }'`
if [[ $MULTIPAGE -eq 0 ]]; then
finished=1
else
# multi-page mode
cecho $GREEN "Scanned + $thisnpages page(s) -> $NUMPAGES total."
if [[ $gotexisting -eq 1 ]]; then
# append newly-scanned page file on to rest of the pdf.
pdftk ${TEMPFILE} ${PDFFILE} cat output ${TEMPFILE2}
mv ${TEMPFILE2} "${PDFFILE}"
# remove the copy of the earlier pages; they are now part of PDFFILE
rm -rf ${TEMPFILE}
fi
echo -en "Insert next pages and press ${BOLD}ENTER${PLAIN}, or type '${BOLD}n${PLAIN}':${PLAIN}"
read -p " " yn
# Only a lone "n" ends the loop; anything else scans another batch.
if [[ $yn == "n" ]]; then
finished=1
fi
fi
done
#######################################
# Remove the mktemp scratch files used for multi-page merging.
# TEMPFILE2 is included: it is created up front and is otherwise left behind
# in /tmp whenever no merge took place.
# Globals: TEMPFILE, TEMPFILE2 (read)
# Returns: 0
#######################################
cleanup_tempfiles() {
  local scratch
  for scratch in "$TEMPFILE" "$TEMPFILE2"; do
    if [[ -n $scratch && -e $scratch ]]; then
      rm -f -- "$scratch"
    fi
  done
  return 0
}
cleanup_tempfiles
#######################################
# Adjust year for financial year if required: from July onward, files scanned
# under a financial-year tag are moved from <tag>/<year>/ to <tag>/<year+1>/.
# Globals:   FINYEARTAGS (read); PDFFILE (read, rewritten for moved files)
# Arguments: $1 - calendar year (default: current year)
#            $2 - month, 1-12, leading zero allowed (default: current month)
# Returns:   0
#######################################
adjust_financial_year() {
  local curyear curmonth nextyear x t newname
  curyear=${1:-$(date +%Y)}
  curmonth=${2:-$(date +%m)}
  # Force base 10 so "08" and "09" are not rejected as octal (replaces bc).
  curmonth=$((10#$curmonth))
  nextyear=$((curyear + 1))
  if (( curmonth < 7 )); then
    return 0
  fi
  for x in $PDFFILE; do
    for t in $FINYEARTAGS; do
      [[ $x == */${t}/* ]] || continue
      newname=$(printf '%s\n' "$x" | sed -e "s,/${t}/${curyear}/,/${t}/${nextyear}/,")
      # Nothing to do when the path has no <tag>/<year>/ component.
      [[ $newname != "$x" ]] || continue
      action "* Adjusting path for financial year: $x -> ${BOLD}$newname${PLAIN}"
      # Next year's directory may not exist yet; only record the new path
      # when the move actually succeeded.
      if mkdir -p -- "$(dirname -- "$newname")" && mv -f -- "$x" "$newname"; then
        PDFFILE=$(printf '%s\n' "$PDFFILE" | sed -e "s,${x},${newname},")
        x=$newname
      else
        error "Could not move $x to $newname"
      fi
    done
  done
  return 0
}
adjust_financial_year
# Report where the scan ended up. PDFFILE may list several paths (separated
# by spaces or newlines); the first one becomes the file to copy and preview.
PREVIEWFILE=""
case "$PDFFILE" in
  *" "* | *$'\n'*)
    echo -e -n "${GREEN}Scanned $NUMPAGES page(s) to "
    for path in $PDFFILE; do
      if [[ -z $PREVIEWFILE ]]; then
        # First path: remember it and print it without a separator.
        PREVIEWFILE="$path"
        echo -e -n "${BOLD}${GREEN}${path}${PLAIN}"
      else
        echo -e -n "${GREEN} + ${BOLD}${path}${PLAIN}"
      fi
    done
    echo -e "${PLAIN}"
    ;;
  *)
    PREVIEWFILE="$PDFFILE"
    cecho "$GREEN" "Scanned $NUMPAGES page(s) to ${BOLD}${PDFFILE}${PLAIN}"
    ;;
esac

# Put the full path onto the copy buffer (no trailing newline).
echo -n "${PREVIEWFILE}" | pbcopy

# Open the result when previewing was requested.
if (( PREVIEW == 1 )); then
  action "Showing preview..."
  open "${PREVIEWFILE}"
fi