Initial checkin
This commit is contained in:
commit
c24bf0c96a
|
@ -0,0 +1,71 @@
|
|||
# Overview
|
||||
|
||||
Bash wrapper for [scanline](https://github.com/klep/scanline) to allow for fast document scanning.
|
||||
|
||||
# Features
|
||||
|
||||
- Automatically tag PDFs based on filename
|
||||
- Duplex scanning
|
||||
- "multipage" scanning to allow for scanner adjustment between pages (for example if pages are different sizes)
|
||||
- Store documents in directories based on financial year instead of calendar year for defined tags
|
||||
|
||||
|
||||
# Usage
|
||||
|
||||
rpearce@crom:scan$ ./scan
|
||||
[processing /Users/rpearce/.scanrc]
|
||||
[tag tax will use financial year paths]
|
||||
[preview enabled]
|
||||
usage: ./scan [OPTIONS] filename tag1 [tag2] [tag3] ... [tagX]
|
||||
|
||||
Scans to: $PDFDIR/tag1/<year>/filename
|
||||
Creates symlinks in:
|
||||
$PDFDIR/tag2/<year>/filename
|
||||
$PDFDIR/tag3/<year>/filename
|
||||
...etc...
|
||||
|
||||
-d scan in duplex mode
|
||||
-f xxx for given tag, use financial year in path rather than calendar year
|
||||
-h show this text
|
||||
-l list all available scanners
|
||||
-m multi-page mode (prompts to load new pages each time)
|
||||
-p preview document after scanning
|
||||
-s xxx select scanner to use
|
||||
-T Temporary mode - scan to /tmp/a.pdf
|
||||
|
||||
|
||||
# Examples
|
||||
|
||||
## .scanrc example
|
||||
bash$ cat ~/.scanrc
|
||||
# Always preview documents
|
||||
-p
|
||||
# If "tax" tag is found, file based on financial year
|
||||
-f tax
|
||||
# AutoTag based on certain filenames
|
||||
at nrma car/mazda
|
||||
at rego car/mazda
|
||||
at super super
|
||||
at homeloan house
|
||||
at visa bank
|
||||
at youthsaver bank
|
||||
at rcpt receipts
|
||||
at receipt receipts
|
||||
at elec utilities
|
||||
at water utilities
|
||||
at gas utilities
|
||||
at ticket tickets
|
||||
at quote quotes
|
||||
|
||||
## Usage examples
|
||||
|
||||
# Scan a double sided document
|
||||
bash$ ./scan -d my_duplex_bank_statement.pdf bank financial
|
||||
|
||||
# Make a combined PDF of an A4 document and a small form factor receipt
|
||||
bash$ ./scan -m combined.pdf misc
|
||||
|
||||
# Scan something without filing it to $PDFDIR/<tag>/<year>
|
||||
bash$ ./scan -T
|
||||
bash$ mv /tmp/a.pdf wherever
|
||||
|
|
@ -0,0 +1,415 @@
|
|||
#!/bin/bash

# Path of the scanline binary that performs the actual scanning.
SCANLINE=/usr/local/bin/scanline

# ANSI escape sequences. They are stored as literal backslash sequences and
# only turned into real escapes when printed with `echo -e`.

# -- text attributes
PLAIN='\033[0m'
BOLD='\033[1m'
UNDERLINE='\033[4m'

# -- foreground colours
RED='\033[31m'
GREEN='\033[32m'
BLUE='\033[34m'
CYAN='\033[36m'

# -- composite style
LINK="${BLUE}${UNDERLINE}"
|
||||
|
||||
|
||||
# Infer tags from a filename.
# For every configured (LOOKFOR[i], ADDTAG[i]) pair whose LOOKFOR text occurs
# in the filename, append ADDTAG[i] to the space-separated global TAGS.
# Globals:   NAUTOTAGS, LOOKFOR[], ADDTAG[] (read); TAGS (appended to)
# Arguments: $1 - filename to inspect
# Outputs:   one message per inferred tag, printed through action()
function autotags() {
  local file idx
  file="$1"

  for (( idx = 0; idx < NAUTOTAGS; idx++ )); do
    # The lookup text is unquoted, so it is matched as a glob pattern
    # anywhere inside the filename.
    [[ $file == *${LOOKFOR[$idx]}* ]] || continue

    # Join with a single space unless TAGS is still empty.
    TAGS="${TAGS:+$TAGS }${ADDTAG[$idx]}"
    action "* Inferred tag '${BOLD}${ADDTAG[$idx]}${PLAIN}${CYAN}' from filename."
  done
  return 0
}
|
||||
|
||||
# Print a message in colour, followed by a reset sequence and a newline.
# Globals:   PLAIN (read) - reset sequence appended to the message
# Arguments: $1 - colour escape sequence; remaining args - message words
# Outputs:   coloured message on stdout (escapes interpreted by `echo -e`)
function cecho() {
  # Emit the colour prefix on its own, without a trailing newline...
  echo -en "$1"
  shift
  # ...then the remaining arguments joined by spaces, plus the reset.
  echo -e "$*${PLAIN}"
}
|
||||
|
||||
# Print an informational message in blue.
# Globals:   BLUE (read)
# Arguments: message words (joined with spaces into one message)
function info() {
  local msg="$*"
  cecho "${BLUE}" "${msg}"
}
|
||||
|
||||
# Print a progress/action message in cyan.
# Globals:   CYAN (read)
# Arguments: message words (joined with spaces into one message)
function action() {
  local msg="$*"
  cecho "${CYAN}" "${msg}"
}
|
||||
|
||||
# Print an error message in red on standard error.
# Globals:   RED (read)
# Arguments: message words (joined with spaces, prefixed with "ERROR: ")
# Outputs:   the message on stderr
function error() {
  # Duplicate the existing stderr descriptor instead of re-opening
  # /dev/stderr: re-opening can truncate a log file that stderr was
  # redirected to, and fails outright when stderr is not re-openable.
  cecho "$RED" "ERROR: $*" >&2
}
|
||||
|
||||
# Make sure a locally-configured mount point is mounted.
# First pokes the directory (in case an automounter handles it), then falls
# back to creating the directory if needed and running `mount <dir>`.
# Arguments: $1 - mount point directory
# Returns:   0 if the directory is mounted afterwards, 1 otherwise
function mount_local() { # $1=mountpoint
  local mydir created
  # The mount point is the FIRST argument (callers pass only one).
  mydir=$1
  created=0

  # try an ls in case we have automount set up
  ls "${mydir}/" >/dev/null 2>&1
  if is_mounted "$mydir"; then
    return 0
  fi

  # otherwise, make sure it exists first
  if [[ ! -d ${mydir} ]]; then
    sudo mkdir "${mydir}"
    sudo chown "$USER:staff" "${mydir}"
    created=1
  fi
  mount "${mydir}"

  # check again...
  if ! is_mounted "$mydir"; then
    error "$mydir could not be mounted."
    # Only tidy up a directory that this call created; never remove a
    # mount point that already existed.
    if [[ $created -eq 1 ]]; then
      sudo rmdir "${mydir}"
    fi
    return 1
  fi
  return 0
}
|
||||
|
||||
# Mount an SMB share on a mount point using mount_smbfs.
# Globals:   CYAN, PLAIN (read), USER (read)
# Arguments: $1 - share URL passed to mount_smbfs
#            $2 - mount point directory (created if missing)
# Returns:   0 if the mount point is mounted afterwards, 1 otherwise
function mount_samba() { # $1=share $2=mountpoint
  local myshare mydir created
  myshare=$1
  mydir=$2
  created=0

  # Create the mount point only when it is missing, so an existing
  # directory does not produce a spurious mkdir error.
  if [[ ! -d ${mydir} ]]; then
    sudo mkdir "${mydir}"
    created=1
  fi
  sudo chown "$USER:staff" "${mydir}"

  # Make password prompt be cyan
  echo -e -n "$CYAN"
  mount_smbfs "${myshare}" "${mydir}"
  echo -e -n "$PLAIN"

  # check again...
  if ! is_mounted "$mydir"; then
    error "$mydir could not be mounted."
    # Only remove the directory if this call created it.
    if [[ $created -eq 1 ]]; then
      sudo rmdir "${mydir}"
    fi
    return 1
  fi
  return 0
}
|
||||
|
||||
# Test whether a directory is a mount point.
# Compares the `stat -f %d` value (device number with BSD stat) of the
# directory itself with that of its parent.
# Arguments: $1 - directory to test
# Returns:   0 when the two values differ (mounted), 1 when they are equal
function is_mounted() {
  local dir here parent
  dir="$1"
  here=$(stat -f %d "${dir}/.")
  parent=$(stat -f %d "${dir}/..")

  # Same value as the parent means nothing is mounted on the directory.
  [[ $here == $parent ]] && return 1
  return 0
}
|
||||
|
||||
# Print the command-line help text on stdout.
# Each argument to printf below becomes one output line.
function usage() {
  printf '%s\n' \
    "usage: $0 [OPTIONS] filename tag1 [tag2] [tag3] ... [tagX]" \
    '' \
    ' Scans to: $PDFDIR/tag1/<year>/filename' \
    ' Creates symlinks in:' \
    ' $PDFDIR/tag2/<year>/filename' \
    ' $PDFDIR/tag3/<year>/filename' \
    ' ...etc...' \
    '' \
    ' -d scan in duplex mode' \
    ' -f xxx for given tag, use financial year in path rather than calendar year' \
    ' -h show this text' \
    ' -l list all available scanners' \
    ' -m multi-page mode (prompts to load new pages each time)' \
    ' -p preview document after scanning' \
    ' -s xxx select scanner to use' \
    ' -T Temporary mode - scan to /tmp/a.pdf' \
    ''
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Defaults. Options from ~/.scanrc and the command line override them later.
# ---------------------------------------------------------------------------
DUPLEXOPTS=""
MODE="scan"
# A tilde inside quotes is never expanded by the shell, so the default
# document directory is spelled out with $HOME.
DIR=${PDFDIR:-"$HOME/Documents"}
SHARE=${PDFSHARE:-"//rob@nas.nethack.net:/pdfs"}
SCANNEROPTS=""
MULTIPAGE=0
PREVIEW=0
RCFILE=${HOME}/.scanrc
TAGS=""
NAUTOTAGS=0
TEST=0
# Initialised explicitly so a TEMP variable inherited from the environment
# cannot switch on temporary mode.
TEMP=0
FINYEARTAGS=""

# Command-line arguments, flattened into one string; rc-file option lines
# are prepended to it below so that everything is parsed in one pass.
ALLARGS="$*"
if [[ -e $RCFILE ]]; then
  info "[processing $RCFILE]"

  # Lines of the form "auto|at|autotag <lookfor> <tag>" define autotag rules.
  autotag_re='^(auto|at|autotag) '
  while read -r f; do
    if [[ $f =~ $autotag_re ]]; then
      # Split on whitespace: keyword, text to look for, tag to add.
      read -r _ thislookfor thisaddtag _ <<<"$f"
      LOOKFOR[$NAUTOTAGS]="$thislookfor"
      ADDTAG[$NAUTOTAGS]="$thisaddtag"
      NAUTOTAGS=$((NAUTOTAGS + 1))
    else
      # Anything else is treated as command-line options.
      ALLARGS="$f $ALLARGS"
    fi
  done < <(grep -Ev '(^#|^$)' "$RCFILE") # skip comments and blank lines
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
# Option parsing.
# getopt(1) normalises the merged rc-file + command-line arguments so that
# all options come first, followed by "--" and the remaining operands.
# ALLARGS and ARGS are deliberately unquoted: they are flat strings that
# must be word-split back into separate arguments.
# ---------------------------------------------------------------------------
ARGS=$(getopt cdf:hlmMps:Tt $ALLARGS) || {
  # getopt has already printed a diagnostic for the bad option.
  usage
  exit 1
}
eval set -- $ARGS

# Consume options from the front of the positional parameters. Every arm
# shifts away exactly what it consumed, so "$1" is always the next option
# and "$2" its argument; an option argument can never be mistaken for a flag.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -c)
      # Accepted by the getopt spec above but has no effect; it still has
      # to be consumed so later options stay aligned.
      shift 1
      ;;
    -d)
      DUPLEXOPTS="-duplex"
      shift 1
      info "[duplex mode]"
      ;;
    -h)
      usage
      exit 1
      ;;
    -l)
      ${SCANLINE} -list
      exit 1
      ;;
    -m)
      MULTIPAGE=1
      shift 1
      info "[multi-page mode enabled]"
      ;;
    -M)
      # Mount the share by hand and exit without scanning.
      shift 1
      echo -ne "${CYAN}Manually mounting ${DIR}...$PLAIN"
      if ! mount_samba "${SHARE}" "${DIR}"; then
        cecho "$RED" "failed"
        exit 1
      fi
      cecho "$GREEN" "done"
      exit 0
      ;;
    -p)
      PREVIEW=1
      shift 1
      info "[preview enabled]"
      ;;
    -s)
      SCANNEROPTS="-scanner \"$2\""
      shift 2
      ;;
    -T)
      info "[temp mode]"
      TEMP=1
      shift 1
      ;;
    -t)
      info "[test mode]"
      TEST=1
      shift 1
      ;;
    -f)
      info "[tag ${BOLD}$2${PLAIN}${BLUE} will use financial year paths]"
      FINYEARTAGS="$FINYEARTAGS $2"
      shift 2
      ;;
    --)
      # End of options: what remains is the filename followed by tags.
      shift
      break
      ;;
    *)
      break
      ;;
  esac
done
|
||||
|
||||
# ---------------------------------------------------------------------------
# Work out the output filename and the list of tags.
# ---------------------------------------------------------------------------
if [[ $TEMP -eq 1 ]]; then
  # Temporary mode: fixed destination, no tags involved.
  DIR=/tmp
  FILENAME=a
else
  if [ $# -lt 1 ]; then
    usage
    exit 1
  fi

  # first arg is filename, rest are tags
  FILENAME=$1
  shift 1

  # Tags inferred from the filename come first...
  autotags "$FILENAME"

  # ...followed by every tag given explicitly on the command line.
  for tag in "$@"; do
    action "* Got tag '${BOLD}${tag}${PLAIN}${CYAN}' on command line."
    TAGS="${TAGS:+$TAGS }${tag}"
  done
  set --

  if [[ -z $TAGS ]]; then
    error "No tags specified or inferred from filename."
    exit 1
  fi

  # This will be the directory which scanline writes the pdf to.
  # It is captured before the list is sorted below.
  FIRSTTAG=$(echo $TAGS | awk '{ print $1 }')

  # Remove duplicate tags (this also sorts the list).
  TAGS=$(echo $TAGS | tr ' ' '\n' | sort -u | tr '\n' ' ' | sed -e 's/ $//')

  if [[ $TEST -eq 1 ]]; then
    # Test mode: only report what would happen, then stop.
    echo -e "Tags found: ${GREEN}${TAGS}${PLAIN}"
    echo -e "PDF will be written to ${BOLD}${DIR}/${GREEN}${FIRSTTAG}${PLAIN}${BOLD}/${FILENAME}.pdf${PLAIN}."
    echo -e "Symlinks will be created in:"
    for t in $TAGS; do
      # The first tag holds the real file, not a symlink.
      [[ $t == $FIRSTTAG ]] && continue
      echo -e " - ${DIR}/${GREEN}${t}${PLAIN}/${FILENAME}.pdf${PLAIN}"
    done
    exit 0
  fi
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
# Sanity checks on the destination (skipped in temporary mode).
# ---------------------------------------------------------------------------
if [[ $TEMP -eq 0 ]]; then
  # Check that target pdfs share is mounted. Only directories whose path
  # contains "mnt" or "pdf" are treated as mount points.
  if [[ $DIR =~ mnt|pdf ]]; then
    if ! is_mounted "$DIR"; then
      info "$DIR not mounted - trying to mount it..."
      #mount_samba "${SHARE}" "${DIR}"
      mount_local "${DIR}" || exit 1
    fi
  fi

  # Check that we didn't mix up the filename and tags.
  # [[ ]] does not word-split, so a DIR containing spaces is tested as a
  # single path.
  if [[ ! -d ${DIR}/${FIRSTTAG} ]]; then
    error "$DIR/$FIRSTTAG doesn't exist, did you mix up filename and tags?"
    exit 1
  fi
  if [[ $FILENAME == */* ]]; then
    error "Filename ${BOLD}$FILENAME${PLAIN}${RED} contains a slash - did you mix up filename and tags?"
    exit 1
  fi

  if [[ $FILENAME == *,* ]]; then
    error "Filename ${BOLD}$FILENAME${PLAIN}${RED} is illegal - commas not allowed."
    exit 1
  fi
fi
|
||||
|
||||
# do the scan - capture output
|
||||
#${SCANLINE} -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $*
|
||||
|
||||
#exec 5>&1
|
||||
#OUTPUT=$( ${SCANLINE} -dir "${DIR}" -name "${FILENAME}" "$DUPLEXOPTS" "$SCANNEROPTS" $* 2>&1 |tee /dev/fd/5; exit ${PIPESTATUS[0]})
|
||||
#rv=$?
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Scan loop. Runs scanline once, or repeatedly in multi-page mode, merging
# each newly scanned PDF onto the pages scanned so far with pdftk.
# ---------------------------------------------------------------------------
finished=0
NUMPAGES=0
PDFFILE=""
TEMPFILE=""
TEMPFILE2=""

# Remove the scratch files on every exit path. A name that has been reset
# to "" is skipped, which is how a file is deliberately preserved.
function cleanup_tempfiles() {
  local f
  for f in "$TEMPFILE" "$TEMPFILE2"; do
    if [[ -n $f ]]; then
      rm -f -- "$f"
    fi
  done
}
trap cleanup_tempfiles EXIT

TEMPFILE=$(mktemp /tmp/scan.XXXXXX) || {
  error "Could not create temporary file."
  exit 1
}
TEMPFILE2=$(mktemp /tmp/scan.XXXXXX) || {
  error "Could not create temporary file."
  exit 1
}

# Build the scanline command as an array so that unset options contribute
# no argument at all (rather than an empty string) and the scanner name is
# passed as its own argument.
scan_cmd=("$SCANLINE" -verbose -dir "${DIR}" -name "${FILENAME}")
if [[ -n $DUPLEXOPTS ]]; then
  scan_cmd+=("$DUPLEXOPTS")
fi
if [[ -n $SCANNEROPTS ]]; then
  # SCANNEROPTS has the form: -scanner "<name>"; recover the bare name.
  scanner_name=${SCANNEROPTS#-scanner \"}
  scanner_name=${scanner_name%\"}
  scan_cmd+=(-scanner "$scanner_name")
fi
# Tags are passed as trailing arguments, one per word.
read -r -a tag_args <<<"$TAGS"
scan_cmd+=("${tag_args[@]}")

while [[ $finished -eq 0 ]]; do
  gotexisting=0
  if [[ $MULTIPAGE -eq 1 && -n $PDFFILE ]]; then
    # move already scanned pages out of the way
    # NOTE(review): PDFFILE is left unquoted here exactly as before; it
    # appears to be able to hold several whitespace-separated paths.
    mv ${PDFFILE} "${TEMPFILE}" 2>/dev/null
    gotexisting=1
  fi

  # scan new file
  echo -e -n "${CYAN}Scanning..."
  OUTPUT=$("${scan_cmd[@]}" 2>&1)
  rv=$?

  if [ $rv -eq 0 ]; then
    cecho "$GREEN" "done"
  else
    cecho "$RED" "failed"
    echo ""
    echo -e "${BOLD}${UNDERLINE}Full output:${PLAIN}"
    echo "$OUTPUT" | sed -e 's/^/ /'
    if [[ $gotexisting -eq 1 ]]; then
      # Do not let the EXIT trap delete the pages scanned so far.
      error "Previously scanned pages were kept in ${TEMPFILE}"
      TEMPFILE=""
    fi
    exit 1
  fi

  # One "Scan complete" line is counted per page.
  thisnpages=$(grep -c 'Scan complete' <<<"$OUTPUT")
  NUMPAGES=$((NUMPAGES + thisnpages))

  # The last field of every "...to: <path>" line is an output path.
  PDFFILE=$(awk '/to:/ { print $NF }' <<<"$OUTPUT")

  if [[ $MULTIPAGE -eq 0 ]]; then
    finished=1
  else
    # multi-page mode
    cecho "$GREEN" "Scanned + $thisnpages page(s) -> $NUMPAGES total."
    if [[ $gotexisting -eq 1 ]]; then
      # append newly-scanned page file on to rest of the pdf.
      if ! pdftk "${TEMPFILE}" ${PDFFILE} cat output "${TEMPFILE2}"; then
        # Never overwrite the scan with a failed merge result.
        error "pdftk could not merge pages; earlier pages kept in ${TEMPFILE}, new pages in ${PDFFILE}"
        TEMPFILE=""
        exit 1
      fi
      mv "${TEMPFILE2}" "${PDFFILE}"
      # remove the copy of the earlier pages, now merged into PDFFILE
      rm -f -- "${TEMPFILE}"
    fi

    echo -en "Insert next pages and press ${BOLD}ENTER${PLAIN}, or type '${BOLD}n${PLAIN}':${PLAIN}"
    read -r -p " " yn
    if [[ $yn == "n" ]]; then
      finished=1
    fi
  fi
done

# Scratch files are removed by the EXIT trap installed above.
|
||||
|
||||
# Adjust year for financial year if required.
# From July onwards, files under a financial-year tag are moved from
# .../<tag>/<current year>/ to .../<tag>/<next year>/.
curyear=$(date +%Y)
# Force base 10 so that "08" and "09" are not rejected as invalid octal.
curmonth=$((10#$(date +%m)))
nextyear=$((curyear + 1))
if [[ $curmonth -ge 7 ]]; then
  # PDFFILE and FINYEARTAGS are whitespace-separated lists.
  for x in $PDFFILE; do
    for t in $FINYEARTAGS; do
      [[ $x == */${t}/* ]] || continue

      newname=$(printf '%s\n' "$x" | sed -e "s,/${t}/$curyear/,/${t}/$nextyear/,")
      # Path mentions the tag but not the current year: nothing to move.
      [[ $newname == "$x" ]] && continue

      action "* Adjusting path for financial year: $x -> ${BOLD}$newname${PLAIN}"
      # The next-year directory may not exist yet (first scan of the new
      # financial year), so create it before moving.
      if mkdir -p -- "${newname%/*}" && mv -f -- "$x" "$newname"; then
        PDFFILE=$(printf '%s\n' "$PDFFILE" | sed -e "s,${x},${newname},")
      else
        # Keep reporting the original path when the move did not happen.
        error "Could not move $x to $newname"
      fi
    done
  done
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
# Report the result, copy the path to the clipboard, optionally preview.
# ---------------------------------------------------------------------------
PREVIEWFILE=""
case $PDFFILE in
  *" "* | *$'\n'*)
    # Several output paths: list them all; the first is the one previewed.
    echo -e -n "${GREEN}Scanned $NUMPAGES page(s) to "
    prefix="${BOLD}${GREEN}"
    for x in $PDFFILE; do
      : "${PREVIEWFILE:=$x}"
      echo -e -n "${prefix}${x}${PLAIN}"
      # Every path after the first is introduced with " + ".
      prefix="${GREEN} + ${BOLD}"
    done
    echo -e "${PLAIN}"
    ;;
  *)
    PREVIEWFILE="$PDFFILE"
    cecho "$GREEN" "Scanned $NUMPAGES page(s) to ${BOLD}${PDFFILE}${PLAIN}"
    ;;
esac

# Put the full path onto the copy buffer
echo -n "${PREVIEWFILE}" | pbcopy

if [[ $PREVIEW -eq 1 ]]; then
  action "Showing preview..."
  open "${PREVIEWFILE}"
fi
|
||||
|
||||
|
Loading…
Reference in New Issue