Add -L option to use lynx to render html
This commit is contained in:
parent
391d137a21
commit
2825e35152
|
@ -15,6 +15,8 @@ OPTIONS:
|
||||||
-F char Use given character as a field separator in config file instead of default (@)
|
-F char Use given character as a field separator in config file instead of default (@)
|
||||||
-gc After site scrapes, run 'git add' on all files, then 'git commit'
|
-gc After site scrapes, run 'git add' on all files, then 'git commit'
|
||||||
-gp After site scrapes, run 'git add' on all files, then 'git commit', then 'git push'
|
-gp After site scrapes, run 'git add' on all files, then 'git commit', then 'git push'
|
||||||
|
-l List configured sites then exit
|
||||||
|
-L Use lynx to render html
|
||||||
-o dirname Use given output dir instead of default (.)
|
-o dirname Use given output dir instead of default (.)
|
||||||
-t sitename Just output raw content of given site, useful for finding start/end regexps.
|
-t sitename Just output raw content of given site, useful for finding start/end regexps.
|
||||||
-T sitename Just output content of given site between re_start and re_end regexps.
|
-T sitename Just output content of given site between re_start and re_end regexps.
|
||||||
|
|
|
@ -18,6 +18,8 @@ function usage() {
|
||||||
echo " -F char Use given character as a field separator in config file instead of default (${SEP})"
|
echo " -F char Use given character as a field separator in config file instead of default (${SEP})"
|
||||||
echo " -gc After site scrapes, run 'git add' on all files, then 'git commit'"
|
echo " -gc After site scrapes, run 'git add' on all files, then 'git commit'"
|
||||||
echo " -gp After site scrapes, run 'git add' on all files, then 'git commit', then 'git push'"
|
echo " -gp After site scrapes, run 'git add' on all files, then 'git commit', then 'git push'"
|
||||||
|
echo " -l List configured sites then exit"
|
||||||
|
echo " -L Use lynx to render html"
|
||||||
echo " -o dirname Use given output dir instead of default (.)"
|
echo " -o dirname Use given output dir instead of default (.)"
|
||||||
echo " -t sitename Just output raw content of given site, useful for finding start/end regexps."
|
echo " -t sitename Just output raw content of given site, useful for finding start/end regexps."
|
||||||
echo " -T sitename Just output content of given site between re_start and re_end regexps."
|
echo " -T sitename Just output content of given site between re_start and re_end regexps."
|
||||||
|
@ -77,6 +79,17 @@ function showsite_byidx() { #1=idx
|
||||||
printf "$FORMAT" "${site_name[$idx]}" "${site_url[$idx]}"
|
printf "$FORMAT" "${site_name[$idx]}" "${site_url[$idx]}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fetch a URL and write a text version of its content to a file.
#
# Globals:   LYNX (read) - when non-empty, used as the command that renders
#                          the fetched HTML (invoked with -stdin -dump)
#            SED  (read) - sed command used by the fallback tag stripper
# Arguments: $1 - url to fetch
#            $2 - output file to (over)write
# Outputs:   nothing on stdout; the result is written to $2
function getcontent() { # 1=url 2=outputfile
  local url="$1"
  local outfile="$2"

  # NOTE: the result must go to "${outfile}". Redirecting to an unset
  # variable expands to an empty filename, the redirection fails and the
  # caller's temp file is left empty.
  if [[ -n $LYNX ]]; then
    curl -sL "$url" | "${LYNX}" -stdin -dump > "${outfile}"
  else
    # Put every tag on its own line, drop blank lines, then drop the lines
    # that consist solely of a tag so only the text content remains.
    curl -sL "$url" \
      | "${SED}" 's/>/>\n/g;s/</\n</g;' \
      | awk NF \
      | grep -Ev "^<.*>$" > "${outfile}"
  fi
}
|
||||||
|
|
||||||
trap cleanup EXIT TERM
|
trap cleanup EXIT TERM
|
||||||
|
|
||||||
|
|
||||||
|
@ -89,8 +102,9 @@ DOGITPUSH=0
|
||||||
MODE="normal"
|
MODE="normal"
|
||||||
TESTSITE=""
|
TESTSITE=""
|
||||||
TEST_USERES=0
|
TEST_USERES=0
|
||||||
|
LYNX=""
|
||||||
|
|
||||||
ARGS="hc:F:g:lo:t:T:"
|
ARGS="hc:F:g:lLo:t:T:"
|
||||||
while getopts "$ARGS" i; do
|
while getopts "$ARGS" i; do
|
||||||
case "$i" in
|
case "$i" in
|
||||||
h)
|
h)
|
||||||
|
@ -111,6 +125,10 @@ while getopts "$ARGS" i; do
|
||||||
l)
|
l)
|
||||||
MODE="list"
|
MODE="list"
|
||||||
;;
|
;;
|
||||||
|
L)
|
||||||
|
LYNX=$(type -p lynx 2>/dev/null)
|
||||||
|
[[ $? -ne 0 ]] && fatal "lynx not found in path"
|
||||||
|
;;
|
||||||
o)
|
o)
|
||||||
OUTDIR="$OPTARG"
|
OUTDIR="$OPTARG"
|
||||||
;;
|
;;
|
||||||
|
@ -148,7 +166,6 @@ fi
|
||||||
SED=$(which gsed 2>/dev/null)
|
SED=$(which gsed 2>/dev/null)
|
||||||
[[ $? -ne 0 ]] && SED=$(which sed 2>/dev/null)
|
[[ $? -ne 0 ]] && SED=$(which sed 2>/dev/null)
|
||||||
[[ -z $SED ]] && fatal "can't find sed in path"
|
[[ -z $SED ]] && fatal "can't find sed in path"
|
||||||
|
|
||||||
# Read config file
|
# Read config file
|
||||||
if [[ ! -e ${CONFIG} || ! -f ${CONFIG} ]]; then
|
if [[ ! -e ${CONFIG} || ! -f ${CONFIG} ]]; then
|
||||||
fatal "config file '$CONFIG' doesn't exist or isn't a plaintext file"
|
fatal "config file '$CONFIG' doesn't exist or isn't a plaintext file"
|
||||||
|
@ -197,8 +214,7 @@ elif [[ $MODE == "test" ]]; then
|
||||||
url="${site_url[$idx]}"
|
url="${site_url[$idx]}"
|
||||||
|
|
||||||
temp=$(newtempfile)
|
temp=$(newtempfile)
|
||||||
|
getcontent "$url" "$temp"
|
||||||
curl -sL "$url" | ${SED} 's/>/>\n/g;s/</\n</g;' | awk NF | egrep -v "^<.*>$" > ${temp}
|
|
||||||
if [[ $TEST_USERES -eq 1 ]]; then
|
if [[ $TEST_USERES -eq 1 ]]; then
|
||||||
sedcmd="/${site_re_start[$idx]}/,/${site_re_end[$idx]}/p"
|
sedcmd="/${site_re_start[$idx]}/,/${site_re_end[$idx]}/p"
|
||||||
cat "$temp" | ${SED} -n "$sedcmd"
|
cat "$temp" | ${SED} -n "$sedcmd"
|
||||||
|
@ -256,10 +272,11 @@ for x in ${!site_name[@]}; do
|
||||||
outfile="${OUTDIR}/${thisfile}"
|
outfile="${OUTDIR}/${thisfile}"
|
||||||
temp=$(newtempfile)
|
temp=$(newtempfile)
|
||||||
temp2=$(newtempfile)
|
temp2=$(newtempfile)
|
||||||
curl -sL "$url" | ${SED} 's/>/>\n/g;s/</\n</g;' | awk NF | egrep -v "^<.*>$" > "${temp}"
|
getcontent "$url" "$temp"
|
||||||
sedcmd="/${re_start}/,/${re_end}/p"
|
sedcmd="/${re_start}/,/${re_end}/p"
|
||||||
|
|
||||||
echo -e "SITE: ${sitename}\nURL: $url\n\n" >${temp2}
|
echo -e "SITE: ${sitename}\nURL: $url\n\n" >${temp2}
|
||||||
|
|
||||||
cat "$temp" | ${SED} -n "$sedcmd" >> ${temp2}
|
cat "$temp" | ${SED} -n "$sedcmd" >> ${temp2}
|
||||||
|
|
||||||
if [[ -e ${outfile} ]]; then
|
if [[ -e ${outfile} ]]; then
|
||||||
|
@ -293,7 +310,7 @@ done
|
||||||
|
|
||||||
if [[ $DOGITCOMMIT -eq 1 ]]; then
|
if [[ $DOGITCOMMIT -eq 1 ]]; then
|
||||||
if [[ -n $filesdone ]]; then
|
if [[ -n $filesdone ]]; then
|
||||||
echo -n "Doing git add..." >&2
|
echo -n "Doing git add for [${filesdone}]..." >&2
|
||||||
|
|
||||||
msg="Policies have been updated for the following sites:"
|
msg="Policies have been updated for the following sites:"
|
||||||
msg=$(echo -e "Policies have been updated for the following sites:\n$summary")
|
msg=$(echo -e "Policies have been updated for the following sites:\n$summary")
|
||||||
|
|
Loading…
Reference in New Issue