#!/bin/bash
# checkexternal - Check the external links recorded for a website and
#   report which ones might be dead or otherwise broken.
#
# Usage: checkexternal [-a] URL
#
#   -a   List every link that was checked, whether it is accessible or
#        not. By default, only unreachable links are shown.
#   URL  Address of the site. Its third "/"-separated field (the host in
#        "scheme://host/path"), with the first "www." removed, names the
#        "<host>.rejects" file in the current directory that holds the
#        links to check, one per line.
#
# The .rejects file is not created here; per the error message below it
# is expected to come from a prior "checklinks" run.
#
# Exit status: 1 on a usage error or when the .rejects file is missing or
# empty; 0 otherwise, even when broken links were found.

# -e is deliberately not set: a failing curl is the data being reported,
# not a reason to abort the run.
set -uo pipefail

listall=0   # 1 when -a was given
errors=0    # number of links curl could not fetch
checked=0   # number of links tested

if [[ "${1:-}" == "-a" ]]; then
  listall=1
  shift
fi

if [[ -z "${1:-}" ]]; then
  echo "Usage: ${0##*/} [-a] URL" >&2
  exit 1
fi

# Remove leftover working files matching these patterns from the current
# directory on every exit path. "command" skips any shell function named
# rm; unmatched patterns are passed through literally and ignored by -f.
cleanup() {
  command rm -f -- traverse*.errors reject*.dat traverse*.dat
}
trap cleanup EXIT

# cut is kept (rather than parameter expansion) so that an argument with
# no "/" at all is still passed through unchanged as the host.
host=$(printf '%s\n' "$1" | cut -d/ -f3)
outfile="${host}.errors.ext"
url_list="${host/www./}.rejects"   # drops the first "www." only

# Clear any stale output file. Nothing visible in this script writes it
# afterwards; results are printed to stdout.
rm -f -- "$outfile"

if [[ ! -e "$url_list" ]]; then
  echo "File $url_list not found. Please run checklinks first." >&2
  exit 1
fi

if [[ ! -s "$url_list" ]]; then
  echo "There don't appear to be any external links ($url_list is empty)" >&2
  exit 1
fi

#### Now, finally, we're ready to begin...

# Read the de-duplicated list line by line so that URLs containing glob
# characters (e.g. "?" in a query string) or spaces are passed to curl
# intact instead of being word-split or pathname-expanded.
while IFS= read -r url; do
  [[ -n "$url" ]] || continue   # skip blank lines

  # stdin is redirected so curl can never consume the list being read.
  rc=0
  curl -s "$url" >/dev/null 2>&1 </dev/null || rc=$?

  if (( rc == 0 )); then
    if (( listall == 1 )); then
      printf '%s\n' "$url is fine."
    fi
  else
    printf '%s\n' "$url fails with error code $rc"
    errors=$(( errors + 1 ))
  fi
  checked=$(( checked + 1 ))
done < <(sort -u -- "$url_list")

echo ""
echo "Done. Checked $checked URLs and found $errors errors."
exit 0