#!/bin/bash
# changetrack--Tracks a given URL and, if it's changed since the last
#   visit, emails the new page to the specified address.
#
# Usage: changetrack url email
#   url    URL to watch; must start with "http:" or "https:".
#   email  Recipient address, or "-" to show the changes on screen instead.
#
# A text dump of the page (lynx -dump) is stored per URL under $sitearchive
# and compared with diff on each run. When the dump differs, the page's HTML
# (fetched with curl) is mailed through sendmail with relative src=/href=
# links rewritten against the site's base URL.
#
# Exit status: 0 when the check completed (changed, unchanged or first
# visit), 1 on usage, setup, fetch or mail errors.

# Without pipefail a failed fetch would be hidden by the exit status of the
# last command in the pipeline (uniq, sed or sendmail).
set -o pipefail

readonly sitearchive="/tmp/changetrack"
readonly fromaddr="webscraper@intuitive.com"
readonly dirperm=755      # read+write+execute for dir owner
readonly fileperm=644     # read+write for owner, read only for others
tmpchanges=""             # Temp file holding the diff; created on demand

# Removes the temporary diff file, if one was created.
cleanup() {
  if [[ -n "$tmpchanges" ]]; then
    rm -f -- "$tmpchanges"
  fi
}
trap cleanup EXIT          # Remove temp file on exit
trap 'exit 1' HUP TERM     # Route signals through the EXIT trap

if [[ $# -ne 2 ]]; then
  echo "Usage: ${0##*/} url email" >&2
  echo " tip: to have changes displayed on screen, use email addr '-'" >&2
  exit 1
fi

url=$1
recipient=$2

if [[ ! -d "$sitearchive" ]]; then
  if ! mkdir "$sitearchive"; then
    echo "${0##*/} failed: couldn't create $sitearchive." >&2
    exit 1
  fi
  chmod "$dirperm" "$sitearchive"
fi

case "$url" in
  http:* | https:*) ;;
  *)
    echo "Please use fully qualified URLs (e.g. start with 'http://')" >&2
    exit 1
    ;;
esac

# Archive file name: the URL without its scheme, with '/', '?' and '&'
# flattened to '.' so the result is a single path component.
fname=$(printf '%s\n' "$url" \
  | sed -e 's/http:\/\///g' -e 's/^https:\/\///' \
  | tr '/?&' '...')
if [[ -z "$fname" ]]; then
  echo "Please use fully qualified URLs (e.g. start with 'http://')" >&2
  exit 1
fi

# scheme://host/ -- prefix used to make relative links absolute in the mail.
baseurl="$(printf '%s\n' "$url" | cut -d/ -f1-3)/"

snapshot="$sitearchive/$fname"
newsnapshot="$snapshot.new"

# Grab a copy of the Web page and put it in an archive file. Note that we
# can track changes by looking just at the content (that is, -dump, not
# -source), so we can skip any HTML parsing....
if ! lynx -dump "$url" | uniq > "$newsnapshot"; then
  echo "${0##*/} failed: couldn't retrieve $url." >&2
  rm -f -- "$newsnapshot"
  exit 1
fi

if [[ -f "$snapshot" ]]; then
  # We've seen this site before, so compare the two with diff.
  if ! tmpchanges=$(mktemp "$sitearchive/changes.XXXXXX"); then
    echo "${0##*/} failed: couldn't create temp file in $sitearchive." >&2
    exit 1
  fi
  diff "$snapshot" "$newsnapshot" > "$tmpchanges"
  if [[ -s "$tmpchanges" ]]; then
    echo "Status: Site $url has changed since our last check."
  else
    echo "Status: No changes for site $url since last check"
    rm -f -- "$newsnapshot"   # Nothing new...
    exit 0                    # No change--we're outta here.
  fi
else
  echo "Status: first visit to $url. Copy archived for future analysis."
  mv "$newsnapshot" "$snapshot"
  chmod "$fileperm" "$snapshot"
  exit 0
fi

# If we're here, the site has changed, and we need to send the contents
# of the .new file to the user and replace the original with the .new
# for the next invocation of the script.

if [[ "$recipient" != "-" ]]; then
  # Only look for sendmail when mail is actually requested, so that the
  # on-screen mode works on hosts without it.
  if ! sendmail=$(command -v sendmail); then
    echo "${0##*/} failed: couldn't find sendmail." >&2
    exit 1
  fi

  # The first two sed expressions prefix every src=/href= target with the
  # base URL; the third undoes that for targets that were already absolute.
  if ! {
    echo "Content-type: text/html"
    echo "From: $fromaddr (Web Site Change Tracker)"
    echo "Subject: Web Site $url Has Changed"
    echo "To: $recipient"
    echo ""
    curl -s "$url" \
      | sed -e "s|src=\"|SRC=\"$baseurl|gi" \
            -e "s|href=\"|HREF=\"$baseurl|gi" \
            -e "s|${baseurl}\(https\{0,1\}:\)|\1|g"
  } | "$sendmail" -t; then
    # Keep the old snapshot so the change is reported again on the next run.
    echo "${0##*/} failed: couldn't mail $url to $recipient." >&2
    exit 1
  fi
else
  # Just showing the differences on the screen is ugly. Solution?
  cat "$tmpchanges"
fi

# Update the saved snapshot of the website.
mv "$newsnapshot" "$snapshot"
chmod "$fileperm" "$snapshot"
exit 0