#!/bin/bash
# searchinfo - Extracts and analyzes search engine traffic indicated in the
#   referrer field of a Common Log Format access log.
#
# Usage: searchinfo logfile
#
# Field 11 of each log line is taken as the referrer. Referrers that point
# at our own site (see $host) or that carry no query string are ignored.
# For the rest, the referring domain plus the value of any well-known
# search parameter is tallied, and the $maxmatches most frequent entries
# are printed, followed by a summary line.
#
# Exit status: 0 on success, 1 on usage error, unreadable log file, or
# failure to create the temporary file.

# -u only: the grep stages below legitimately exit non-zero when nothing
# matches, so -e / pipefail would abort on perfectly normal input.
set -u

readonly host="intuitive.com"   # change to your domain, as desired
readonly maxmatches=20

# Path of the scratch file holding one tally line per referrer. Kept global
# so the EXIT trap can still see it after main has finished.
temp=""

#######################################
# Print a message on stderr and terminate with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# List referrer fields that come from other sites and carry a query string.
# Globals:   host (read)
# Arguments: $1 - path of the access log
# Outputs:   one referrer field per line on stdout
#######################################
external_referrers() {
  local logfile=$1
  # Escape the dots so "intuitive.com" cannot match e.g. "intuitiveXcom".
  local host_re=${host//./\\.}

  awk '{ if (length($11) > 4) { print $11 } }' "$logfile" \
    | grep -vE "(/www\\.${host_re}|/${host_re})" \
    | grep -F '?'
}

#######################################
# Turn one referrer URL into a tally line.
# Arguments: $1 - referrer field as it appears in the log
# Outputs:   "<domain>: <search terms>" when a well-known search parameter
#            is present (one output line per matching parameter value),
#            otherwise "<domain> <raw query string>"
#######################################
describe_referrer() {
  local url=$1
  local searchengine query args

  # Third '/'-separated field is the host name; keep its last two labels.
  searchengine=$(printf '%s\n' "$url" | cut -d/ -f3 | rev | cut -d. -f1-2 | rev)
  query=$(printf '%s\n' "$url" | cut -d'?' -f2)
  args=$(printf '%s\n' "$query" \
    | tr '&' '\n' \
    | grep -E '(^q=|^sid=|^p=|query=|item=|ask=|name=|topic=)' \
    | sed -e 's/+/ /g' -e 's/%20/ /g' -e 's/"//g' \
    | cut -d= -f2)

  if [[ -n "$args" ]]; then
    printf '%s: %s\n' "$searchengine" "$args"
  else
    # No well-known match, show entire GET string instead...
    printf '%s %s\n' "$searchengine" "$query"
  fi
}

#######################################
# Entry point: validate arguments, tally referrers, print the report.
# Globals:   temp (written), maxmatches (read)
# Arguments: $1 - path of the access log
#######################################
main() {
  if (( $# == 0 )); then
    die "Usage: ${0##*/} logfile"
  fi

  local logfile=$1
  if [[ ! -r "$logfile" ]]; then
    die "Error: can't open file $logfile for analysis."
  fi

  # mktemp gives an unpredictable name and guarantees the file exists, so
  # the final sort still works when no referrer matched at all.
  temp=$(mktemp "${TMPDIR:-/tmp}/${0##*/}.XXXXXX") \
    || die "Error: can't create temporary file."
  trap 'rm -f -- "$temp"' EXIT

  local count=0
  local url
  # Read line by line rather than word-splitting a command substitution:
  # every URL of interest contains '?', which would otherwise be subject
  # to pathname expansion.
  while IFS= read -r url; do
    describe_referrer "$url" >> "$temp"
    count=$((count + 1))
  done < <(external_referrers "$logfile")

  local total
  total=$(wc -l < "$logfile")
  total=${total//[[:space:]]/}   # some wc implementations pad the number

  echo "Search engine referrer info extracted from ${logfile}:"
  sort "$temp" | uniq -c | sort -rn | head -n "$maxmatches" | sed 's/^/ /'
  echo ""
  echo "Scanned $count entries in log file out of $total total."
  exit 0
}

main "$@"