#!/bin/sh

# Generate an RSS 2.0 feed directly from your existing web site
# Author - Pádraig Brady <P@draigBrady.com>
# Licence - LGPLV2
# Releases -
#    1.0 - Jun 19 2006 - Initial release
#    1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
#                        HTML comment within the first 10 lines.
#    1.2 - May 01 2007 - Add author elements (from html if present)
#    1.4 - May 04 2011
#      http://github.com/pixelb/scripts/commits/master/scripts/bashfeed

# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.

# One can generate a feed for a subset of the site by still running
# from the root directory, and passing a subdirectory to start at.

# To change the feed TITLE and DESCRIPTION, set those environment
# variables before running the script.

# Which files are selected and excluded can be configured below.

# Note for html files it will extract the following elements if present
#
#   <title>Item title</title>
#   <meta name="keywords" content="space separated item categories"/>
#   <meta name="description" content="Item description"/>
#   <meta name="author" content="email (name)"/>

# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
#
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
#
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# ---------------------------------------------------------------------------
# User configuration
# ---------------------------------------------------------------------------
num_files=10                              # maximum number of items in the feed
site="www.pixelbeat.org"
author="P@draigBrady.com (Pádraig Brady)"
suggested_update_freq=1440                # feed <ttl>, in minutes

# TITLE and DESCRIPTION may be supplied through the environment.
: "${TITLE=$site}"
: "${DESCRIPTION=latest from $site}"

# files starting with . | files without a . | files ending in .c .cpp ...
include_re='(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$' # only show these files
exclude_re='(\.git/|priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum)|README)' # don't show these paths
default_files="index.html index.shtml index.php"

############# No user serviceable parts below ###################

# NOTE: this script relies on GNU extensions: find -printf, date --reference /
# --rfc-2822, and the sed `T` command and `I` flag.

# Build a sed script that strips a trailing default document name from a
# path (e.g. "docs/index.html" -> "docs/"), so links point at the directory.
replace_default_files=''
for default_file in $default_files; do
  replace_default_files="${replace_default_files:+$replace_default_files; }s/\(.*\)${default_file}\$/\1/;t"
done

# Succeeds (status 0) when stdin still contains a raw '&', '<' or '>' after
# the already-escaped entities &amp; &lt; &gt; &quot; have been removed,
# i.e. when the text would produce invalid XML.
xml_unescaped() {
  sed 's/&amp;//g; s/&[lg]t;//g; s/&quot;//g' |
    grep -q '[&<>]'
}

# html_meta NAME FILE
# Prints the CONTENT attribute of the first <meta name="NAME" ...> line found
# in FILE (matched case-insensitively); prints nothing if there is none.
html_meta() {
  sed -n 's/.*<META.*NAME="'"$1"'".*CONTENT="\(.*\)".*/\1/ip;T;q' <"$2"
}

# Reads candidate paths (one per line, newest first) on stdin and prints the
# first $num_files of them that do not carry the opt-out marker comment
# within their first 10 lines.
pick_items() {
  picked=0
  while IFS= read -r candidate; do
    if ! head <"$candidate" | grep -Fiq '<!-- Exclude from bashfeed -->'; then
      printf '%s\n' "$candidate"
      picked=$((picked + 1))
      test "$picked" -eq "$num_files" && break
    fi
  done
}

# Reads the selected paths on stdin and writes one RSS <item> per path.
# Exits with status 1 if an HTML file's metadata is not XML-safe.
emit_items() {
  nl='
'
  while IFS= read -r file; do
    pubDate=$(date --reference="$file" --rfc-2822)
    force_update=$(date --reference="$file" '+%s')

    # Reset every per-item field; in particular page_author must not leak
    # from a previous HTML page into the following items.
    title='' keywords='' description='' page_author=''

    case $file in
      *.html | *.shtml | *.php)
        title=$(sed -n 's/.*<title>\(.*\)<\/title>.*/\1/ip;T;q' <"$file")
        keywords=$(html_meta keywords "$file")
        description=$(html_meta description "$file")
        page_author=$(html_meta author "$file")
        if printf '%s\n' "$title $keywords $description $page_author" | xml_unescaped; then
          printf '%s\n' "Error: HTML metadata in $file will not produce a valid XML feed" >&2
          exit 1
        fi
        ;;
      *)
        if test -x "$file"; then
          # I always have a 1 line description on line 3 of my scripts
          description=$(sed -n '3s/# \(.*\)/\1/p' <"$file")
        fi
        ;;
    esac

    # Link to the directory rather than to its default document.
    file=$(printf '%s\n' "$file" | sed "$replace_default_files")
    test -n "$title" || title=$file

    # One <category> per whitespace-separated keyword. Globbing is disabled
    # so a keyword such as '*' is not expanded against the file system.
    tags=''
    set -f
    for keyword in $keywords; do
      tags="$tags<category>$keyword</category>$nl"
    done
    set +f

    # Only emit <author> when the page names someone other than the
    # feed-wide managing editor.
    if test -n "$page_author"; then
      if test "$page_author" = "$author"; then
        page_author=''
      else
        page_author="<author>$page_author</author>$nl"
      fi
    fi

    # guid stays stable across edits; the link gets "#<mtime>" appended and
    # the description carries the mtime in a hidden comment so that feed
    # readers notice an updated file.
    printf '%s\n' "
<item>
<title>$title</title>
<guid>http://$site/$file</guid>
<pubDate>$pubDate</pubDate>
<link>http://$site/$file#$force_update</link>
$page_author$tags<description><![CDATA[<!-- $force_update -->$description]]></description>
</item>"
  done
}

# main [START_DIR]
# Writes the complete RSS 2.0 document for the newest files below START_DIR
# (default: the current directory) to stdout. Returns non-zero, and omits the
# closing tags, if item generation failed.
main() {
  printf '%s\n' '<?xml version="1.0" encoding="utf-8"?>' '<rss version="2.0">'

  build_time=$(date --rfc-2822)
  feed_lang=$(printf '%s\n' "${LANG:-}" | sed 's/\(..\)_.*/\1/')
  printf '%s\n' "<channel>
<title>$TITLE</title>
<ttl>$suggested_update_freq</ttl>
<link>http://$site/${1:-}</link>
<generator>http://www.pixelbeat.org/scripts/bashfeed</generator>
<description>$DESCRIPTION</description>
<managingEditor>$author</managingEditor>
<pubDate>$build_time</pubDate>
<language>$feed_lang</language>
"

  tab=$(printf '\t')
  # ${1:+"$1"} passes the start directory as one word, or nothing at all
  # (so find defaults to ".") when no argument was given.
  find ${1:+"$1"} -type f -printf '%p\t%T@\n' |
    sed 's/^\.\///' |           # strip leading ./ when "$1" is empty
    sort -t "$tab" -k2,2nr |    # newest first; tab-separated so spaces in paths are safe
    cut -f1 |
    grep -E -- "$include_re" |
    grep -Ev -- "$exclude_re" |
    pick_items |
    emit_items &&
    printf '%s\n' '</channel>' '</rss>'
}

main "$@"