#!/bin/sh
# Generate an RSS 2.0 feed directly from your existing web site
# Author - Pádraig Brady
# Licence - LGPLV2
# Releases -
# 1.0 - Jun 19 2006 - Initial release
# 1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
# HTML comment within the first 10 lines.
# 1.2 - May 01 2007 - Add author elements (from html if present)
# 1.4 - May 04 2011
# http://github.com/pixelb/scripts/commits/master/scripts/bashfeed
# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.
# One can generate a feed for a subset of the site by still running
# from the root directory, and passing a subdirectory to start at.
# To change the feed TITLE and DESCRIPTION, set those environment
# variables before running the script.
# Which files are selected and excluded can be configured below.
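# Note for html files it will extract the following elements if present
#
#   <title>Item title</title>
#   <meta name="description" content="...">
#   <meta name="keywords" content="...">
#   <meta name="author" content="...">
#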
# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
#
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
#
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# ---- User configuration ----
# num_files             - the selection loop stops once this many paths
#                         have been emitted
# site                  - host name used to build the http:// links
# author                - feed-level author; a page author equal to this
#                         value is blanked before the item is printed
# suggested_update_freq - value echoed into the channel header (minutes)
# TITLE / DESCRIPTION   - per the header notes these may be preset in the
#                         environment; the two ":" lines presumably supply
#                         defaults derived from $site (TODO confirm the
#                         meaning of this expansion form in the dialect)
# include_re/exclude_re - extended regexes handed to grep -E / grep -Ev
#                         when filtering candidate paths
# default_files         - file names stripped from the end of item paths
# NOTE(review): the setglobal syntax and doubled quotes below are not POSIX
# sh despite the #!/bin/sh line; this looks like the output of a shell-to-Oil
# translator. Confirm which interpreter is intended before editing values.
setglobal num_files = '10'
setglobal site = '"www.pixelbeat.org'"
setglobal author = '"P@draigBrady.com (Pádraig Brady)'"
setglobal suggested_update_freq = '1440' #mins
: $(TITLE=$site)
: $(DESCRIPTION="latest from $site")
#files starting with . | files without a . | files ending in .c .cpp ...
setglobal include_re = ''(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$'' #only show these files
setglobal exclude_re = ''(\.git/|priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum)|README)'' #don't show these paths
setglobal default_files = '"index.html index.shtml index.php'"
############# No user serviceable parts below ###################
# Build a sed script in replace_default_files: one substitution per entry of
# default_files, each removing that name when it ends the line and keeping
# whatever precedes it. The trailing "t" branches to the end of the sed
# script once a substitution has been made, so the remaining rules are
# skipped. The result is later passed to sed to shorten item paths.
for file in [$default_files] {
setglobal replace_default_files = ""$replace_default_files; s/\(.*\)$file$/\1/;t""
}
# Print the start of the feed, then the channel-level header.
# The header text interpolates TITLE, suggested_update_freq, the site URL
# with the optional start directory ($1), DESCRIPTION, author, the current
# time and a language code.
# NOTE(review): both echoed literals appear to have lost their XML markup
# (only the interpolated values remain), and the sed command inside the
# second literal has no closing quote. Confirm against the upstream script
# before relying on this output.
echo '
'
# Current time in RFC 2822 format, used in the channel header.
setglobal time = $[date --rfc-2822]
echo "
$TITLE$suggested_update_freq
http://$site/$1
http://www.pixelbeat.org/scripts/bashfeed$DESCRIPTION$author$time$[echo $LANG | sed 's/\(..\)_.*/\1/]
"
# xml_unescaped - report whether stdin still contains raw XML special chars.
#
# Reads text on stdin, removes the entities that are already correctly
# escaped (&amp; &lt; &gt; &quot;) and then looks for any remaining
# '&', '<' or '>'.
#
# Returns: 0 if an unescaped special character is present, non-zero otherwise.
# Outputs: nothing (grep -q).
#
# The substitutions must target whole entities: deleting every bare '&'
# first would make the '&[lg]t;' rule unreachable and hide stray ampersands
# from the final check.
# Written in the name() form so that both /bin/sh and OSH can parse it.
xml_unescaped() {
  sed 's/&amp;//g; s/&[lg]t;//g; s/&quot;//g' | grep -q "[&<>]"
}
# Select the paths to publish, newest first, and write them one per line
# to the next pipeline stage.
#   find - list regular files under $1 as path<TAB>mtime (%T@)
#   sed  - drop a leading ./ from each path
#   sort - numeric, descending, on the mtime field
#   cut  - keep only the path
#   grep - keep include_re matches, then drop exclude_re matches
# The loop skips any file whose leading lines (head default) contain the
# opt-out marker described in the release notes, and stops once num_files
# paths have been emitted.
find $1 -type f -printf "%p\t%T@\n" |
sed 's/^\.\///' | # strip leading ./ when "$1" is empty
sort -k2,2nr |
cut -f1 |
grep -E $include_re |
grep -Ev $exclude_re |
while read file {
# The pattern must be the marker text: an empty fixed-string pattern matches
# every line, which would make this test reject every non-empty file.
if ! head $file | grep -Fiq 'Exclude from bashfeed' {
echo $file
setglobal i = $shExpr('i+1')
test $i -eq $num_files && break
}
} |
# Emit one feed item per path read from stdin, then a closing line.
# For each path:
#   - pubDate is the file's modification time in RFC 2822 format and
#     force_update is the same time via the +%s date format; the latter is
#     appended after '#' in the second URL of the item.
#   - title, keywords and description start out empty.
#   - for .html/.shtml/.php names, metadata is pulled out of the file with
#     sed; otherwise, for executable files, description is taken from a
#     line-3 comment of the form "# text".
#   - the path is shortened with the replace_default_files sed script, and
#     title falls back to that path when empty.
#   - tags is built up from the words of keywords; page_author is cleared
#     when it equals the feed-level author.
# The final echo only runs if the loop finished successfully (&&).
# NOTE(review): the html branch looks truncated in this copy. The keywords
# sed expression runs straight into "&2" and "exit 1", there is no visible
# assignment of page_author before it is tested, several quotes are left
# unterminated, and the echoed item text appears to have lost its XML tags.
# Restore from the upstream script before relying on this block.
while read file {
setglobal pubDate = $[date --reference="$file" --rfc-2822]
setglobal force_update = $[date --reference="$file" "+%s]
setglobal title = ''""; setglobal keywords = ''""; setglobal description = ''""
if echo $file | grep -Eq '\.(html|shtml|php)$' {
setglobal title = $[sed -n 's/.*\(.*\)<\/title>.*/\1/ip;T;q]
setglobal keywords = $[sed -n 's/.*&2
exit 1
}
} elif test -x $file { # I always have a 1 line description on line 3 of my scripts
setglobal description = $[sed -n '3s/# \(.*\)/\1/p' $file]
}
setglobal file = $[echo $file | sed $replace_default_files]
test -z $title && setglobal title = $file
setglobal tags = ''""
if test ! -z $keywords {
for keyword in [$keywords] {
setglobal tags = $[echo -ne "$tags$keyword\n ]
}
}
if test $page_author {
if test $page_author = $author {
setglobal page_author = ''""
} else {
setglobal page_author = $[echo -ne "$page_author\n ]
}
}
echo "
$titlehttp://$site/$file$pubDate
http://$site/$file#$force_update
$page_author$tags]]>
"
} &&
echo ''