#!/bin/sh # Generate an RSS 2.0 feed directly from your existing web site # Author - Pádraig Brady # Licence - LGPLV2 # Releases - # 1.0 - Jun 19 2006 - Initial release # 1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed" # HTML comment within the first 10 lines. # 1.2 - May 01 2007 - Add author elements (from html if present) # 1.4 - May 04 2011 # http://github.com/pixelb/scripts/commits/master/scripts/bashfeed # Just run this script from the root directory of your web site and # it will generate feed items for the newest files. Generally I do this # just before I sync my local web site copy to my public server. # One can generate a feed for a subset of the site by still running # from the root directory, and passing a subdirectory to start at. # To change the feed TITLE and DESCRIPTION, set those environment # variables before running the script. # Which files are selected and excluded can be configured below. # Note for html files it will extract the following elements if present # # Item title # # # # Note this script will keep the same item guid for an updated file. # Just updating the pubDate will not cause liferea 1.0.11 at least # to mark the item as updated (or update the timestamp even). # One must change the description or title also, and so # I set the (hidden) description to the file timestamp. # # Testing with thunderbird 1.0.8 shows that it indexes on link # and so wont ever show updates to other fields. Therefore I append #seconds # to the link to force it to create a new entry for an updated item. # # Note you may find the http://www.pixelbeat.org/scripts/fix script # useful for doing edits to files that you don't want to show up # as updated content in the feed, or generally edit a file without # changing the modification date. global num_files := '10' global site := '"www.pixelbeat.org'" global author := '"P@draigBrady.com (Pádraig Brady)'" global suggested_update_freq := '1440' #mins : $(TITLE=$site) : $(DESCRIPTION="latest from $site") #files starting with . | files without a . | files ending in .c .cpp ... global include_re := ''(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$'' #only show these files global exclude_re := ''(\.git/|priv/|tmp/|.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum)|README)'' #don't show these paths global default_files := '"index.html index.shtml index.php'" ############# No user serviceable parts below ################### for file in [$default_files] { global replace_default_files := ""$replace_default_files; s/\(.*\)$file$/\1/;t"" } echo ' ' global time := $[date --rfc-2822] echo " $TITLE $suggested_update_freq http://$site/$1 http://www.pixelbeat.org/scripts/bashfeed $DESCRIPTION $author $time $[echo $LANG | sed 's/\(..\)_.*/\1/] " proc xml_unescaped { sed 's/&//g; s/&[lg]t;//g; s/"//g' | grep -q "[&<>]"; } find $1 -type f -printf "%p\t%T@\n" | sed 's/^\.\///' | # strip leading ./ when "$1" is empty sort -k2,2nr | cut -f1 | grep -E $include_re | grep -Ev $exclude_re | while read file { if ! head $file | grep -Fiq '' { echo $file global i := $(i+1) test $i -eq $num_files && break } } | while read file { global pubDate := $[date --reference="$file" --rfc-2822] global force_update := $[date --reference="$file" "+%s] global title := ''""; global keywords := ''""; global description := ''"" if echo $file | grep -Eq '\.(html|shtml|php)$' { global title := $[sed -n 's/.*\(.*\)<\/title>.*/\1/ip;T;q' < $file] global keywords := $[sed -n 's/.*<META.*NAME="keywords".*CONTENT="\(.*\)".*/\1/ip;T;q' < $file] global description := $[sed -n 's/.*<META.*NAME="description".*CONTENT="\(.*\)".*/\1/ip;T;q' < $file] global page_author := $[sed -n 's/.*<META.*NAME="author".*CONTENT="\(.*\)".*/\1/ip;T;q' < $file] global extracted_text := ""$title $keywords $description$ $page_author"" if echo $extracted_text | xml_unescaped { echo "Error: HTML metadata in $file will not produce a valid XML feed" > !2 exit 1 } } elif test -x $file { # I always have a 1 line description on line 3 of my scripts global description := $[sed -n '3s/# \(.*\)/\1/p' $file] } global file := $[echo $file | sed $replace_default_files] test -z $title && global title := $file global tags := ''"" if test ! -z $keywords { for keyword in [$keywords] { global tags := $[echo -ne "$tags<category>$keyword</category>\n ] } } if test $page_author { if test $page_author = $author { global page_author := ''"" } else { global page_author := $[echo -ne "<author>$page_author</author>\n ] } } echo " <item> <title>$title http://$site/$file $pubDate http://$site/$file#$force_update $page_author$tags]]> " } && echo ' ' (CommandList children: [ (Assignment keyword: Assign_None pairs: [(assign_pair lhs:(LhsName name:num_files) op:Equal rhs:{(10)} spids:[125])] spids: [125] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:site) op: Equal rhs: {(DQ (www.pixelbeat.org))} spids: [128] ) ] spids: [128] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:author) op: Equal rhs: {(DQ ("P@draigBrady.com (P\u00e1draig Brady)"))} spids: [133] ) ] spids: [133] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:suggested_update_freq) op: Equal rhs: {(1440)} spids: [138] ) ] spids: [138] ) (C {(Lit_Other ":")} { (BracedVarSub token: suffix_op: (StringUnary op_id:VTest_Equals arg_word:{($ VSub_Name "$site")}) spids: [147 151] ) } ) (C {(Lit_Other ":")} { (BracedVarSub token: suffix_op: (StringUnary op_id: VTest_Equals arg_word: {(DQ ("latest from ") ($ VSub_Name "$site"))} ) spids: [155 162] ) } ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:include_re) op: Equal rhs: {(SQ <"(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$">)} spids: [168] ) ] spids: [168] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:exclude_re) op: Equal rhs: { (SQ < "(\\.git/|priv/|tmp/|.htaccess|xvpics|timeline\\.html|modified\\.html|head\\.html|header\\.html|footer\\.html|footer-home\\.html|adds\\.html|last\\.html|fslint/(NEWS\\.html|md5sum)|README)" > ) } spids: [177] ) ] spids: [177] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:default_files) op: Equal rhs: {(DQ ("index.html index.shtml index.php"))} spids: [186] ) ] spids: [186] ) (ForEach iter_name: file iter_words: [{($ VSub_Name "$default_files")}] do_arg_iter: False body: (DoGroup children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:replace_default_files) op: Equal rhs: { (DQ ($ VSub_Name "$replace_default_files") ("; s/") (EscapedLiteralPart token:) (".*") (EscapedLiteralPart token:) ($ VSub_Name "$file") (Lit_Other "$") (/) (EscapedLiteralPart token:) ("/;t") ) } spids: [208] ) ] spids: [208] ) ] spids: [205 222] ) spids: [201 203] ) (C {(echo)} { (SQ <"\n"> <"\n"> <"\n"> <"\n"> <""> ) } ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:time) op: Equal rhs: { (CommandSubPart command_list: (CommandList children:[(C {(date)} {(--rfc-2822)})]) left_token: spids: [237 241] ) } spids: [236] ) ] spids: [236] ) (C {(echo)} { (DQ ("\n") ("") ($ VSub_Name "$TITLE") ("\n") ("") ($ VSub_Name "$suggested_update_freq") ("\n") ("http://") ($ VSub_Name "$site") (/) ($ VSub_Number "$1") ("\n") ("http://www.pixelbeat.org/scripts/bashfeed\n") ("") ($ VSub_Name "$DESCRIPTION") ("\n") ("") ($ VSub_Name "$author") ("\n") ("") ($ VSub_Name "$time") ("\n") ("") (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(echo)} {($ VSub_Name "$LANG")}) (C {(sed)} {(SQ <"s/\\(..\\)_.*/\\1/">)}) ] negated: False ) ] ) left_token: spids: [270 282] ) ("\n") ) } ) (FuncDef name: xml_unescaped body: (BraceGroup children: [ (Sentence child: (Pipeline children: [ (C {(sed)} {(SQ <"s/&//g; s/&[lg]t;//g; s/"//g">)}) (C {(grep)} {(-q)} {(DQ ("[&<>]"))}) ] negated: False ) terminator: ) ] spids: [291] ) spids: [287 290] ) (AndOr children: [ (Pipeline children: [ (C {(find)} {($ VSub_Number "$1")} {(-type)} {(f)} {(-printf)} { (DQ ("%p") (EscapedLiteralPart token:) ("%T@") (EscapedLiteralPart token:) ) } ) (C {(sed)} {(SQ <"s/^\\.\\///">)}) (C {(sort)} {(-k2) (Lit_Comma ",") (2nr)}) (C {(cut)} {(-f1)}) (C {(grep)} {(-E)} {(DQ ($ VSub_Name "$include_re"))}) (C {(grep)} {(-Ev)} {(DQ ($ VSub_Name "$exclude_re"))}) (While cond: [(Sentence child:(C {(read)} {(file)}) terminator:)] body: (DoGroup children: [ (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [ (C {(head)} {(DQ ($ VSub_Name "$file"))}) (C {(grep)} {(-Fiq)} {(SQ <"">)}) ] negated: True ) terminator: ) ] action: [ (C {(echo)} {(DQ ($ VSub_Name "$file"))}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:i) op: Equal rhs: { (ArithSubPart anode: (ArithBinary op_id: Arith_Plus left: (ArithVarRef name:i) right: (ArithWord w:{(Lit_Digits 1)}) ) spids: [419 424] ) } spids: [418] ) ] spids: [418] ) (AndOr children: [ (C {(Lit_Other "[")} {($ VSub_Name "$i")} {(-eq)} {($ VSub_Name "$num_files")} {(Lit_Other "]")} ) (ControlFlow token: ) ] op_id: Op_DAmp ) ] spids: [-1 408] ) ] spids: [-1 442] ) ] spids: [384 444] ) ) (While cond: [(Sentence child:(C {(read)} {(file)}) terminator:)] body: (DoGroup children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:pubDate) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(date)} {(--reference) (Lit_Other "=") (DQ ($ VSub_Name "$file"))} {(--rfc-2822)} ) ] ) left_token: spids: [459 469] ) } spids: [458] ) ] spids: [458] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:force_update) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(date)} {(--reference) (Lit_Other "=") (DQ ($ VSub_Name "$file"))} {(DQ ("+%s"))} ) ] ) left_token: spids: [473 485] ) } spids: [472] ) ] spids: [472] ) (Sentence child: (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:title) op: Equal rhs: {(DQ )} spids: [488] ) ] spids: [488] ) terminator: ) (Sentence child: (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:keywords) op: Equal rhs: {(DQ )} spids: [493] ) ] spids: [493] ) terminator: ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:description) op: Equal rhs: {(DQ )} spids: [498] ) ] spids: [498] ) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [ (C {(echo)} {(DQ ($ VSub_Name "$file"))}) (C {(grep)} {(-Eq)} {(SQ <"\\.(html|shtml|php)$">)}) ] negated: False ) terminator: ) ] action: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:title) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (SimpleCommand words: [ {(sed)} {(-n)} { (SQ < "s/.*\\(.*\\)<\\/title>.*/\\1/ip;T;q" > ) } ] redirects: [ (Redir op_id: Redir_Less fd: -1 arg_word: {(DQ ($ VSub_Name "$file"))} spids: [535] ) ] ) ] ) left_token: <Left_Backtick "`"> spids: [526 540] ) } spids: [525] ) ] spids: [525] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:keywords) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (SimpleCommand words: [ {(sed)} {(-n)} { (SQ < "s/.*<META.*NAME=\"keywords\".*CONTENT=\"\\(.*\\)\".*/\\1/ip;T;q" > ) } ] redirects: [ (Redir op_id: Redir_Less fd: -1 arg_word: {(DQ ($ VSub_Name "$file"))} spids: [553] ) ] ) ] ) left_token: <Left_Backtick "`"> spids: [544 558] ) } spids: [543] ) ] spids: [543] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:description) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (SimpleCommand words: [ {(sed)} {(-n)} { (SQ < "s/.*<META.*NAME=\"description\".*CONTENT=\"\\(.*\\)\".*/\\1/ip;T;q" > ) } ] redirects: [ (Redir op_id: Redir_Less fd: -1 arg_word: {(DQ ($ VSub_Name "$file"))} spids: [571] ) ] ) ] ) left_token: <Left_Backtick "`"> spids: [562 576] ) } spids: [561] ) ] spids: [561] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:page_author) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (SimpleCommand words: [ {(sed)} {(-n)} { (SQ < "s/.*<META.*NAME=\"author\".*CONTENT=\"\\(.*\\)\".*/\\1/ip;T;q" > ) } ] redirects: [ (Redir op_id: Redir_Less fd: -1 arg_word: {(DQ ($ VSub_Name "$file"))} spids: [589] ) ] ) ] ) left_token: <Left_Backtick "`"> spids: [580 594] ) } spids: [579] ) ] spids: [579] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:extracted_text) op: Equal rhs: { (DQ ($ VSub_Name "$title") (" ") ($ VSub_Name "$keywords") (" ") ($ VSub_Name "$description") (Lit_Other "$") (" ") ($ VSub_Name "$page_author") ) } spids: [597] ) ] spids: [597] ) (If arms: [ (if_arm cond: [ (Sentence child: (Pipeline children: [ (C {(echo)} {(DQ ($ VSub_Name "$extracted_text"))}) (C {(xml_unescaped)}) ] negated: False ) terminator: <Op_Semi ";"> ) ] action: [ (SimpleCommand words: [ {(echo)} { (DQ ("Error: HTML metadata in ") ($ VSub_Name "$file") (" will not produce a valid XML feed") ) } ] redirects: [ (Redir op_id: Redir_GreatAnd fd: -1 arg_word: {(2)} spids: [634] ) ] ) (C {(exit)} {(1)}) ] spids: [-1 623] ) ] spids: [-1 643] ) ] spids: [-1 522] ) (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(-x)} {(DQ ($ VSub_Name "$file"))} {(Lit_Other "]")} ) terminator: <Op_Semi ";"> ) ] action: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:description) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(sed)} {(-n)} {(SQ <"3s/# \\(.*\\)/\\1/p">)} {(DQ ($ VSub_Name "$file"))} ) ] ) left_token: <Left_Backtick "`"> spids: [666 678] ) } spids: [665] ) ] spids: [665] ) ] spids: [646 659] ) ] spids: [-1 681] ) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:file) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (Pipeline children: [ (C {(echo)} {(DQ ($ VSub_Name "$file"))}) (C {(sed)} {(DQ ($ VSub_Name "$replace_default_files"))}) ] negated: False ) ] ) left_token: <Left_Backtick "`"> spids: [685 699] ) } spids: [684] ) ] spids: [684] ) (AndOr children: [ (C {(Lit_Other "[")} {(-z)} {(DQ ($ VSub_Name "$title"))} {(Lit_Other "]")}) (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:title) op: Equal rhs: {(DQ ($ VSub_Name "$file"))} spids: [714] ) ] spids: [714] ) ] op_id: Op_DAmp ) (Assignment keyword: Assign_None pairs: [(assign_pair lhs:(LhsName name:tags) op:Equal rhs:{(DQ )} spids:[720])] spids: [720] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(KW_Bang "!")} {(-z)} {(DQ ($ VSub_Name "$keywords"))} {(Lit_Other "]")} ) terminator: <Op_Semi ";"> ) ] action: [ (ForEach iter_name: keyword iter_words: [{($ VSub_Name "$keywords")}] do_arg_iter: False body: (DoGroup children: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:tags) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(echo)} {(-ne)} { (DQ ($ VSub_Name "$tags") ("<category>") ($ VSub_Name "$keyword") ("</category>") (EscapedLiteralPart token:<Lit_EscapedChar "\\n">) (" ") ) } ) ] ) left_token: <Left_Backtick "`"> spids: [756 769] ) } spids: [755] ) ] spids: [755] ) ] spids: [752 772] ) spids: [748 750] ) ] spids: [-1 740] ) ] spids: [-1 775] ) (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ ($ VSub_Name "$page_author"))} {(Lit_Other "]")} ) terminator: <Op_Semi ";"> ) ] action: [ (If arms: [ (if_arm cond: [ (Sentence child: (C {(Lit_Other "[")} {(DQ ($ VSub_Name "$page_author"))} {(Lit_Other "=")} {(DQ ($ VSub_Name "$author"))} {(Lit_Other "]")} ) terminator: <Op_Semi ";"> ) ] action: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:page_author) op: Equal rhs: {(DQ )} spids: [812] ) ] spids: [812] ) ] spids: [-1 809] ) ] else_action: [ (Assignment keyword: Assign_None pairs: [ (assign_pair lhs: (LhsName name:page_author) op: Equal rhs: { (CommandSubPart command_list: (CommandList children: [ (C {(echo)} {(-ne)} { (DQ ("<author>") ($ VSub_Name "$page_author") ("</author>") (EscapedLiteralPart token:<Lit_EscapedChar "\\n">) (" ") ) } ) ] ) left_token: <Left_Backtick "`"> spids: [821 833] ) } spids: [820] ) ] spids: [820] ) ] spids: [817 836] ) ] spids: [-1 789] ) ] spids: [-1 839] ) (C {(echo)} { (DQ ("\n") ("<item>\n") (" <title>") ($ VSub_Name "$title") ("\n") (" http://") ($ VSub_Name "$site") (/) ($ VSub_Name "$file") ("\n") (" ") ($ VSub_Name "$pubDate") ("\n") (" http://") ($ VSub_Name "$site") (/) ($ VSub_Name "$file") ("#") ($ VSub_Name "$force_update") ("\n") (" ") ($ VSub_Name "$page_author") ($ VSub_Name "$tags") ("]]>\n") ("\n") ) } ) ] spids: [455 875] ) ) ] negated: False ) (C {(echo)} {(SQ <"\n"> <"">)}) ] op_id: Op_DAmp ) ] )