#!/bin/bash
#
# Recursive-descent parser for a small s-expression language: integers,
# identifiers, double-quoted strings and parenthesised lists.
#
# Calling convention shared by every parser::parse* function:
#   $1  full text being parsed
#   $2  offset into $1 at which to start (defaults to 0)
# On success the function returns 0 and sets two globals:
#   PARSER_PARSED        the parsed value (copied from RESULT after calling
#                        variable::new / variable::LinkedList::new)
#   PARSER_PARSED_COUNT  number of characters consumed starting at the offset
# On failure the function returns 1.
#
# Set PARSER_DEBUG=1 to trace every parser call through the `stderr` helper
# (not defined in this file; presumably supplied by common.sh).

# If this file has already been sourced, just return
[ ${PARSER_SH+true} ] && return
declare -g PARSER_SH=true

source "${BASH_SOURCE%/*}/common.sh"
source "${BASH_SOURCE%/*}/variables.sh"
source "${BASH_SOURCE%/*}/variables.linkedlist.sh"

declare -g PARSER_DEBUG=0
declare -g PARSER_PARSED
declare -g PARSER_PARSED_COUNT

#
# Parses a single expression (atom or sexp). Thin alias for
# parser::parse::substring; all arguments are forwarded unchanged.
#
function parser::parse() {
    parser::parse::substring "${@}"
}

#
# Parses a series of expressions, effectively what you would find inside an sexp:
#   whitespace? ( expression ( whitespace expression )* )? whitespace?
#
# Always returns 0. PARSER_PARSED is the list created by
# variable::LinkedList::new (empty when no expression was found) and
# PARSER_PARSED_COUNT covers everything consumed, including whitespace.
#
function parser::parse::multiExpression() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare originalOffset="${2-0}"
    declare offset=${originalOffset}

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse multiExpr from: ${text:${offset}}" ; fi

    # Accumulator for the parsed items
    variable::LinkedList::new ; declare items="${RESULT}"

    # Prune any beginning whitespace
    if parser::parse::whitespace "${text}" "${offset}"; then
        (( offset += PARSER_PARSED_COUNT ))
    fi

    # First item in list
    if parser::parse "${text}" "${offset}"; then
        variable::LinkedList::append "${items}" "${PARSER_PARSED}"
        (( offset += PARSER_PARSED_COUNT ))
    else
        # No items found, an empty list
        PARSER_PARSED="${items}"
        (( PARSER_PARSED_COUNT = offset - originalOffset ))
        return 0
    fi

    # Every further item must be preceded by whitespace
    while true; do
        if ! parser::parse::whitespace "${text}" "${offset}"; then
            # No whitespace found, we're done
            PARSER_PARSED="${items}"
            (( PARSER_PARSED_COUNT = offset - originalOffset ))
            return 0
        fi
        (( offset += PARSER_PARSED_COUNT )) # advance past the whitespace just eaten

        if ! parser::parse "${text}" "${offset}"; then
            # No expression found, we're done (trailing whitespace stays counted)
            PARSER_PARSED="${items}"
            (( PARSER_PARSED_COUNT = offset - originalOffset ))
            return 0
        fi
        variable::LinkedList::append "${items}" "${PARSER_PARSED}" # add item found to items list
        (( offset += PARSER_PARSED_COUNT )) # advance past the expression just eaten
    done

    stderr "Should never get here"
    exit 1
}

#
# Parses one expression at the offset: an atom if possible, otherwise an sexp.
# The successful sub-parser has already set PARSER_PARSED and
# PARSER_PARSED_COUNT, so they are left untouched here.
#
function parser::parse::substring() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare offset="${2-0}"

    if parser::parse::atom "${text}" "${offset}"; then
        return 0
    fi
    if parser::parse::sexp "${text}" "${offset}"; then
        return 0
    fi

    # stderr "Unable to parse string at position ${offset}: ${text:${offset}}"
    return 1
}

#
# Parses one atom, trying real, integer, identifier and string in that order.
# Returns 0 for the first sub-parser that succeeds, 1 if none does.
#
function parser::parse::atom() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    # Take the inputs from our own arguments rather than relying on the
    # caller happening to have variables named text/offset in scope.
    declare text="${1}"
    declare offset="${2-0}"

    declare kind
    for kind in real integer identifier string; do
        if "parser::parse::${kind}" "${text}" "${offset}"; then
            return 0
        fi
    done
    return 1
}

#
# Placeholder for real-number parsing: not implemented, always returns 1.
#
function parser::parse::real() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare offset="${2-0}"

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse real from: ${text:${offset}}" ; fi

    return 1
}

#
# INTEGER
#
declare -g PARSER_INTEGER_REGEX='\(-\?[1-9][0-9]*\)'
declare -g PARSER_INTEGER_0_REGEX='\(0\)'

#
# Parses an optionally negative integer without a leading zero, or a lone "0".
#
function parser::parse::integer() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare offset="${2-0}"
    declare subtext="${1:${offset}}"

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse integer from: ${text:${offset}}" ; fi

    # `expr match` anchors at the start of the string and prints the \(...\)
    # group; its exit status is non-zero when nothing matched.
    declare value
    if value=$(expr match "${subtext}" "${PARSER_INTEGER_REGEX}"); then
        variable::new Integer "${value}"
        PARSER_PARSED="${RESULT}"
        PARSER_PARSED_COUNT="${#value}"
        return 0
    fi

    # The regex requires a first digit of 1-9, so zero is recognised by hand.
    if [[ "${subtext:0:1}" == "0" ]]; then
        variable::new Integer "0"
        PARSER_PARSED="${RESULT}"
        PARSER_PARSED_COUNT=1
        return 0
    fi

    return 1
}

#
# Identifier
#
declare -g PARSER_IDENTIFIER_REGEX='\([a-zA-Z!?*+<=>_:-][a-zA-Z0-9!?*+<=>_:-]*\)'

#
# Parses an identifier as described by PARSER_IDENTIFIER_REGEX.
#
function parser::parse::identifier() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare offset="${2-0}"
    declare subtext="${1:${offset}}"

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse identifier from: ${subtext}" ; fi

    declare value
    if value=$(expr match "${subtext}" "${PARSER_IDENTIFIER_REGEX}"); then
        variable::new Identifier "${value}"
        PARSER_PARSED="${RESULT}"
        PARSER_PARSED_COUNT="${#value}"
        return 0
    fi

    return 1
}

#
# STRING
#
# Kept for backward compatibility; parser::parse::string no longer shells out
# to expr with it (see the comment inside the function).
declare -g PARSER_STRING_REGEX='\([^"]*\)'

#
# Parses a double-quoted string with no escape sequences: a '"', any run of
# non-'"' characters (possibly empty), and a closing '"'.
# PARSER_PARSED_COUNT includes both quote characters.
#
function parser::parse::string() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare offset="${2-0}"
    declare subtext="${1:${offset}}"

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse string from: ${subtext}" ; fi

    if [[ "${subtext:0:1}" != "\"" ]]; then
        return 1
    fi

    # Body = everything before the next double quote. This is done with
    # parameter expansion instead of `expr match`, because expr exits with
    # status 1 when the matched text is empty or "0", which made the valid
    # inputs "" and "0" fail to parse.
    declare rest="${subtext:1}"
    declare value="${rest%%\"*}"

    # The character right after the body must be the closing quote;
    # otherwise the string is unterminated.
    declare endIndex=$(( 1 + ${#value} ))
    if [[ "${subtext:${endIndex}:1}" != "\"" ]]; then
        return 1
    fi

    variable::new String "${value}"
    PARSER_PARSED="${RESULT}"
    PARSER_PARSED_COUNT=$(( ${#value} + 2 ))

    return 0
}

#
# sexp
#
# Parses a parenthesised list:
#   '(' whitespace? ')'
#   '(' whitespace? expression ( whitespace expression )* whitespace? ')'
# On failure PARSER_PARSED is reset to "" and PARSER_PARSED_COUNT to 0.
#
function parser::parse::sexp() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare originalOffset="${2-0}"
    declare offset=${originalOffset}

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse sexp from: ${text:${offset}}" ; fi

    if [[ "${text:${offset}:1}" != "(" ]]; then
        PARSER_PARSED=""
        PARSER_PARSED_COUNT=0
        return 1
    fi
    (( offset += 1 ))

    # Accumulator for the parsed items
    variable::LinkedList::new ; declare items="${RESULT}"

    # Prune any beginning whitespace
    if parser::parse::whitespace "${text}" "${offset}"; then
        (( offset += PARSER_PARSED_COUNT ))
    fi

    # Empty list
    if [[ "${text:${offset}:1}" == ")" ]]; then
        (( offset += 1 ))
        PARSER_PARSED="${items}"
        (( PARSER_PARSED_COUNT = offset - originalOffset ))
        return 0
    fi

    # First item in list
    if parser::parse "${text}" "${offset}"; then
        variable::LinkedList::append "${items}" "${PARSER_PARSED}"
        (( offset += PARSER_PARSED_COUNT ))
    fi

    # From now on the input is either
    #   whitespace ')'           end of list
    #   whitespace expression    next item
    #   ')'                      end of list
    while true; do
        if parser::parse::whitespace "${text}" "${offset}"; then
            # Can be close paren or another expression
            (( offset += PARSER_PARSED_COUNT ))

            if [[ "${text:${offset}:1}" == ")" ]]; then
                (( offset += 1 ))
                PARSER_PARSED="${items}"
                (( PARSER_PARSED_COUNT = offset - originalOffset ))
                return 0
            elif parser::parse "${text}" "${offset}"; then
                variable::LinkedList::append "${items}" "${PARSER_PARSED}"
                (( offset += PARSER_PARSED_COUNT ))
            else
                PARSER_PARSED=""
                PARSER_PARSED_COUNT=0
                return 1
            fi
        else
            # Can only be close paren
            if [[ "${text:${offset}:1}" == ")" ]]; then
                (( offset += 1 ))
                PARSER_PARSED="${items}"
                (( PARSER_PARSED_COUNT = offset - originalOffset ))
                return 0
            else
                PARSER_PARSED=""
                PARSER_PARSED_COUNT=0
                return 1
            fi
        fi
    done

    stderr "Should never get here"
    exit 1
}

#
# WHITESPACE
#
declare -g PARSER_WHITESPACE_REGEX='\([ ][ ]*\)'

#
# Consumes a run of one or more whitespace characters at the offset.
# On failure PARSER_PARSED is reset to "" and PARSER_PARSED_COUNT to 0.
#
function parser::parse::whitespace() {
    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "${FUNCNAME} ${@}" ; fi

    declare text="${1}"
    declare originalOffset="${2-0}"

    if [[ ${PARSER_DEBUG} == 1 ]]; then stderr "Trying to parse whitespace from: ${text:${originalOffset}}" ; fi

    declare offset=${originalOffset}
    declare char="${text:${offset}:1}"
    declare parsed=""

    # NOTE(review): the loop condition was missing from the file; it is
    # reconstructed here as "current character is whitespace". Confirm the
    # intended character set. An empty char (end of text) ends the loop.
    while [[ "${char}" == [[:space:]] ]]; do
        (( offset += 1 ))
        # NOTE(review): keeps only the most recent character, as before;
        # callers in this file read PARSER_PARSED_COUNT only.
        parsed="${char}"
        char="${text:${offset}:1}"
    done

    if [[ ${offset} -gt ${originalOffset} ]]; then
        PARSER_PARSED="${parsed}"
        (( PARSER_PARSED_COUNT = offset - originalOffset ))
        return 0
    else
        PARSER_PARSED=""
        PARSER_PARSED_COUNT=0
        return 1
    fi
}

#
# ======================================================
# Self-tests: everything below runs only when this file is executed
# directly, not when it is sourced.
if [ "$0" != "${BASH_SOURCE}" ]; then
    return
fi

source "${BASH_SOURCE%/*}/test.sh"

#
# Integer
#
parser::parse "1" ; \
    assert::equals 0 $? "parse 1 succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "Integer :: 1" "${RESULT}" "parse 1"
assert::equals 1 "${PARSER_PARSED_COUNT}" "parse 1 / count"

parser::parse "123456" ; \
    assert::equals 0 $? "parse 123456 succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "Integer :: 123456" "${RESULT}" "parse 123456"
assert::equals 6 "${PARSER_PARSED_COUNT}" "parse 123456 / count"

parser::parse "0" ; \
    assert::equals 0 $? "parse 0 succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "Integer :: 0" "${RESULT}" "parse 0"
assert::equals 1 "${PARSER_PARSED_COUNT}" "parse 0 / count"

parser::parse "-10" ; \
    assert::equals 0 $? "parse -10 succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "Integer :: -10" "${RESULT}" "parse -10"
assert::equals 3 "${PARSER_PARSED_COUNT}" "parse -10 / count"

# NOTE(review): "-0" is rejected by the integer parser, but '-' and digits are
# both allowed by PARSER_IDENTIFIER_REGEX, so this may parse as an identifier
# and make the assertion below fail. Confirm the intended behaviour.
output=$(parser::parse "-0")
assert::equals 1 $? "parse -0 should fail"

#
# Identifier
#
parser::parse "v" ; \
    assert::equals 0 $? "parse v succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "Identifier :: v" "${RESULT}" "parse v"

parser::parse "a?" ; \
    assert::equals 0 $? "parse a? succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "Identifier :: a?" "${RESULT}" "parse a?"
#
# STRING
#
parser::parse '"abc"' ; \
    assert::equals 0 $? "parse \"abc\" succeeds"
variable::debug "${RESULT}" ; \
    assert::equals "String :: abc" "${RESULT}" "parse \"abc\""
assert::equals 5 "${PARSER_PARSED_COUNT}" "parse \"abc\" / count"

#
# WHITESPACE
#
# Three whitespace characters, matching the expected count of 3 below.
parser::parse::whitespace '   ' ; \
    assert::equals 0 $? "match whitespace / code"
assert::equals 3 "${PARSER_PARSED_COUNT}" "match whitepace / count"

parser::parse::whitespace 'abc def' ; \
    assert::equals 1 $? "match non-whitespace / code"

parser::parse::whitespace '' ; \
    assert::equals 1 $? "match empty non-whitespace / code"

parser::parse::whitespace ')' ; \
    assert::equals 1 $? "match close paren against whitespace / code"

#
# Multi-Expr
#
# Each case checks the return code, the number of characters consumed, the
# type of the parsed value and the number of items in the resulting list.
TEST="multiExpression - single expr"
parser::parse::multiExpression "a" ; assert::equals 0 $? "${TEST} / code"
assert::equals 1 "${PARSER_PARSED_COUNT}" "${TEST} / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "${TEST} / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 1 "${RESULT}" "${TEST} / length"
variable::LinkedList::first "${PARSER_PARSED}" ; \
    variable::value "${RESULT}" ; \
    assert::equals "a" "${RESULT}" "${TEST} / value"

TEST="multiExpression - multiple expressions"
parser::parse::multiExpression "a b" ; assert::equals 0 $? "${TEST} / code"
assert::equals 3 "${PARSER_PARSED_COUNT}" "${TEST} / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "${TEST} / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 2 "${RESULT}" "${TEST} / length"

TEST="multiExpression - whitespaces"
parser::parse::multiExpression " b " ; assert::equals 0 $? "${TEST} / code"
assert::equals 3 "${PARSER_PARSED_COUNT}" "${TEST} / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "${TEST} / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 1 "${RESULT}" "${TEST} / length"

TEST="multiExpression - integer"
parser::parse::multiExpression "1" ; assert::equals 0 $? "${TEST} / code"
assert::equals 1 "${PARSER_PARSED_COUNT}" "${TEST} / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "${TEST} / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 1 "${RESULT}" "${TEST} / length"

TEST="multiExpression - sexp"
parser::parse::multiExpression "(a)" ; assert::equals 0 $? "${TEST} / code"
assert::equals 3 "${PARSER_PARSED_COUNT}" "${TEST} / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "${TEST} / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 1 "${RESULT}" "${TEST} / length"

TEST="multiExpression - sexps"
parser::parse::multiExpression "(a) (b)" ; assert::equals 0 $? "${TEST} / code"
assert::equals 7 "${PARSER_PARSED_COUNT}" "${TEST} / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "${TEST} / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 2 "${RESULT}" "${TEST} / length"

#
# SEXP
#
parser::parse "()" ; assert::equals 0 $? "match empty sexp / code"
assert::equals 2 "${PARSER_PARSED_COUNT}" "match empty sexp / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "match empty sexp / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 0 "${RESULT}" "match empty sexp / length"

parser::parse "( )" ; assert::equals 0 $? "match almost empty sexp / code"
assert::equals 3 "${PARSER_PARSED_COUNT}" "match almost empty sexp / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "match almost empty sexp / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 0 "${RESULT}" "match almost empty sexp / length"

parser::parse "(a)" ; assert::equals 0 $? "single element sexp / code"
assert::equals 3 "${PARSER_PARSED_COUNT}" "single element sexp / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "single element sexp / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 1 "${RESULT}" "single element sexp / length"

parser::parse "( a )" ; assert::equals 0 $? "single element sexp / code"
assert::equals 5 "${PARSER_PARSED_COUNT}" "single element sexp / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "single element sexp / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 1 "${RESULT}" "single element sexp / length"

parser::parse "(a b)" ; assert::equals 0 $? "two element sexp / code"
assert::equals 5 "${PARSER_PARSED_COUNT}" "two element sexp / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "two element sexp / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 2 "${RESULT}" "two element sexp / length"

parser::parse "((a) (b) c)" ; assert::equals 0 $? "nested element sexp / code"
assert::equals 11 "${PARSER_PARSED_COUNT}" "nested element sexp / count"
variable::type "${PARSER_PARSED}" ; assert::equals "LinkedList" "${RESULT}" "nested element sexp / type"
variable::LinkedList::length "${PARSER_PARSED}" ; assert::equals 3 "${RESULT}" "nested element sexp / length"

# Print the summary of all assertions.
assert::report

# Passing "debug" as the first script argument additionally dumps variable metadata.
if [ ${1+isset} ] && [ "$1" == "debug" ]; then
    variable::printMetadata
fi