| 1 | #!/bin/bash |
| 2 | # |
| 3 | # Only bash and zsh seem to implement [[ foo =~ '' ]] |
| 4 | # |
| 5 | # ^(a b)$ is a regex that should match 'a b' in a group. |
| 6 | # |
| 7 | # Not sure what bash is doing here... I think I have to just be empirical. |
| 8 | # Might need "compat" switch for parsing the regex. It should be an opaque |
| 9 | # string like zsh, not sure why it isn't. |
| 10 | # |
| 11 | # I think this is just papering over bugs... |
| 12 | # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs |
| 13 | # |
| 14 | # Storing the regular expression in a shell variable is often a useful way to |
| 15 | # avoid problems with quoting characters that are special to the shell. It is |
| 16 | # sometimes difficult to specify a regular expression literally without using |
| 17 | # quotes, or to keep track of the quoting used by regular expressions while |
| 18 | # paying attention to the shell’s quote removal. Using a shell variable to |
| 19 | # store the pattern decreases these problems. For example, the following is |
| 20 | # equivalent to [[ $line =~ [[:space:]]*(a)?b ]]: |
| 21 | # |
| 22 | # pattern='[[:space:]]*(a)?b' |
| 23 | # [[ $line =~ $pattern ]] |
| 24 | # |
| 25 | # If you want to match a character that’s special to the regular expression |
| 26 | # grammar, it has to be quoted to remove its special meaning. This means that in |
| 27 | # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual |
| 28 | # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a |
| 29 | # literal ‘.’. Shell programmers should take special care with backslashes, since |
| 30 | # backslashes are used both by the shell and regular expressions to remove the |
| 31 | # special meaning from the following character. (The manual then shows two sets |
| 32 | # of commands that are not equivalent; that example is omitted here.) |
| 33 | # |
| 34 | # From bash code: ( | ) are treated specially. Normally they must be quoted, but |
| 35 | # they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted there! |
| 36 | |
| 37 | ### Match is unanchored at both ends |
| 38 | [[ 'bar' =~ a ]] && echo true |
| 39 | # stdout: true |
| 40 | |
| 41 | ### Failed match |
| 42 | [[ 'bar' =~ X ]] && echo true |
| 43 | # stdout-json: "" |
| 44 | |
| 45 | ### Regex quoted with \ -- preferred in bash |
| 46 | [[ 'a b' =~ ^(a\ b)$ ]] && echo true |
| 47 | # stdout: true |
| 48 | |
| 49 | ### Regex quoted with single quotes |
| 50 | # bash treats a quoted regex as a literal string, so this does not match |
| 51 | [[ 'a b' =~ '^(a b)$' ]] && echo true |
| 52 | # stdout: true |
| 53 | # OK bash stdout-json: "" |
| 54 | |
| 55 | ### Regex quoted with double quotes |
| 56 | # bash treats a quoted regex as a literal string, so this does not match |
| 57 | [[ 'a b' =~ "^(a b)$" ]] && echo true |
| 58 | # stdout: true |
| 59 | # OK bash stdout-json: "" |
| 60 | |
| 61 | ### Fix single quotes by storing in variable |
| 62 | pat='^(a b)$' |
| 63 | [[ 'a b' =~ $pat ]] && echo true |
| 64 | # stdout: true |
| 65 | |
| 66 | ### Fix double quotes by storing in variable |
| 67 | pat="^(a b)$" |
| 68 | [[ 'a b' =~ $pat ]] && echo true |
| 69 | # stdout: true |
| 70 | |
| 71 | ### Double quoting pat variable -- again bash doesn't like it. |
| 72 | pat="^(a b)$" |
| 73 | [[ 'a b' =~ "$pat" ]] && echo true |
| 74 | # stdout: true |
| 75 | # OK bash stdout-json: "" |
| 76 | |
| 77 | ### Regex with == and not =~ is parse error, different lexer mode required |
| 78 | # Both bash and zsh give a syntax error here. This is lame. |
| 79 | [[ '^(a b)$' == ^(a\ b)$ ]] && echo true |
| 80 | # status: 2 |
| 81 | # OK zsh status: 1 |
| 82 | |
| 83 | ### Omitting ( ) |
| 84 | [[ '^a b$' == ^a\ b$ ]] && echo true |
| 85 | # stdout: true |
| 86 | |
| 87 | ### Malformed regex |
| 88 | # Are they trying to PARSE the regex? Do they feed the buffer directly to |
| 89 | # regcomp()? |
| 90 | [[ 'a b' =~ ^)a\ b($ ]] && echo true |
| 91 | # status: 2 |
| 92 | # OK zsh status: 1 |
| 93 | |
| 94 | ### Regex with char class |
| 95 | # For some reason it doesn't work without parens? |
| 96 | [[ 'ba ba ' =~ ([a b]+) ]] && echo true |
| 97 | # stdout: true |
| 98 | |
| 99 | ### Operators lose meaning in () in regex state (BASH_REGEX_CHARS) |
| 100 | [[ '< >' =~ (< >) ]] && echo true |
| 101 | # stdout: true |
| 102 | # N-I zsh stdout-json: "" |
| 103 | # N-I zsh status: 1 |
| 104 | |
| 105 | ### Regex with | |
| 106 | [[ 'bar' =~ foo|bar ]] && echo true |
| 107 | # stdout: true |
| 108 | # N-I zsh stdout-json: "" |
| 109 | # N-I zsh status: 1 |