| 1 | #!/usr/bin/env bash |
| 2 | # |
| 3 | # Only bash and zsh seem to implement [[ foo =~ '' ]] |
| 4 | # |
| 5 | # ^(a b)$ is a regex that should match 'a b' in a group. |
| 6 | # |
| 7 | # It's unclear what bash is doing here, so these tests are empirical. |
| 8 | # A "compat" switch might be needed for parsing the regex. It should be an |
| 9 | # opaque string like in zsh; it's not clear why bash doesn't treat it that way. |
| 10 | # |
| 11 | # I think this is just papering over bugs... |
| 12 | # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs |
| 13 | # |
| 14 | # Storing the regular expression in a shell variable is often a useful way to |
| 15 | # avoid problems with quoting characters that are special to the shell. It is |
| 16 | # sometimes difficult to specify a regular expression literally without using |
| 17 | # quotes, or to keep track of the quoting used by regular expressions while |
| 18 | # paying attention to the shell’s quote removal. Using a shell variable to |
| 19 | # store the pattern decreases these problems. For example, the following is |
| 20 | # equivalent to the above: |
| 21 | # |
| 22 | # pattern='[[:space:]]*(a)?b' |
| 23 | # [[ $line =~ $pattern ]] |
| 24 | # |
| 25 | # If you want to match a character that’s special to the regular expression |
| 26 | # grammar, it has to be quoted to remove its special meaning. This means that in |
| 27 | # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual |
| 28 | # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a |
| 29 | # literal ‘.’. Shell programmers should take special care with backslashes, since |
| 30 | # backslashes are used both by the shell and regular expressions to remove the |
| 31 | # special meaning from the following character. The following two sets of |
| 32 | # commands are not equivalent (e.g. [[ . =~ \. ]] matches, but [[ . =~ '\.' ]] does not). |
| 33 | # |
| 34 | # From the bash source: ( | ) are treated specially. Normally they must be quoted, but |
| 35 | # they can be UNQUOTED in the BASH_REGEX state. In fact they can't be quoted if they are to keep their regex meaning! |
| 36 | |
| 37 | #### BASH_REMATCH |
| 38 | [[ foo123 =~ ([a-z]+)([0-9]+) ]] |
| 39 | argv.py "${BASH_REMATCH[@]}" |
| 40 | ## STDOUT: |
| 41 | ['foo123', 'foo', '123'] |
| 42 | ## END |
| 43 | ## N-I zsh STDOUT: |
| 44 | [''] |
| 45 | ## END |
| 46 | |
| 47 | #### Match is unanchored at both ends |
| 48 | [[ 'bar' =~ a ]] && echo true |
| 49 | ## stdout: true |
| 50 | |
| 51 | #### Failed match |
| 52 | [[ 'bar' =~ X ]] && echo true |
| 53 | ## status: 1 |
| 54 | ## stdout-json: "" |
| 55 | |
| 56 | #### Regex quoted with \ -- preferred in bash |
| 57 | [[ 'a b' =~ ^(a\ b)$ ]] && echo true |
| 58 | ## stdout: true |
| 59 | |
| 60 | #### Regex quoted with single quotes |
| 61 | # bash matches a quoted pattern literally, so ^ ( ) $ lose their regex meaning and the match fails; zsh still treats it as a regex. |
| 62 | [[ 'a b' =~ '^(a b)$' ]] && echo true |
| 63 | ## stdout-json: "" |
| 64 | ## status: 1 |
| 65 | ## OK zsh stdout: true |
| 66 | ## OK zsh status: 0 |
| 67 | |
| 68 | #### Regex quoted with double quotes |
| 69 | # Same as with single quotes: bash matches the double-quoted pattern literally, so the match fails; zsh still treats it as a regex. |
| 70 | [[ 'a b' =~ "^(a b)$" ]] && echo true |
| 71 | ## stdout-json: "" |
| 72 | ## status: 1 |
| 73 | ## OK zsh stdout: true |
| 74 | ## OK zsh status: 0 |
| 75 | |
| 76 | #### Fix single quotes by storing in variable |
| 77 | pat='^(a b)$' |
| 78 | [[ 'a b' =~ $pat ]] && echo true |
| 79 | ## stdout: true |
| 80 | |
| 81 | #### Fix double quotes by storing in variable |
| 82 | pat="^(a b)$" |
| 83 | [[ 'a b' =~ $pat ]] && echo true |
| 84 | ## stdout: true |
| 85 | |
| 86 | #### Double quoting pat variable -- again bash doesn't like it. |
| 87 | pat="^(a b)$" |
| 88 | [[ 'a b' =~ "$pat" ]] && echo true |
| 89 | ## stdout-json: "" |
| 90 | ## status: 1 |
| 91 | ## OK zsh stdout: true |
| 92 | ## OK zsh status: 0 |
| 93 | |
| 94 | #### Mixing quoted and unquoted parts |
| 95 | [[ 'a b' =~ 'a 'b ]] && echo true |
| 96 | [[ "a b" =~ "a "'b' ]] && echo true |
| 97 | ## STDOUT: |
| 98 | true |
| 99 | true |
| 100 | ## END |
| 101 | |
| 102 | #### Regex with == and not =~ is parse error, different lexer mode required |
| 103 | # Unquoted ( ) are rejected on the right-hand side of ==: the expected result is a syntax error (status 2; zsh reports status 1). |
| 104 | [[ '^(a b)$' == ^(a\ b)$ ]] && echo true |
| 105 | ## status: 2 |
| 106 | ## OK zsh status: 1 |
| 107 | |
| 108 | #### Omitting ( ) |
| 109 | [[ '^a b$' == ^a\ b$ ]] && echo true |
| 110 | ## stdout: true |
| 111 | |
| 112 | #### Malformed regex |
| 113 | # Open question: do the shells try to PARSE the regex themselves, or do they |
| 114 | # feed the buffer directly to regcomp()? Expected: status 2 (status 1 in zsh). |
| 115 | [[ 'a b' =~ ^)a\ b($ ]] && echo true |
| 116 | ## status: 2 |
| 117 | ## OK zsh status: 1 |
| 118 | |
| 119 | #### Regex with char class containing space |
| 120 | # NOTE: per the original author this does not work without the surrounding ( ); the reason is not yet understood. |
| 121 | [[ 'ba ba ' =~ ([a b]+) ]] && echo true |
| 122 | ## stdout: true |
| 123 | |
| 124 | #### Operators and space lose meaning inside () |
| 125 | [[ '< >' =~ (< >) ]] && echo true |
| 126 | ## stdout: true |
| 127 | ## N-I zsh stdout-json: "" |
| 128 | ## N-I zsh status: 1 |
| 129 | |
| 130 | #### Regex with | |
| 131 | [[ 'bar' =~ foo|bar ]] && echo true |
| 132 | ## stdout: true |
| 133 | ## N-I zsh stdout-json: "" |
| 134 | ## N-I zsh status: 1 |
| 135 | |
| 136 | #### Regex to match literal brackets [] |
| 137 | |
| 138 | # bash-completion relies on this, so we're making it match bash. |
| 139 | # zsh understandably differs. |
| 140 | [[ '[]' =~ \[\] ]] && echo true |
| 141 | |
| 142 | # Another way to write this. |
| 143 | pat='\[\]' |
| 144 | [[ '[]' =~ $pat ]] && echo true |
| 145 | ## STDOUT: |
| 146 | true |
| 147 | true |
| 148 | ## END |
| 149 | ## OK zsh STDOUT: |
| 150 | true |
| 151 | ## END |
| 152 | |
| 153 | #### Regex to match literals . ^ $ etc. |
| 154 | [[ 'x' =~ \. ]] || echo false |
| 155 | [[ '.' =~ \. ]] && echo true |
| 156 | |
| 157 | [[ 'xx' =~ \^\$ ]] || echo false |
| 158 | [[ '^$' =~ \^\$ ]] && echo true |
| 159 | |
| 160 | [[ 'xxx' =~ \+\*\? ]] || echo false |
| 161 | [[ '*+?' =~ \*\+\? ]] && echo true |
| 162 | |
| 163 | [[ 'xx' =~ \{\} ]] || echo false |
| 164 | [[ '{}' =~ \{\} ]] && echo true |
| 165 | ## STDOUT: |
| 166 | false |
| 167 | true |
| 168 | false |
| 169 | true |
| 170 | false |
| 171 | true |
| 172 | false |
| 173 | true |
| 174 | ## END |
| 175 | ## BUG zsh STDOUT: |
| 176 | true |
| 177 | false |
| 178 | false |
| 179 | false |
| 180 | ## END |
| 181 | ## BUG zsh status: 1 |
| 182 | |
| 183 | #### Unquoted { is a regex parse error |
| 184 | [[ { =~ { ]] && echo true |
| 185 | echo status=$? |
| 186 | ## stdout-json: "" |
| 187 | ## status: 2 |
| 188 | ## BUG bash stdout-json: "status=2\n" |
| 189 | ## BUG bash status: 0 |
| 190 | ## BUG zsh stdout-json: "status=1\n" |
| 191 | ## BUG zsh status: 0 |
| 192 | |
| 193 | #### Fatal error inside [[ =~ ]] |
| 194 | |
| 195 | # zsh and osh are stricter than bash: the division by zero aborts the script (no output, status 1). bash treats [[ like a command, so only [[ fails and the echo still runs. |
| 196 | |
| 197 | [[ a =~ $(( 1 / 0 )) ]] |
| 198 | echo status=$? |
| 199 | ## stdout-json: "" |
| 200 | ## status: 1 |
| 201 | ## BUG bash stdout: status=1 |
| 202 | ## BUG bash status: 0 |
| 203 | |
| 204 | #### Quoted { |
| 205 | [[ { =~ "{" ]] && echo true |
| 206 | echo status=$? |
| 207 | ## STDOUT: |
| 208 | true |
| 209 | status=0 |
| 210 | ## END |
| 211 | ## N-I zsh STDOUT: |
| 212 | status=1 |
| 213 | ## END |
| 214 | |
| 215 | #### Escaped { |
| 216 | # from bash-completion |
| 217 | [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}" |
| 218 | ## STDOUT: |
| 219 | ['$PA', '$', 'PA'] |
| 220 | ## END |
| 221 | ## BUG zsh stdout-json: "" |
| 222 | ## BUG zsh status: 1 |
| 223 | |
| 224 | #### Escaped { stored in variable first |
| 225 | # from bash-completion |
| 226 | pat='^(\$\{?)([A-Za-z0-9_]*)$' |
| 227 | [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}" |
| 228 | ## STDOUT: |
| 229 | ['$PA', '$', 'PA'] |
| 230 | ## END |
| 231 | ## BUG zsh STDOUT: |
| 232 | [''] |
| 233 | ## END |
| 234 | |
| 235 | #### regex with ? |
| 236 | [[ 'c' =~ c? ]] && echo true |
| 237 | [[ '' =~ c? ]] && echo true |
| 238 | ## STDOUT: |
| 239 | true |
| 240 | true |
| 241 | ## END |