1 #!/usr/bin/env bash
2 #
3 # Only bash and zsh seem to implement [[ foo =~ '' ]]
4 #
5 # ^(a b)$ is a regex that should match 'a b' in a group.
6 #
7 # Not sure what bash is doing here... I think I just have to be empirical.
8 # Might need a "compat" switch for parsing the regex. It should be an opaque
9 # string as in zsh; not sure why it isn't.
10 #
11 # I think this is just papering over bugs... Excerpt from the bash manual:
12 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
13 #
14 # Storing the regular expression in a shell variable is often a useful way to
15 # avoid problems with quoting characters that are special to the shell. It is
16 # sometimes difficult to specify a regular expression literally without using
17 # quotes, or to keep track of the quoting used by regular expressions while
18 # paying attention to the shell’s quote removal. Using a shell variable to
19 # store the pattern decreases these problems. For example, the following is
20 # equivalent to the above (in the manual: [[ $line =~ [[:space:]]*(a)?b ]]):
21 #
22 # pattern='[[:space:]]*(a)?b'
23 # [[ $line =~ $pattern ]]
24 #
25 # If you want to match a character that’s special to the regular expression
26 # grammar, it has to be quoted to remove its special meaning. This means that in
27 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
28 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
29 # literal ‘.’. Shell programmers should take special care with backslashes, since
30 # backslashes are used both by the shell and regular expressions to remove the
31 # special meaning from the following character. The following two sets of
32 # commands are not equivalent: [examples omitted here; see the manual]
33 #
34 # From the bash source code: ( | ) are treated specially. Normally they must be
35 # quoted, but they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
36
37 #### BASH_REMATCH
38 [[ foo123 =~ ([a-z]+)([0-9]+) ]]  # two capture groups: letters, then digits
39 argv.py "${BASH_REMATCH[@]}"  # element 0 is the whole match, then one element per group
40 ## STDOUT:
41 ['foo123', 'foo', '123']
42 ## END
43 ## N-I zsh STDOUT:
44 ['']
45 ## END
46
47 #### Match is unanchored at both ends
48 [[ 'bar' =~ a ]] && echo true  # 'a' is found in the middle of 'bar'
49 ## stdout: true
50
51 #### Failed match
52 [[ 'bar' =~ X ]] && echo true  # no X in 'bar', so the test fails and echo is skipped
53 ## status: 1
54 ## stdout-json: ""
55
56 #### Regex quoted with \ -- preferred in bash
57 [[ 'a b' =~ ^(a\ b)$ ]] && echo true  # only the space is escaped; ^ ( ) $ stay regex operators
58 ## stdout: true
59
60 #### Regex quoted with single quotes
61 # bash takes the quoted pattern literally, so the match fails; zsh still treats it as a regex.
62 [[ 'a b' =~ '^(a b)$' ]] && echo true
63 ## stdout-json: ""
64 ## status: 1
65 ## OK zsh stdout: true
66 ## OK zsh status: 0
67
68 #### Regex quoted with double quotes
69 # bash takes the double-quoted pattern literally, so the match fails; zsh still treats it as a regex.
70 [[ 'a b' =~ "^(a b)$" ]] && echo true
71 ## stdout-json: ""
72 ## status: 1
73 ## OK zsh stdout: true
74 ## OK zsh status: 0
75
76 #### Fix single quotes by storing in variable
77 pat='^(a b)$'  # the quotes here belong to the assignment only
78 [[ 'a b' =~ $pat ]] && echo true  # unquoted $pat is used as a regex
79 ## stdout: true
80
81 #### Fix double quotes by storing in variable
82 pat="^(a b)$"  # the quotes here belong to the assignment only
83 [[ 'a b' =~ $pat ]] && echo true  # unquoted $pat is used as a regex
84 ## stdout: true
85
86 #### Double quoting pat variable -- again bash doesn't like it.
87 pat="^(a b)$"
88 [[ 'a b' =~ "$pat" ]] && echo true  # quoted expansion: no match in bash, match in zsh
89 ## stdout-json: ""
90 ## status: 1
91 ## OK zsh stdout: true
92 ## OK zsh status: 0
93
94 #### Mixing quoted and unquoted parts
95 [[ 'a b' =~ 'a 'b ]] && echo true  # single-quoted 'a ' followed by an unquoted b
96 [[ "a b" =~ "a "'b' ]] && echo true  # double-quoted part followed by a single-quoted part
97 ## STDOUT:
98 true
99 true
100 ## END
101
102 #### Regex with == and not =~ is parse error, different lexer mode required
103 # bash and zsh both give a syntax error: unquoted ( ) are only accepted after =~, not ==.
104 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
105 ## status: 2
106 ## OK zsh status: 1
107
108 #### Omitting ( )
109 [[ '^a b$' == ^a\ b$ ]] && echo true  # parses without ( ); under == the ^ and $ are literal
110 ## stdout: true
111
112 #### Malformed regex
113 # Are they trying to PARSE the regex? Or do they feed the buffer directly to
114 # regcomp()?
115 [[ 'a b' =~ ^)a\ b($ ]] && echo true  # parens are in the wrong order: ) comes before (
116 ## status: 2
117 ## OK zsh status: 1
118
119 #### Regex with char class containing space
120 # For some reason a bracket expression containing a space doesn't work without the parens?
121 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
122 ## stdout: true
123
124 #### Operators and space lose meaning inside ()
125 [[ '< >' =~ (< >) ]] && echo true  # < > and the space are part of the regex, not shell syntax
126 ## stdout: true
127 ## N-I zsh stdout-json: ""
128 ## N-I zsh status: 1
129
130 #### Regex with |
131 [[ 'bar' =~ foo|bar ]] && echo true  # unquoted | is regex alternation here, not a pipe
132 ## stdout: true
133 ## N-I zsh stdout-json: ""
134 ## N-I zsh status: 1
135
136 #### Regex to match literal brackets []
137
138 # bash-completion relies on this, so we're making it match bash.
139 # zsh understandably differs: it prints only one 'true'.
140 [[ '[]' =~ \[\] ]] && echo true
141
142 # Another way to write this: store the escaped pattern in a variable.
143 pat='\[\]'
144 [[ '[]' =~ $pat ]] && echo true
145 ## STDOUT:
146 true
147 true
148 ## END
149 ## OK zsh STDOUT:
150 true
151 ## END
152
153 #### Regex to match literals . ^ $ etc.
154 [[ 'x' =~ \. ]] || echo false  # an escaped . must not match an arbitrary character
155 [[ '.' =~ \. ]] && echo true  # ...but it does match a literal dot
156
157 [[ 'xx' =~ \^\$ ]] || echo false
158 [[ '^$' =~ \^\$ ]] && echo true
159
160 [[ 'xxx' =~ \+\*\? ]] || echo false  # operator order differs from the next line; 'xxx' must not match
161 [[ '*+?' =~ \*\+\? ]] && echo true
162
163 [[ 'xx' =~ \{\} ]] || echo false
164 [[ '{}' =~ \{\} ]] && echo true
165 ## STDOUT:
166 false
167 true
168 false
169 true
170 false
171 true
172 false
173 true
174 ## END
175 ## BUG zsh STDOUT:
176 true
177 false
178 false
179 false
180 ## END
181 ## BUG zsh status: 1
182
183 #### Unquoted { is a regex parse error
184 [[ { =~ { ]] && echo true
185 echo status=$?  # default expectation is no output and exit status 2, so this never runs; bash and zsh do run it
186 ## stdout-json: ""
187 ## status: 2
188 ## BUG bash stdout-json: "status=2\n"
189 ## BUG bash status: 0
190 ## BUG zsh stdout-json: "status=1\n"
191 ## BUG zsh status: 0
192
193 #### Fatal error inside [[ =~ ]]
194
195 # zsh and osh are stricter than bash and stop at the division by zero. bash treats [[ like a command.
196
197 [[ a =~ $(( 1 / 0 )) ]]
198 echo status=$?  # only bash is expected to get this far
199 ## stdout-json: ""
200 ## status: 1
201 ## BUG bash stdout: status=1
202 ## BUG bash status: 0
203
204 #### Quoted { and +
205 [[ { =~ "{" ]] && echo 'yes {'
206 [[ + =~ "+" ]] && echo 'yes +'
207 [[ * =~ "*" ]] && echo 'yes *'
208 [[ ? =~ "?" ]] && echo 'yes ?'
209 [[ ^ =~ "^" ]] && echo 'yes ^'
210 [[ $ =~ "$" ]] && echo 'yes $'
211 [[ '(' =~ '(' ]] && echo 'yes ('
212 [[ ')' =~ ')' ]] && echo 'yes )'
213 [[ '|' =~ '|' ]] && echo 'yes |'
214 [[ '\' =~ '\' ]] && echo 'yes \'
215 echo ---
216
217 [[ . =~ "." ]] && echo 'yes .'
218 [[ z =~ "." ]] || echo 'no .'  # a quoted . is literal, so it does not match z
219 echo ---
220
221 # This rule is weird but all shells agree. I would expect the quoted - to be
222 # escaped (taken literally), but it still acts as the range operator in a-z.
223 [[ a =~ ["a-z"] ]]; echo "a $?"
224 [[ - =~ ["a-z"] ]]; echo "- $?"
225 [[ b =~ ['a-z'] ]]; echo "b $?"
226 [[ z =~ ['a-z'] ]]; echo "z $?"
227
228 echo status=$?  # always 0: this is the status of the preceding echo, not of a [[ test
229 ## STDOUT:
230 yes {
231 yes +
232 yes *
233 yes ?
234 yes ^
235 yes $
236 yes (
237 yes )
238 yes |
239 yes \
240 ---
241 yes .
242 no .
243 ---
244 a 0
245 - 1
246 b 0
247 z 0
248 status=0
249 ## END
250 ## N-I zsh STDOUT:
251 yes ^
252 yes $
253 yes )
254 yes |
255 ---
256 yes .
257 ---
258 a 0
259 - 1
260 b 0
261 z 0
262 status=0
263 ## END
264
265 #### Escaped {
266 # from bash-completion: the { is backslash-escaped, so \{? is an optional literal brace
267 [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
268 ## STDOUT:
269 ['$PA', '$', 'PA']
270 ## END
271 ## BUG zsh stdout-json: ""
272 ## BUG zsh status: 1
273
274 #### Escaped { stored in variable first
275 # from bash-completion: the pattern is stored single-quoted and expanded unquoted
276 pat='^(\$\{?)([A-Za-z0-9_]*)$'
277 [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
278 ## STDOUT:
279 ['$PA', '$', 'PA']
280 ## END
281 ## BUG zsh STDOUT:
282 ['']
283 ## END
284
285 #### regex with ?
286 [[ 'c' =~ c? ]] && echo true
287 [[ '' =~ c? ]] && echo true  # c? also matches the empty string
288 ## STDOUT:
289 true
290 true
291 ## END
292
293 #### regex with unprintable characters
294 # The pattern can't contain a NUL byte, so \x01 and \x02 are used instead.
295
296 # $'...' turns the \x escapes into literal control characters inside the pattern.
297 pat=$'^[\x01\x02]+$'
298
299 [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
300 [[ $'a\x01' =~ $pat ]]; echo status=$?  # 'a' is outside the class, so the anchored match fails
301
302 # NOTE: There doesn't appear to be any way to escape these!
303 pat2='^[\x01\x02]+$'  # never used below; single quotes keep the backslashes as-is
304
305 ## STDOUT:
306 status=0
307 status=1
308 ## END