1 #!/usr/bin/env bash
2 #
3 # Only bash and zsh seem to implement [[ foo =~ '' ]]
4 #
5 # ^(a b)$ is a regex that should match 'a b' in a group.
6 #
7 # Not sure what bash is doing here... I think I have to just be empirical.
8 # Might need "compat" switch for parsing the regex. It should be an opaque
9 # string like zsh, not sure why it isn't.
10 #
11 # I think this is just papering over bugs...
12 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
13 #
14 # Storing the regular expression in a shell variable is often a useful way to
15 # avoid problems with quoting characters that are special to the shell. It is
16 # sometimes difficult to specify a regular expression literally without using
17 # quotes, or to keep track of the quoting used by regular expressions while
18 # paying attention to the shell’s quote removal. Using a shell variable to
19 # store the pattern decreases these problems. For example, the following is
20 # equivalent to the above:
21 #
22 # pattern='[[:space:]]*(a)?b'
23 # [[ $line =~ $pattern ]]
24 #
25 # If you want to match a character that’s special to the regular expression
26 # grammar, it has to be quoted to remove its special meaning. This means that in
27 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
28 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
29 # literal ‘.’. Shell programmers should take special care with backslashes, since
30 # backslashes are used both by the shell and regular expressions to remove the
31 # special meaning from the following character. The following two sets of
32 # commands are not equivalent (the manual's example is not reproduced here):
33 #
34 # From bash code: ( | ) are treated specially. Normally they must be quoted, but
35 # they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
36
37 #### BASH_REMATCH
# Matches 'foo123' against a regex with two parenthesized groups, then passes
# every element of BASH_REMATCH to the external argv.py helper.
# The default expectation is three elements: the whole match followed by the
# two group captures. The zsh annotation records a single empty string.
38 [[ foo123 =~ ([a-z]+)([0-9]+) ]]
39 argv.py "${BASH_REMATCH[@]}"
40 ## STDOUT:
41 ['foo123', 'foo', '123']
42 ## END
43 ## N-I zsh STDOUT:
44 ['']
45 ## END
46
47 #### Match is unanchored at both ends
# The one-character pattern `a` sits in the middle of 'bar'. The match is
# expected to succeed even though the pattern has no ^ or $ and does not cover
# the whole subject.
48 [[ 'bar' =~ a ]] && echo true
49 ## stdout: true
50
51 #### Failed match
# 'bar' contains no X, so the [[ test is expected to fail: the echo after &&
# is skipped (empty stdout) and the expected exit status is 1.
52 [[ 'bar' =~ X ]] && echo true
53 ## status: 1
54 ## stdout-json: ""
55
56 #### Regex quoted with \ -- preferred in bash
# The space inside the group is written as backslash-space rather than being
# wrapped in quotes. The anchored pattern is expected to match 'a b'.
57 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
58 ## stdout: true
59
60 #### Regex quoted with single quotes
# The entire pattern, including ^ ( ) $, is wrapped in single quotes.
# Default expectation: no output and status 1 (no match). The zsh annotation
# records a successful match instead (prints true, status 0).
61 # bash doesn't like the quotes
62 [[ 'a b' =~ '^(a b)$' ]] && echo true
63 ## stdout-json: ""
64 ## status: 1
65 ## OK zsh stdout: true
66 ## OK zsh status: 0
67
68 #### Regex quoted with double quotes
# The entire pattern, including ^ ( ) $, is wrapped in double quotes.
# Default expectation: no output and status 1 (no match). The zsh annotation
# records a successful match instead (prints true, status 0).
69 # bash doesn't like the quotes
70 [[ 'a b' =~ "^(a b)$" ]] && echo true
71 ## stdout-json: ""
72 ## status: 1
73 ## OK zsh stdout: true
74 ## OK zsh status: 0
75
76 #### Fix single quotes by storing in variable
# The pattern is assigned to pat using single quotes, then expanded WITHOUT
# quotes on the right of =~. In this form the match is expected to succeed.
77 pat='^(a b)$'
78 [[ 'a b' =~ $pat ]] && echo true
79 ## stdout: true
80
81 #### Fix double quotes by storing in variable
# The pattern is assigned to pat using double quotes, then expanded WITHOUT
# quotes on the right of =~. In this form the match is expected to succeed.
82 pat="^(a b)$"
83 [[ 'a b' =~ $pat ]] && echo true
84 ## stdout: true
85
86 #### Double quoting pat variable -- again bash doesn't like it.
# Here $pat is expanded INSIDE double quotes on the right of =~.
# Default expectation: no output and status 1 (no match). The zsh annotation
# records a successful match instead (prints true, status 0).
87 pat="^(a b)$"
88 [[ 'a b' =~ "$pat" ]] && echo true
89 ## stdout-json: ""
90 ## status: 1
91 ## OK zsh stdout: true
92 ## OK zsh status: 0
93
94 #### Mixing quoted and unquoted parts
# Each pattern is a single word built from adjacent pieces: a quoted piece
# that contains the space, followed by `b` (bare in the first test,
# single-quoted in the second). Both tests are expected to print true.
95 [[ 'a b' =~ 'a 'b ]] && echo true
96 [[ "a b" =~ "a "'b' ]] && echo true
97 ## STDOUT:
98 true
99 true
100 ## END
101
102 #### Regex with == and not =~ is parse error, different lexer mode required
# The right operand contains unquoted parentheses but the operator is ==, not
# =~. Only the exit status is specified: 2 by default, with zsh annotated as 1.
103 # They both give a syntax error. This is lame.
104 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
105 ## status: 2
106 ## OK zsh status: 1
107
108 #### Omitting ( )
# With no parentheses on the right side, the == comparison is accepted.
# The left operand is the literal text ^a b$ (carets and dollar included), and
# the comparison is expected to succeed.
109 [[ '^a b$' == ^a\ b$ ]] && echo true
110 ## stdout: true
111
112 #### Malformed regex
# The parentheses are reversed: `)` appears before `(`.
# Default expectation: no output and status 2. The zsh annotation records
# status 1.
113 # Are they trying to PARSE the regex? Do they feed the buffer directly to
114 # regcomp()?
115 [[ 'a b' =~ ^)a\ b($ ]] && echo true
116 ## stdout-json: ""
117 ## status: 2
118 ## OK zsh status: 1
119
120 #### Regex with char class containing space
# The bracket expression [a b] contains an unescaped, unquoted space and is
# wrapped in a group. The subject 'ba ba ' is expected to match.
121 # For some reason it doesn't work without parens?
122 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
123 ## stdout: true
124
125 #### Operators and space lose meaning inside ()
# Inside the group, < and > (normally shell operators) and the space between
# them are written with no quoting or escaping. The pattern is expected to
# match the literal subject '< >'. The zsh annotation records no output and
# status 1.
126 [[ '< >' =~ (< >) ]] && echo true
127 ## stdout: true
128 ## N-I zsh stdout-json: ""
129 ## N-I zsh status: 1
130
131 #### Regex with |
# An unquoted | is used as regex alternation between foo and bar; the subject
# 'bar' is expected to match. The zsh annotation records no output and
# status 1.
132 [[ 'bar' =~ foo|bar ]] && echo true
133 ## stdout: true
134 ## N-I zsh stdout-json: ""
135 ## N-I zsh status: 1
136
137 #### Regex to match literal brackets []
# Two spellings of the same pattern are checked against the subject '[]':
# backslash-escaped brackets written inline, and the same text stored in pat
# (single-quoted) and expanded unquoted. Both are expected to print true by
# default; the zsh annotation expects only one `true` line.
138
139 # bash-completion relies on this, so we're making it match bash.
140 # zsh understandably differs.
141 [[ '[]' =~ \[\] ]] && echo true
142
143 # Another way to write this.
144 pat='\[\]'
145 [[ '[]' =~ $pat ]] && echo true
146 ## STDOUT:
147 true
148 true
149 ## END
150 ## OK zsh STDOUT:
151 true
152 ## END
153
154 #### Regex to match literals . ^ $ etc.
# Four pairs of checks on backslash-escaped regex metacharacters.
# In each pair the first test uses a subject made only of x's and is expected
# NOT to match (the || branch prints false); the second uses the literal
# metacharacters as the subject and is expected to match (prints true).
# Note: in the third pair the two patterns list + and * in different orders
# (\+\*\? versus \*\+\?).
# The zsh annotation records different output and an exit status of 1.
155 [[ 'x' =~ \. ]] || echo false
156 [[ '.' =~ \. ]] && echo true
157
158 [[ 'xx' =~ \^\$ ]] || echo false
159 [[ '^$' =~ \^\$ ]] && echo true
160
161 [[ 'xxx' =~ \+\*\? ]] || echo false
162 [[ '*+?' =~ \*\+\? ]] && echo true
163
164 [[ 'xx' =~ \{\} ]] || echo false
165 [[ '{}' =~ \{\} ]] && echo true
166 ## STDOUT:
167 false
168 true
169 false
170 true
171 false
172 true
173 false
174 true
175 ## END
176 ## BUG zsh STDOUT:
177 true
178 false
179 false
180 false
181 ## END
182 ## BUG zsh status: 1
183
184 #### Unquoted { is a regex parse error
# Both operands are a bare, unquoted {.
# Default expectation: nothing is printed at all (so the echo on the next line
# is never reached) and the exit status is 2.
# The bash and zsh annotations instead record the echo running, printing
# status=2 and status=1 respectively, with the script itself exiting 0.
185 [[ { =~ { ]] && echo true
186 echo status=$?
187 ## stdout-json: ""
188 ## status: 2
189 ## BUG bash stdout-json: "status=2\n"
190 ## BUG bash status: 0
191 ## BUG zsh stdout-json: "status=1\n"
192 ## BUG zsh status: 0
193
194 #### Fatal error inside [[ =~ ]]
# The right operand is an arithmetic expansion that divides by zero.
# Default expectation: nothing is printed (the echo is never reached) and the
# exit status is 1. The bash annotation records execution continuing to the
# echo, which prints status=1, with the script exiting 0.
195
196 # zsh and osh are stricter than bash. bash treats [[ like a command.
197
198 [[ a =~ $(( 1 / 0 )) ]]
199 echo status=$?
200 ## stdout-json: ""
201 ## status: 1
202 ## BUG bash stdout: status=1
203 ## BUG bash status: 0
204
205 #### Quoted { and +
# Three groups of checks, separated by `echo ---`:
#   1. Each regex metacharacter is quoted on the right of =~ and is expected
#      to match the same character as a literal subject.
#   2. A quoted "." is expected to match a literal . but not the letter z.
#   3. Quotes are placed around a-z inside a bracket expression.
# The zsh annotation expects fewer `yes` lines in group 1 and no `no .` line
# in group 2; group 3 is the same in both expectations.
206 [[ { =~ "{" ]] && echo 'yes {'
207 [[ + =~ "+" ]] && echo 'yes +'
208 [[ * =~ "*" ]] && echo 'yes *'
209 [[ ? =~ "?" ]] && echo 'yes ?'
210 [[ ^ =~ "^" ]] && echo 'yes ^'
211 [[ $ =~ "$" ]] && echo 'yes $'
212 [[ '(' =~ '(' ]] && echo 'yes ('
213 [[ ')' =~ ')' ]] && echo 'yes )'
214 [[ '|' =~ '|' ]] && echo 'yes |'
215 [[ '\' =~ '\' ]] && echo 'yes \'
216 echo ---
217
218 [[ . =~ "." ]] && echo 'yes .'
219 [[ z =~ "." ]] || echo 'no .'
220 echo ---
221
222 # This rule is weird but all shells agree. I would expect that the - gets
223 # escaped? It's an operator? but it behaves like a-z.
224 [[ a =~ ["a-z"] ]]; echo "a $?"
225 [[ - =~ ["a-z"] ]]; echo "- $?"
226 [[ b =~ ['a-z'] ]]; echo "b $?"
227 [[ z =~ ['a-z'] ]]; echo "z $?"
228
# $? on the next line is the status of the preceding echo, not of a [[ test,
# which is why status=0 appears in both expected outputs.
229 echo status=$?
230 ## STDOUT:
231 yes {
232 yes +
233 yes *
234 yes ?
235 yes ^
236 yes $
237 yes (
238 yes )
239 yes |
240 yes \
241 ---
242 yes .
243 no .
244 ---
245 a 0
246 - 1
247 b 0
248 z 0
249 status=0
250 ## END
251 ## N-I zsh STDOUT:
252 yes ^
253 yes $
254 yes )
255 yes |
256 ---
257 yes .
258 ---
259 a 0
260 - 1
261 b 0
262 z 0
263 status=0
264 ## END
265
266 #### Escaped {
# The first group is an escaped $ followed by an optional escaped {; the
# second group is a run of identifier characters. For the subject '$PA' the
# expected captures are '$' and 'PA'. The zsh annotation records no output and
# status 1.
267 # from bash-completion
268 [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
269 ## STDOUT:
270 ['$PA', '$', 'PA']
271 ## END
272 ## BUG zsh stdout-json: ""
273 ## BUG zsh status: 1
274
275 #### Escaped { stored in variable first
# The pattern (escaped $, optional escaped {, then identifier characters) is
# stored single-quoted in pat and expanded unquoted on the right of =~.
# For the subject '$PA' the expected captures are '$' and 'PA'. The zsh
# annotation records argv.py printing a single empty string.
276 # from bash-completion
277 pat='^(\$\{?)([A-Za-z0-9_]*)$'
278 [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
279 ## STDOUT:
280 ['$PA', '$', 'PA']
281 ## END
282 ## BUG zsh STDOUT:
283 ['']
284 ## END
285
286 #### regex with ?
# c? is tested against 'c' and against the empty string; both are expected to
# match, so two `true` lines are expected.
287 [[ 'c' =~ c? ]] && echo true
288 [[ '' =~ c? ]] && echo true
289 ## STDOUT:
290 true
291 true
292 ## END
293
294 #### regex with unprintable characters
# pat is built with $'...' quoting, so its bracket expression holds the bytes
# 0x01 and 0x02 themselves. A subject made only of those bytes is expected to
# match (status=0); a subject containing the letter a is not (status=1).
295 # can't have nul byte
296
297 # This pattern has literal characters
298 pat=$'^[\x01\x02]+$'
299
300 [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
301 [[ $'a\x01' =~ $pat ]]; echo status=$?
302
303 # NOTE: There doesn't appear to be any way to escape these!
# pat2 uses plain single quotes, so the \x01 and \x02 sequences stay as
# written. It is assigned here but not used by any check in this case.
304 pat2='^[\x01\x02]+$'
305
306 ## STDOUT:
307 status=0
308 status=1
309 ## END
310
311 #### pattern $f(x) -- regression
# The pattern is a variable expansion immediately followed by a parenthesized
# group. With f=fff it becomes fff(x): 'fffx' is expected to match (status=0)
# and 'ffx' is not (status=1).
312 f=fff
313 [[ fffx =~ $f(x) ]]
314 echo status=$?
315 [[ ffx =~ $f(x) ]]
316 echo status=$?
317 ## STDOUT:
318 status=0
319 status=1
320 ## END
321
322 #### pattern a=(x)
# The pattern a=(x) requires a literal `a=` before the x: 'a=x' is expected to
# match (status=0) and '=x' is not (status=1).
# NOTE(review): a=(x) resembles array-assignment syntax; presumably this case
# guards against the pattern being lexed that way -- confirm.
# The zsh annotation records only the first status line and an exit status
# of 1.
323 [[ a=x =~ a=(x) ]]
324 echo status=$?
325 [[ =x =~ a=(x) ]]
326 echo status=$?
327 ## STDOUT:
328 status=0
329 status=1
330 ## END
331 ## BUG zsh status: 1
332 ## BUG zsh STDOUT:
333 status=0
334 ## END
335
336 #### pattern @f(x)
# The parse_at option is switched on with shopt before the checks.
# NOTE(review): presumably an option specific to the shell under test rather
# than a bash option -- confirm.
# The pattern @f(x) is expected to match '@fx' (status=0) and not to match
# 'fx' (status=1).
337 shopt -s parse_at
338 [[ @fx =~ @f(x) ]]
339 echo status=$?
340 [[ fx =~ @f(x) ]]
341 echo status=$?
342 ## STDOUT:
343 status=0
344 status=1
345 ## END