1 #!/usr/bin/env bash
2 #
3 # Only bash and zsh seem to implement [[ foo =~ '' ]]
4 #
5 # ^(a b)$ is a regex that should match 'a b' in a group.
6 #
7 # Not sure what bash is doing here... I think I have to just be empirical.
8 # Might need "compat" switch for parsing the regex. It should be an opaque
9 # string like zsh, not sure why it isn't.
10 #
11 # I think this is just papering over bugs...
12 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
13 # Excerpt from the manual section linked above:
14 # Storing the regular expression in a shell variable is often a useful way to
15 # avoid problems with quoting characters that are special to the shell. It is
16 # sometimes difficult to specify a regular expression literally without using
17 # quotes, or to keep track of the quoting used by regular expressions while
18 # paying attention to the shell’s quote removal. Using a shell variable to
19 # store the pattern decreases these problems. For example, the following is
20 # equivalent to the above:
21 #
22 # pattern='[[:space:]]*(a)?b'
23 # [[ $line =~ $pattern ]]
24 #
25 # If you want to match a character that’s special to the regular expression
26 # grammar, it has to be quoted to remove its special meaning. This means that in
27 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
28 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
29 # literal ‘.’. Shell programmers should take special care with backslashes, since
30 # backslashes are used both by the shell and regular expressions to remove the
31 # special meaning from the following character. (The manual then shows two
32 # sets of commands that are not equivalent; they are omitted here.)
33 #
34 # From bash code: ( | ) are treated specially. Normally they must be quoted,
35 # but they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
36
37 #### BASH_REMATCH
38 [[ foo123 =~ ([a-z]+)([0-9]+) ]]
39 argv.py "${BASH_REMATCH[@]}"
40 ## STDOUT:
41 ['foo123', 'foo', '123']
42 ## END
43 ## N-I zsh STDOUT:
44 ['']
45 ## END
46
47 #### Match is unanchored at both ends
48 [[ 'bar' =~ a ]] && echo true
49 ## stdout: true
50
51 #### Failed match
52 [[ 'bar' =~ X ]] && echo true
53 ## status: 1
54 ## stdout-json: ""
55
56 #### Regex quoted with \ -- preferred in bash
57 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
58 ## stdout: true
59
60 #### Regex quoted with single quotes
61 # bash matches a single-quoted pattern as a literal string, not as a regex
62 [[ 'a b' =~ '^(a b)$' ]] && echo true
63 ## stdout-json: ""
64 ## status: 1
65 ## OK zsh stdout: true
66 ## OK zsh status: 0
67
68 #### Regex quoted with double quotes
69 # bash matches a double-quoted pattern as a literal string, not as a regex
70 [[ 'a b' =~ "^(a b)$" ]] && echo true
71 ## stdout-json: ""
72 ## status: 1
73 ## OK zsh stdout: true
74 ## OK zsh status: 0
75
76 #### Fix single quotes by storing in variable
77 pat='^(a b)$'
78 [[ 'a b' =~ $pat ]] && echo true
79 ## stdout: true
80
81 #### Fix double quotes by storing in variable
82 pat="^(a b)$"
83 [[ 'a b' =~ $pat ]] && echo true
84 ## stdout: true
85
86 #### Double quoting pat variable -- again bash doesn't like it.
87 pat="^(a b)$"
88 [[ 'a b' =~ "$pat" ]] && echo true
89 ## stdout-json: ""
90 ## status: 1
91 ## OK zsh stdout: true
92 ## OK zsh status: 0
93
94 #### Regex with == and not =~ is parse error, different lexer mode required
95 # They both give a syntax error. This is lame.
96 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
97 ## status: 2
98 ## OK zsh status: 1
99
100 #### Omitting ( )
101 [[ '^a b$' == ^a\ b$ ]] && echo true
102 ## stdout: true
103
104 #### Malformed regex
105 # Are they trying to PARSE the regex? Do they feed the buffer directly to
106 # regcomp()?
107 [[ 'a b' =~ ^)a\ b($ ]] && echo true
108 ## status: 2
109 ## OK zsh status: 1
110
111 #### Regex with char class containing space
112 # For some reason it doesn't work without parens?
113 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
114 ## stdout: true
115
116 #### Operators and space lose meaning inside ()
117 [[ '< >' =~ (< >) ]] && echo true
118 ## stdout: true
119 ## N-I zsh stdout-json: ""
120 ## N-I zsh status: 1
121
122 #### Regex with |
123 [[ 'bar' =~ foo|bar ]] && echo true
124 ## stdout: true
125 ## N-I zsh stdout-json: ""
126 ## N-I zsh status: 1
127
128 #### Unquoted { is parse error in bash/zsh
129 [[ { =~ { ]] && echo true
130 echo status=$?
131 ## STDOUT:
132 status=2
133 ## END
134 ## N-I zsh STDOUT:
135 status=1
136 ## END
137
138 #### Quoted {
139 [[ { =~ "{" ]] && echo true
140 echo status=$?
141 ## STDOUT:
142 true
143 status=0
144 ## END
145 ## N-I zsh STDOUT:
146 status=1
147 ## END
148
149 #### Escaped {
150 # from bash-completion
151 [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
152 ## STDOUT:
153 ['$PA', '$', 'PA']
154 ## END
155 ## BUG zsh stdout-json: ""
156 ## BUG zsh status: 1
157
158 #### Escaped { stored in variable first
159 # from bash-completion
160 pat='^(\$\{?)([A-Za-z0-9_]*)$'
161 [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
162 ## STDOUT:
163 ['$PA', '$', 'PA']
164 ## END
165 ## BUG zsh STDOUT:
166 ['']
167 ## END
168
169 #### regex with ?
170 [[ 'c' =~ c? ]] && echo true
171 [[ '' =~ c? ]] && echo true
172 ## STDOUT:
173 true
174 true
175 ## END