1 |
#!/usr/bin/env bash |
2 |
# |
3 |
# Only bash and zsh seem to implement [[ foo =~ '' ]] |
4 |
# |
5 |
# ^(a b)$ is a regex that should match 'a b' in a group. |
6 |
# |
7 |
# Not sure what bash is doing here... I think I have to just be empirical. |
8 |
# Might need "compat" switch for parsing the regex. It should be an opaque |
9 |
# string like zsh, not sure why it isn't. |
10 |
# |
11 |
# I think this is just papering over bugs... |
12 |
# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs |
13 |
# |
14 |
# Storing the regular expression in a shell variable is often a useful way to |
15 |
# avoid problems with quoting characters that are special to the shell. It is |
16 |
# sometimes difficult to specify a regular expression literally without using |
17 |
# quotes, or to keep track of the quoting used by regular expressions while |
18 |
# paying attention to the shell’s quote removal. Using a shell variable to |
19 |
# store the pattern decreases these problems. For example, the following is |
20 |
# equivalent to the above: |
21 |
# |
22 |
# pattern='[[:space:]]*(a)?b' |
23 |
# [[ $line =~ $pattern ]] |
24 |
# |
25 |
# If you want to match a character that’s special to the regular expression |
26 |
# grammar, it has to be quoted to remove its special meaning. This means that in |
27 |
# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual |
28 |
# regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a |
29 |
# literal ‘.’. Shell programmers should take special care with backslashes, since |
30 |
# backslashes are used both by the shell and regular expressions to remove the |
31 |
# special meaning from the following character. The following two sets of |
32 |
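# commands are not equivalent: |
#
#     pattern='\.'
#
#     [[ . =~ $pattern ]]
#     [[ . =~ \. ]]
#
#     [[ . =~ "$pattern" ]]
#     [[ . =~ '\.' ]]
#
# The first two matches will succeed, but the second two will not, because in
# the second two the backslash will be part of the pattern to be matched.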
33 |
# |
34 |
# From bash code: ( | ) are treated specially. Normally they must be quoted, but |
35 |
# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted! |
36 |
|
37 |
#### BASH_REMATCH |
# The regex has two capture groups.  The expected array is the whole match
# followed by each group in order.  The zsh expectation (tagged N-I) is a
# single empty string instead.
38 |
[[ foo123 =~ ([a-z]+)([0-9]+) ]] |
39 |
argv.py "${BASH_REMATCH[@]}" |
40 |
## STDOUT: |
41 |
['foo123', 'foo', '123'] |
42 |
## END |
43 |
## N-I zsh STDOUT: |
44 |
[''] |
45 |
## END |
46 |
|
47 |
#### Match is unanchored at both ends |
# The pattern 'a' has neither ^ nor $ and is expected to match in the middle
# of 'bar'.
48 |
[[ 'bar' =~ a ]] && echo true |
49 |
## stdout: true |
50 |
|
51 |
#### Failed match |
# 'X' does not occur in 'bar': the expectation is exit status 1 and empty
# stdout.
52 |
[[ 'bar' =~ X ]] && echo true |
53 |
## status: 1 |
54 |
## stdout-json: "" |
55 |
|
56 |
#### Regex quoted with \ -- preferred in bash |
# The only quoting is a backslash in front of the space; the anchors and the
# ( ) group are left unquoted.  Expected to match 'a b'.
57 |
[[ 'a b' =~ ^(a\ b)$ ]] && echo true |
58 |
## stdout: true |
59 |
|
60 |
#### Regex quoted with single quotes |
61 |
# bash doesn't like the quotes |
# Default expectation: the single-quoted pattern does not match (status 1,
# empty stdout).  The zsh expectation (tagged OK) is a successful match.
62 |
[[ 'a b' =~ '^(a b)$' ]] && echo true |
63 |
## stdout-json: "" |
64 |
## status: 1 |
65 |
## OK zsh stdout: true |
66 |
## OK zsh status: 0 |
67 |
|
68 |
#### Regex quoted with double quotes |
69 |
# bash doesn't like the quotes |
# Same pattern as the single-quoted case, but in double quotes.  Default
# expectation: no match (status 1, empty stdout); zsh (tagged OK) matches.
70 |
[[ 'a b' =~ "^(a b)$" ]] && echo true |
71 |
## stdout-json: "" |
72 |
## status: 1 |
73 |
## OK zsh stdout: true |
74 |
## OK zsh status: 0 |
75 |
|
76 |
#### Fix single quotes by storing in variable |
# The pattern is assigned with single quotes and then expanded UNQUOTED on the
# right-hand side of =~; in this form it is expected to match.
77 |
pat='^(a b)$' |
78 |
[[ 'a b' =~ $pat ]] && echo true |
79 |
## stdout: true |
80 |
|
81 |
#### Fix double quotes by storing in variable |
# The pattern is assigned with double quotes and then expanded UNQUOTED on the
# right-hand side of =~; in this form it is expected to match.
82 |
pat="^(a b)$" |
83 |
[[ 'a b' =~ $pat ]] && echo true |
84 |
## stdout: true |
85 |
|
86 |
#### Double quoting pat variable -- again bash doesn't like it. |
# Here the expansion itself is double-quoted ("$pat").  Default expectation:
# no match (status 1, empty stdout); the zsh expectation (tagged OK) is a
# successful match.
87 |
pat="^(a b)$" |
88 |
[[ 'a b' =~ "$pat" ]] && echo true |
89 |
## stdout-json: "" |
90 |
## status: 1 |
91 |
## OK zsh stdout: true |
92 |
## OK zsh status: 0 |
93 |
|
94 |
#### Regex with == and not =~ is parse error, different lexer mode required |
95 |
# They both give a syntax error. This is lame. |
# The right-hand side is the same text used in the backslash-quoted =~ case,
# but the operator is ==.  Only the status is asserted: 2 by default, 1 for
# zsh (tagged OK).
96 |
[[ '^(a b)$' == ^(a\ b)$ ]] && echo true |
97 |
## status: 2 |
98 |
## OK zsh status: 1 |
99 |
|
100 |
#### Omitting ( ) |
# With the parentheses removed, the == comparison is accepted, and the
# right-hand side is expected to equal the literal string '^a b$'.
101 |
[[ '^a b$' == ^a\ b$ ]] && echo true |
102 |
## stdout: true |
103 |
|
104 |
#### Malformed regex |
105 |
# Are they trying to PARSE the regex? Do they feed the buffer directly to |
106 |
# regcomp()? |
# The parentheses are reversed: ')' appears before '('.  Only the status is
# asserted: 2 by default, 1 for zsh (tagged OK).
107 |
[[ 'a b' =~ ^)a\ b($ ]] && echo true |
108 |
## status: 2 |
109 |
## OK zsh status: 1 |
110 |
|
111 |
#### Regex with char class containing space |
112 |
# For some reason it doesn't work without parens? |
# The bracket expression [a b] contains an unquoted space and is wrapped in
# ( ).  Expected to match 'ba ba '.
113 |
[[ 'ba ba ' =~ ([a b]+) ]] && echo true |
114 |
## stdout: true |
115 |
|
116 |
#### Operators and space lose meaning inside () |
# '<', '>' and the space between them are unquoted inside the group and are
# expected to match the literal subject '< >'.  The zsh expectation (tagged
# N-I) is empty stdout with status 1.
117 |
[[ '< >' =~ (< >) ]] && echo true |
118 |
## stdout: true |
119 |
## N-I zsh stdout-json: "" |
120 |
## N-I zsh status: 1 |
121 |
|
122 |
#### Regex with | |
# The | is unquoted and not inside ( ).  'bar' is expected to match, i.e. the
# right-hand side is taken as the regex foo|bar.  The zsh expectation (tagged
# N-I) is empty stdout with status 1.
123 |
[[ 'bar' =~ foo|bar ]] && echo true |
124 |
## stdout: true |
125 |
## N-I zsh stdout-json: "" |
126 |
## N-I zsh status: 1 |
127 |
|
128 |
#### Unquoted { is parse error in bash/zsh |
# The echo below still runs and reports the status of the [[ ]] command:
# status=2 by default, status=1 for zsh (tagged N-I).  'true' is never printed.
# NOTE(review): the bash manual documents a return value of 2 for a
# syntactically incorrect regex, which would explain status=2 -- confirm.
129 |
[[ { =~ { ]] && echo true |
130 |
echo status=$? |
131 |
## STDOUT: |
132 |
status=2 |
133 |
## END |
134 |
## N-I zsh STDOUT: |
135 |
status=1 |
136 |
## END |
137 |
|
138 |
#### Quoted { |
# With the brace on the right-hand side double-quoted, the match is expected
# to succeed (prints 'true', then status=0).  The zsh expectation (tagged N-I)
# is status=1 without 'true'.
139 |
[[ { =~ "{" ]] && echo true |
140 |
echo status=$? |
141 |
## STDOUT: |
142 |
true |
143 |
status=0 |
144 |
## END |
145 |
## N-I zsh STDOUT: |
146 |
status=1 |
147 |
## END |
148 |
|
149 |
#### Escaped { |
150 |
# from bash-completion |
# Group 1 is a backslash-escaped '$' followed by an optional backslash-escaped
# '{'; group 2 is a run of [A-Za-z0-9_].  For the subject '$PA' the expected
# captures are '$' and 'PA'.  The zsh expectation (tagged BUG) is empty stdout
# with status 1.
151 |
[[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}" |
152 |
## STDOUT: |
153 |
['$PA', '$', 'PA'] |
154 |
## END |
155 |
## BUG zsh stdout-json: "" |
156 |
## BUG zsh status: 1 |
157 |
|
158 |
#### Escaped { stored in variable first |
159 |
# from bash-completion |
# Same pattern text as the inline 'Escaped {' case, but assigned to a variable
# with single quotes and expanded unquoted.  The default expectation is the
# same captures ('$' and 'PA').  The zsh expectation (tagged BUG) is that
# argv.py runs but receives a single empty string.
160 |
pat='^(\$\{?)([A-Za-z0-9_]*)$' |
161 |
[[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}" |
162 |
## STDOUT: |
163 |
['$PA', '$', 'PA'] |
164 |
## END |
165 |
## BUG zsh STDOUT: |
166 |
[''] |
167 |
## END |
168 |
|
169 |
#### regex with ? |
# 'c?' is expected to match both the subject 'c' and the empty subject, so
# each line prints 'true'.
170 |
[[ 'c' =~ c? ]] && echo true |
171 |
[[ '' =~ c? ]] && echo true |
172 |
## STDOUT: |
173 |
true |
174 |
true |
175 |
## END |