1 #!/usr/bin/env bash
2 #
3 # Only bash and zsh seem to implement [[ foo =~ '' ]]
4 #
5 # ^(a b)$ is a regex that should match 'a b' in a group.
6 #
7 # Not sure what bash is doing here... I think I have to just be empirical.
8 # Might need "compat" switch for parsing the regex. It should be an opaque
9 # string like zsh, not sure why it isn't.
10 #
11 # I think this is just papering over bugs...
12 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
13 #
14 # Storing the regular expression in a shell variable is often a useful way to
15 # avoid problems with quoting characters that are special to the shell. It is
16 # sometimes difficult to specify a regular expression literally without using
17 # quotes, or to keep track of the quoting used by regular expressions while
18 # paying attention to the shell’s quote removal. Using a shell variable to
19 # store the pattern decreases these problems. For example, the following is
20 # equivalent to the above:
21 #
22 # pattern='[[:space:]]*(a)?b'
23 # [[ $line =~ $pattern ]]
24 #
25 # If you want to match a character that’s special to the regular expression
26 # grammar, it has to be quoted to remove its special meaning. This means that in
27 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
28 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
29 # literal ‘.’. Shell programmers should take special care with backslashes, since
30 # backslashes are used both by the shell and regular expressions to remove the
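31 # special meaning from the following character. The following two sets of
32 # commands are not equivalent:
#
#   pattern='\.'
#
#   [[ . =~ $pattern ]]
#   [[ . =~ \. ]]
#
#   [[ . =~ "$pattern" ]]
#   [[ . =~ '\.' ]]
#
# The first two matches succeed, but the second two do not, because in the
# second two the backslash is part of the pattern to be matched.
33 #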
34 # From the bash source: ( | ) are treated specially. Normally they must be
35 # quoted, but they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
36
37 #### BASH_REMATCH
# After a successful =~ match, BASH_REMATCH is expected to hold the whole
# match followed by one element per capture group (see STDOUT below).
# The zsh expectation is a single empty string, i.e. the array is not filled.
38 [[ foo123 =~ ([a-z]+)([0-9]+) ]]
39 argv.py "${BASH_REMATCH[@]}"
40 ## STDOUT:
41 ['foo123', 'foo', '123']
42 ## END
43 ## N-I zsh STDOUT:
44 ['']
45 ## END
46
47 #### Match is unanchored at both ends
# The pattern 'a' occurs in the middle of 'bar'; the match is expected to
# succeed, so no implicit ^ or $ is applied to the regex.
48 [[ 'bar' =~ a ]] && echo true
49 ## stdout: true
50
51 #### Failed match
# 'bar' contains no 'X': [[ is expected to return 1, so the && short-circuits
# and nothing is printed.
52 [[ 'bar' =~ X ]] && echo true
53 ## status: 1
54 ## stdout-json: ""
55
56 #### Regex quoted with \ -- preferred in bash
# Only the space is backslash-escaped; the parentheses are left unquoted and
# act as a regex group. Expected to match in every shell under test.
57 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
58 ## stdout: true
59
60 #### Regex quoted with single quotes
61 # bash doesn't like the quotes
# Default expectation: the quoted text is still used as a regex and matches.
# bash expectation (OK lines below): no output and status 1, consistent with
# the manual's rule that quoting removes the regex meaning of the characters.
62 [[ 'a b' =~ '^(a b)$' ]] && echo true
63 ## stdout: true
64 ## status: 0
65 ## OK bash stdout-json: ""
66 ## OK bash status: 1
67
68 #### Regex quoted with double quotes
69 # bash doesn't like the quotes
# Same pattern as the single-quoted case, but in double quotes. The default
# expectation is a match; for bash the expectation is no output and status 1.
70 [[ 'a b' =~ "^(a b)$" ]] && echo true
71 ## stdout: true
72 ## status: 0
73 ## OK bash stdout-json: ""
74 ## OK bash status: 1
75
76 #### Fix single quotes by storing in variable
# The pattern is assigned with single quotes and then expanded UNQUOTED on the
# right of =~, so it is used as a regex. No per-shell overrides are listed:
# the match is expected to succeed everywhere.
77 pat='^(a b)$'
78 [[ 'a b' =~ $pat ]] && echo true
79 ## stdout: true
80
81 #### Fix double quotes by storing in variable
# The pattern is assigned with double quotes and then expanded UNQUOTED on the
# right of =~, so it is used as a regex. No per-shell overrides are listed:
# the match is expected to succeed everywhere.
82 pat="^(a b)$"
83 [[ 'a b' =~ $pat ]] && echo true
84 ## stdout: true
85
86 #### Double quoting pat variable -- again bash doesn't like it.
# Here the expansion itself is double-quoted. The default expectation is still
# a match; for bash the expectation is no output and status 1, the same
# outcome as writing the quoted pattern inline.
87 pat="^(a b)$"
88 [[ 'a b' =~ "$pat" ]] && echo true
89 ## stdout: true
90 ## status: 0
91 ## OK bash stdout-json: ""
92 ## OK bash status: 1
93
94 #### Regex with == and not =~ is parse error, different lexer mode required
95 # They both give a syntax error. This is lame.
# The unquoted ( ) that are accepted after =~ are not accepted after ==.
# Expected exit status is 2, or 1 for zsh.
96 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
97 ## status: 2
98 ## OK zsh status: 1
99
100 #### Omitting ( )
# Without the parentheses the same == comparison parses. The left-hand string
# literally contains ^ and $, and the comparison is expected to succeed, so
# those characters are compared as ordinary text rather than as regex anchors.
101 [[ '^a b$' == ^a\ b$ ]] && echo true
102 ## stdout: true
103
104 #### Malformed regex
105 # Are they trying to PARSE the regex? Do they feed the buffer directly to
106 # regcomp()?
# The parentheses are in the wrong order: ')' comes before '('.
# Expected exit status is 2, or 1 for zsh; no output is expected.
107 [[ 'a b' =~ ^)a\ b($ ]] && echo true
108 ## status: 2
109 ## OK zsh status: 1
110
111 #### Regex with char class
112 # For some reason it doesn't work without parens?
# The bracket expression [a b] contains an unquoted space.
# NOTE(review): presumably the space only stays part of the pattern because it
# sits inside ( ), which the header says are handled specially in regex
# state -- confirm against the shell's lexer.
113 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
114 ## stdout: true
115
116 #### Operators lose meaning in () in regex state (BASH_REGEX_CHARS)
# Inside ( ) the characters < and > and the space are expected to be taken as
# regex text, not as redirection operators or word separators.
# For zsh the expectation is no output and status 1.
117 [[ '< >' =~ (< >) ]] && echo true
118 ## stdout: true
119 ## N-I zsh stdout-json: ""
120 ## N-I zsh status: 1
121
122 #### Regex with |
# An unquoted | on the right of =~ is expected to act as regex alternation
# (foo OR bar) rather than as a pipe; 'bar' matches the second alternative.
# For zsh the expectation is no output and status 1.
123 [[ 'bar' =~ foo|bar ]] && echo true
124 ## stdout: true
125 ## N-I zsh stdout-json: ""
126 ## N-I zsh status: 1
127
128 #### Double quoted regex gets regex-escaped
# The quoted "{" is expected to match a literal brace in the subject string.
# For zsh the expectation is no output and status 1.
129 [[ { =~ "{" ]] && echo true
130 ## stdout: true
131 ## N-I zsh status: 1
132 ## N-I zsh stdout-json: ""