1 #!/bin/bash
2 #
3 # Only bash and zsh seem to implement [[ foo =~ '' ]]
4 #
5 # ^(a b)$ is a regex that should match 'a b' in a group.
6 #
7 # Not sure what bash is doing here... I think I have to just be empirical.
8 # Might need "compat" switch for parsing the regex. It should be an opaque
9 # string like zsh, not sure why it isn't.
10 #
11 # I think this is just papering over bugs...
12 # https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
13 # The text below is quoted from that section of the bash manual.
14 # Storing the regular expression in a shell variable is often a useful way to
15 # avoid problems with quoting characters that are special to the shell. It is
16 # sometimes difficult to specify a regular expression literally without using
17 # quotes, or to keep track of the quoting used by regular expressions while
18 # paying attention to the shell’s quote removal. Using a shell variable to
19 # store the pattern decreases these problems. For example, the following is
20 # equivalent to the above:
21 #
22 # pattern='[[:space:]]*(a)?b'
23 # [[ $line =~ $pattern ]]
24 #
25 # If you want to match a character that’s special to the regular expression
26 # grammar, it has to be quoted to remove its special meaning. This means that in
27 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
28 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
29 # literal ‘.’. Shell programmers should take special care with backslashes, since
30 # backslashes are used both by the shell and regular expressions to remove the
31 # special meaning from the following character. The following two sets of
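32 # commands are not equivalent (given pattern='\.'): [[ . =~ $pattern ]] and
33 # [[ . =~ \. ]] match, but [[ . =~ "$pattern" ]] and [[ . =~ '\.' ]] do not.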
34 # From bash code: ( | ) are treated specially. Normally they must be quoted, but
35 # they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted!
36
37 ### Match is unanchored at both ends
38 [[ 'bar' =~ a ]] && echo true
39 # stdout: true
40
41 ### Failed match
42 [[ 'bar' =~ X ]] && echo true
43 # stdout-json: ""
44
45 ### Regex quoted with \ -- preferred in bash
46 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
47 # stdout: true
48
49 ### Regex quoted with single quotes
50 # bash matches a quoted pattern as a literal string, so it prints nothing here
51 [[ 'a b' =~ '^(a b)$' ]] && echo true
52 # stdout: true
53 # OK bash stdout-json: ""
54
55 ### Regex quoted with double quotes
56 # bash matches a quoted pattern as a literal string, so it prints nothing here
57 [[ 'a b' =~ "^(a b)$" ]] && echo true
58 # stdout: true
59 # OK bash stdout-json: ""
60
61 ### Fix single quotes by storing in variable
62 pat='^(a b)$'
63 [[ 'a b' =~ $pat ]] && echo true
64 # stdout: true
65
66 ### Fix double quotes by storing in variable
67 pat="^(a b)$"
68 [[ 'a b' =~ $pat ]] && echo true
69 # stdout: true
70
71 ### Double quoting pat variable -- again bash doesn't like it.
72 pat="^(a b)$"
73 [[ 'a b' =~ "$pat" ]] && echo true
74 # stdout: true
75 # OK bash stdout-json: ""
76
77 ### Regex with == and not =~ is parse error, different lexer mode required
78 # Both shells (bash and zsh) fail with a syntax error, since unquoted ( ) are only accepted after =~
79 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
80 # status: 2
81 # OK zsh status: 1
82
83 ### Omitting ( )
84 [[ '^a b$' == ^a\ b$ ]] && echo true
85 # stdout: true
86
87 ### Malformed regex
88 # Are they trying to PARSE the regex? Do they feed the buffer directly to
89 # regcomp()?
90 [[ 'a b' =~ ^)a\ b($ ]] && echo true
91 # status: 2
92 # OK zsh status: 1
93
94 ### Regex with char class
95 # Doesn't work without parens -- presumably the unquoted space would end the word outside ( ); TODO confirm
96 [[ 'ba ba ' =~ ([a b]+) ]] && echo true
97 # stdout: true
98
99 ### Operators lose meaning in () in regex state (BASH_REGEX_CHARS)
100 [[ '< >' =~ (< >) ]] && echo true
101 # stdout: true
102 # N-I zsh stdout-json: ""
103 # N-I zsh status: 1
104
105 ### Regex with |
106 [[ 'bar' =~ foo|bar ]] && echo true
107 # stdout: true
108 # N-I zsh stdout-json: ""
109 # N-I zsh status: 1