| 1 |
#!/bin/bash |
| 2 |
# |
| 3 |
# Only bash and zsh seem to implement [[ foo =~ '' ]] |
| 4 |
# |
| 5 |
# ^(a b)$ is a regex that should match 'a b' in a group. |
| 6 |
# |
| 7 |
# Not sure what bash is doing here... I think I have to just be empirical. |
| 8 |
# Might need "compat" switch for parsing the regex. It should be an opaque |
| 9 |
# string like zsh, not sure why it isn't. |
| 10 |
# |
| 11 |
# I think this is just papering over bugs... |
| 12 |
# https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs |
| 13 |
# |
| 14 |
# Storing the regular expression in a shell variable is often a useful way to |
| 15 |
# avoid problems with quoting characters that are special to the shell. It is |
| 16 |
# sometimes difficult to specify a regular expression literally without using |
| 17 |
# quotes, or to keep track of the quoting used by regular expressions while |
| 18 |
# paying attention to the shell’s quote removal. Using a shell variable to |
| 19 |
# store the pattern decreases these problems. For example, the following is |
| 20 |
# equivalent to the above: |
| 21 |
# |
| 22 |
# pattern='[[:space:]]*(a)?b' |
| 23 |
# [[ $line =~ $pattern ]] |
| 24 |
# |
| 25 |
# If you want to match a character that’s special to the regular expression |
| 26 |
# grammar, it has to be quoted to remove its special meaning. This means that in |
| 27 |
# the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual |
| 28 |
# regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a |
| 29 |
# literal ‘.’. Shell programmers should take special care with backslashes, since |
| 30 |
# backslashes are used both by the shell and regular expressions to remove the |
| 31 |
# special meaning from the following character. The following two sets of |
| 32 |
# commands are not equivalent: |
| 33 |
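#
#   pattern='\.'
#
#   [[ . =~ $pattern ]]
#   [[ . =~ \. ]]
#
#   [[ . =~ "$pattern" ]]
#   [[ . =~ '\.' ]]
#
# The first two matches will succeed, but the second two will not, because in
# the second two the backslash will be part of the pattern to be matched.
#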
| 34 |
# From bash code: ( | ) are treated specially. Normally they must be quoted, but |
| 35 |
# they can be UNQUOTED in BASH_REGEX state. In fact they can't be quoted! |
| 36 |
|
| 37 |
### Match is unanchored at both ends |
| 38 |
# The pattern 'a' occurs in the middle of 'bar', so the match succeeds even
# though the pattern has no ^ or $ anchors and touches neither end of the string.
[[ 'bar' =~ a ]] && echo true |
| 39 |
# stdout: true |
| 40 |
|
| 41 |
### Failed match |
| 42 |
# 'X' does not occur anywhere in 'bar', so [[ ]] fails, the && short-circuits,
# and nothing is printed.
[[ 'bar' =~ X ]] && echo true |
| 43 |
# stdout-json: "" |
| 44 |
|
| 45 |
### Regex quoted with \ -- preferred in bash |
| 46 |
# Only the space is escaped (with a backslash); ^ ( ) $ are left unquoted so
# they keep their regex meaning. No shell-specific override is listed for this
# case, so every shell is expected to print 'true'.
[[ 'a b' =~ ^(a\ b)$ ]] && echo true |
| 47 |
# stdout: true |
| 48 |
|
| 49 |
### Regex quoted with single quotes |
| 50 |
# bash doesn't like the quotes: the whole pattern is single-quoted here, and
# the bash-specific assertion below records that bash prints nothing instead
# of 'true' (per the bash manual, quoting removes the regex meaning).
[[ 'a b' =~ '^(a b)$' ]] && echo true |
| 52 |
# stdout: true |
| 53 |
# OK bash stdout-json: "" |
| 54 |
|
| 55 |
### Regex quoted with double quotes |
| 56 |
# bash doesn't like the quotes: same pattern as the single-quoted case, but
# wrapped in double quotes. The bash-specific assertion below again records
# empty output for bash.
[[ 'a b' =~ "^(a b)$" ]] && echo true |
| 58 |
# stdout: true |
| 59 |
# OK bash stdout-json: "" |
| 60 |
|
| 61 |
### Fix single quotes by storing in variable |
| 62 |
# The pattern is assigned with single quotes, then expanded UNQUOTED on the
# right of =~, so it is used as a regex. No bash-specific override is needed.
pat='^(a b)$' |
| 63 |
[[ 'a b' =~ $pat ]] && echo true |
| 64 |
# stdout: true |
| 65 |
|
| 66 |
### Fix double quotes by storing in variable |
| 67 |
# Same as the single-quoted assignment case, except the pattern is assigned
# with double quotes. The expansion on the right of =~ is still unquoted, so
# the match is expected to succeed.
pat="^(a b)$" |
| 68 |
[[ 'a b' =~ $pat ]] && echo true |
| 69 |
# stdout: true |
| 70 |
|
| 71 |
### Double quoting pat variable -- again bash doesn't like it. |
| 72 |
# Here the variable expansion itself is double-quoted on the right of =~.
# The bash-specific assertion below records that bash prints nothing, as in
# the cases where the pattern is quoted inline.
pat="^(a b)$" |
| 73 |
[[ 'a b' =~ "$pat" ]] && echo true |
| 74 |
# stdout: true |
| 75 |
# OK bash stdout-json: "" |
| 76 |
|
| 77 |
### Regex with == and not =~ is parse error, different lexer mode required |
| 78 |
# They both give a syntax error. This is lame. ("Both" presumably means bash
# and zsh; the assertions below expect exit status 2, with 1 recorded for zsh.)
# The right-hand side is the same text as the backslash-quoted regex case;
# only the operator differs (== instead of =~).
[[ '^(a b)$' == ^(a\ b)$ ]] && echo true |
| 80 |
# status: 2 |
| 81 |
# OK zsh status: 1 |
| 82 |
|
| 83 |
### Omitting ( ) |
| 84 |
# Without the parentheses, the right-hand side is accepted with ==. The
# left-hand string is literally '^a b$', and the comparison is expected to
# succeed, so ^ and $ are matched as ordinary characters here, not as anchors.
[[ '^a b$' == ^a\ b$ ]] && echo true |
| 85 |
# stdout: true |
| 86 |
|
| 87 |
### Malformed regex |
| 88 |
# Are they trying to PARSE the regex? Do they feed the buffer directly to |
| 89 |
# regcomp()? |
| 90 |
# The parentheses are reversed: ')' appears before '('. The assertions below
# expect exit status 2, with status 1 recorded for zsh.
[[ 'a b' =~ ^)a\ b($ ]] && echo true |
| 91 |
# status: 2 |
| 92 |
# OK zsh status: 1 |
| 93 |
|
| 94 |
### Regex with char class |
| 95 |
# For some reason it doesn't work without parens? |
| 96 |
# The bracket expression [a b] contains an unescaped space. It is wrapped in
# ( ) here -- presumably the space would otherwise end the shell word; TODO
# confirm against the shell's lexer.
[[ 'ba ba ' =~ ([a b]+) ]] && echo true |
| 97 |
# stdout: true |
| 98 |
|
| 99 |
### Operators lose meaning in () in regex state (BASH_REGEX_CHARS) |
| 100 |
# The ';' sits inside unquoted ( ) on the right of =~. The default expectation
# is that it is taken as part of the regex and matches the literal ';' in
# 'gt;'. The zsh-specific assertions below record empty output and status 1
# for zsh ("N-I" presumably means not implemented -- verify against the runner).
[[ 'gt;' =~ (gt;) ]] && echo true |
| 101 |
# stdout: true |
| 102 |
# N-I zsh stdout-json: "" |
| 103 |
# N-I zsh status: 1 |
| 104 |
|
| 105 |
### Regex with | |
| 106 |
# The unquoted '|' is expected to act as regex alternation rather than as a
# shell pipe: 'bar' matches the second alternative. The zsh-specific
# assertions below record empty output and status 1 for zsh.
[[ 'bar' =~ foo|bar ]] && echo true |
| 107 |
# stdout: true |
| 108 |
# N-I zsh stdout-json: "" |
| 109 |
# N-I zsh status: 1 |