1 |
# Test out Oil's regular expression syntax. |
2 |
|
3 |
#### /^.$/ |
4 |
shopt -s oil:all |
5 |
var pat = '' |
6 |
|
7 |
setvar pat = /^.$/ |
8 |
echo pat=$pat |
9 |
|
10 |
setvar pat = /%start dot %end/ |
11 |
echo pat=$pat |
12 |
|
13 |
if ('' ~ pat) { # ERE syntax |
14 |
echo yes |
15 |
} else { |
16 |
echo no |
17 |
} |
18 |
# $pat is same as pat |
19 |
if ('f' ~ pat) { # ERE syntax |
20 |
echo yes |
21 |
} else { |
22 |
echo no |
23 |
} |
24 |
|
25 |
## STDOUT: |
26 |
pat=^.$ |
27 |
pat=^.$ |
28 |
no |
29 |
yes |
30 |
## END |
31 |
|
32 |
|
33 |
#### /.+/ |
34 |
shopt -s oil:all |
35 |
|
36 |
var pat = /.+/ |
37 |
echo $pat |
38 |
|
39 |
var s = 'foo' |
40 |
if (s ~ pat) { # ERE syntax |
41 |
echo yes |
42 |
} |
43 |
var empty = '' |
44 |
if (empty ~ pat) { echo yes } else { echo no } |
45 |
## STDOUT: |
46 |
.+ |
47 |
yes |
48 |
no |
49 |
## END |
50 |
|
51 |
#### Positional captures with _match |
52 |
shopt -s oil:all |
53 |
|
54 |
var x = 'zz 2020-08-20' |
55 |
|
56 |
if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] { |
57 |
argv.py "${BASH_REMATCH[@]}" |
58 |
} |
59 |
|
60 |
setvar BASH_REMATCH = %(reset) |
61 |
|
62 |
if (x ~ /<d+> '-' <d+>/) { |
63 |
argv.py "${BASH_REMATCH[@]}" |
64 |
argv.py $_match(0) $_match(1) $_match(2) |
65 |
|
66 |
argv.py $_match() # synonym for _match(0) |
67 |
|
68 |
# TODO: Also test _start() and _end() |
69 |
} |
70 |
## STDOUT: |
71 |
['2020-08', '2020', '08'] |
72 |
['reset'] |
73 |
['2020-08', '2020', '08'] |
74 |
['2020-08'] |
75 |
## END |
76 |
|
77 |
#### Named captures with _match |
78 |
shopt -s oil:all |
79 |
|
80 |
var x = 'zz 2020-08-20' |
81 |
|
82 |
if (x ~ /<d+ : year> '-' <d+ : month>/) { |
83 |
argv.py $_match('year') $_match('month') |
84 |
} |
85 |
## STDOUT: |
86 |
['2020', '08'] |
87 |
## END |
88 |
|
89 |
#### s ~ regex and s !~ regex |
90 |
shopt -s oil:basic |
91 |
|
92 |
var s = 'foo' |
93 |
if (s ~ '.([[:alpha:]]+)') { # ERE syntax |
94 |
echo matches |
95 |
argv.py $_match(0) $_match(1) |
96 |
} |
97 |
if (s !~ '[[:digit:]]+') { |
98 |
echo "does not match" |
99 |
argv.py $_match(0) $_match(1) |
100 |
} |
101 |
|
102 |
if (s ~ '[[:digit:]]+') { |
103 |
echo "matches" |
104 |
} |
105 |
# Should be cleared now |
106 |
# should this be Undef rather than ''? |
107 |
var x = _match(0) |
108 |
var y = _match(1) |
109 |
if (x == null and y == null) { |
110 |
echo 'cleared' |
111 |
} |
112 |
|
113 |
## STDOUT: |
114 |
matches |
115 |
['foo', 'oo'] |
116 |
does not match |
117 |
['foo', 'oo'] |
118 |
cleared |
119 |
## END |
120 |
|
121 |
#### _start() and _end() |
122 |
shopt -s oil:basic |
123 |
|
124 |
var s = 'foo123bar' |
125 |
if (s ~ /digit+/) { |
126 |
echo start=$_start() end=$_end() |
127 |
} |
128 |
|
129 |
if (s ~ / word+ <digit+> /) { |
130 |
echo start=$_start(1) end=$_end(1) |
131 |
} |
132 |
## STDOUT: |
133 |
start=3 end=6 |
134 |
start=3 end=6 |
135 |
## END |
136 |
|
137 |
#### Repeat {1,3} etc.
# The four repetition forms {n}, {m,n}, {m,} and {,n} are copied through
# unchanged after the translated [[:digit:]] class.
var pat = null

setvar pat = /d{2}/
echo $pat
setvar pat = /d{1,3}/
echo $pat
setvar pat = /d{1,}/
echo $pat
setvar pat = /d{,3}/
echo $pat

## STDOUT:
[[:digit:]]{2}
[[:digit:]]{1,3}
[[:digit:]]{1,}
[[:digit:]]{,3}
## END
156 |
|
157 |
|
158 |
#### d+ digit+ !d+ !digit+ |
159 |
shopt -s oil:all |
160 |
|
161 |
var pat = '' |
162 |
|
163 |
setvar pat = /d+/ |
164 |
echo $pat |
165 |
if ('42' ~ pat) { echo yes } |
166 |
|
167 |
var empty = '' |
168 |
if (empty ~ pat) { echo yes } else { echo no } |
169 |
|
170 |
setvar pat = /digit+/ |
171 |
echo $pat |
172 |
setvar pat = /!d+/ |
173 |
echo $pat |
174 |
setvar pat = /!digit+/ |
175 |
echo $pat |
176 |
|
177 |
|
178 |
## STDOUT: |
179 |
[[:digit:]]+ |
180 |
yes |
181 |
no |
182 |
[[:digit:]]+ |
183 |
[^[:digit:]]+ |
184 |
[^[:digit:]]+ |
185 |
## END |
186 |
|
187 |
#### Alternation and sequence
# '|' and the keyword 'or' are two spellings of alternation; both produce the
# same ERE.
var pat = ''
setvar pat = /s d+ | w*/
echo $pat
setvar pat = /s d+ or w*/
echo $pat
## STDOUT:
[[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
[[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
## END
197 |
|
198 |
#### Char Class Ranges |
199 |
shopt -s oil:all |
200 |
|
201 |
var pat = '' |
202 |
setvar pat = /[0-9 a-f]+/ |
203 |
echo $pat |
204 |
# This is equivalent |
205 |
setvar pat = /['0' - '9' 'a' - 'f']+/ |
206 |
echo $pat |
207 |
|
208 |
if ('0123' ~ pat) { echo yes } else { echo no } |
209 |
if ('zzz' ~ pat) { echo yes } else { echo no } |
210 |
if ('' ~ pat) { echo yes } else { echo no } |
211 |
## STDOUT: |
212 |
[0-9a-f]+ |
213 |
[0-9a-f]+ |
214 |
yes |
215 |
no |
216 |
no |
217 |
## END |
218 |
|
219 |
#### Char Class Set |
220 |
shopt -s oil:all |
221 |
var pat = '' |
222 |
|
223 |
# This is NOT allowed |
224 |
# setvar pat = /[a b c]+/ |
225 |
|
226 |
setvar pat = /['abc']+/ |
227 |
echo $pat |
228 |
|
229 |
if ('cbcb' ~ pat) { echo yes } else { echo no } |
230 |
if ('0123' ~ pat) { echo yes } else { echo no } |
231 |
if ('' ~ pat) { echo yes } else { echo no } |
232 |
## STDOUT: |
233 |
[abc]+ |
234 |
yes |
235 |
no |
236 |
no |
237 |
## END |
238 |
|
239 |
#### Range with escaped characters
# \x00 and \x0f are emitted as raw bytes in the ERE, so the pattern is dumped
# through od to make them visible.
shopt -s oil:all

var pat = null

setvar pat = / [ \x00 - \x0f ] /
echo $pat | od -A n -t x1

# NOTE: 'od -A n' output begins with a space, so the expected line does too.
## STDOUT:
 5b 00 2d 0f 5d 0a
## END
250 |
|
251 |
|
252 |
#### Group () |
253 |
shopt -s oil:all |
254 |
var pat = '' |
255 |
|
256 |
setvar pat = /(%start s or d d)/ |
257 |
echo $pat |
258 |
|
259 |
if (' foo' ~ pat) { echo yes } else { echo no } |
260 |
if ('-00-' ~ pat) { echo yes } else { echo no } |
261 |
if ('foo' ~ pat) { echo yes } else { echo no } |
262 |
|
263 |
## STDOUT: |
264 |
(^[[:space:]]|[[:digit:]][[:digit:]]) |
265 |
yes |
266 |
yes |
267 |
no |
268 |
## END |
269 |
|
270 |
#### Capture is acceptable as a group
# A <...> capture prints as an ordinary ERE parenthesized group.
shopt -s oil:all
var pat = /<%start s | d d>/
echo $pat
## STDOUT:
(^[[:space:]]|[[:digit:]][[:digit:]])
## END
277 |
|
278 |
#### Named Capture Decays Without Name |
279 |
shopt -s oil:all |
280 |
var pat = /<d+ : month>/ |
281 |
echo $pat |
282 |
|
283 |
if ('123' ~ pat) { |
284 |
echo yes |
285 |
} |
286 |
|
287 |
## STDOUT: |
288 |
([[:digit:]]+) |
289 |
yes |
290 |
## END |
291 |
|
292 |
#### Named Capture With ~ Assigns Variable |
293 |
shopt -s oil:all |
294 |
var pat = /<d+ : month>/ |
295 |
echo $pat |
296 |
|
297 |
if ('123' ~ pat) { |
298 |
echo yes |
299 |
= month |
300 |
} |
301 |
## STDOUT: |
302 |
([[:digit:]]+) |
303 |
yes |
304 |
TODO MONTH |
305 |
## END |
306 |
|
307 |
#### literal '' |
308 |
shopt -s oil:all |
309 |
var pat = '' |
310 |
|
311 |
setvar pat = /'abc' 'def'/ |
312 |
echo $pat |
313 |
|
314 |
#setvar pat = /'abc' '^ + * ?'/ |
315 |
#echo $pat |
316 |
|
317 |
if ('abcde' ~ pat) { echo yes } else { echo no } |
318 |
if ('abcdef' ~ pat) { echo yes } else { echo no } |
319 |
|
320 |
## STDOUT: |
321 |
abcdef |
322 |
no |
323 |
yes |
324 |
## END |
325 |
|
326 |
#### double quoted, $x, and ${x} |
327 |
shopt -s oil:all |
328 |
var pat = '' |
329 |
|
330 |
var x = 'x' |
331 |
var y = 'y' |
332 |
setvar pat = / $x ${x} "abc" "$x${y}"/ |
333 |
echo $pat |
334 |
|
335 |
if ('xxabcx' ~ pat) { echo yes } else { echo no } |
336 |
if ('xxabcxyf' ~ pat) { echo yes } else { echo no } |
337 |
|
338 |
## STDOUT: |
339 |
xxabcxy |
340 |
no |
341 |
yes |
342 |
## END |
343 |
|
344 |
#### @splice |
345 |
shopt -s oil:all |
346 |
var d = /d+/; |
347 |
var ip = / @d '.' @d '.' @d '.' @d / |
348 |
echo $ip |
349 |
if ('0.0.0.0' ~ ip) { echo yes } else { echo no } |
350 |
if ('0.0.0' ~ ip) { echo yes } else { echo no } |
351 |
## STDOUT: |
352 |
[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+ |
353 |
yes |
354 |
no |
355 |
## END |
356 |
|
357 |
#### splice with capital letters |
358 |
shopt -s oil:all |
359 |
var D = /d+/; |
360 |
var ip = / D '.' D '.' D '.' D / |
361 |
echo $ip |
362 |
if ('0.0.0.0' ~ ip) { echo yes } else { echo no } |
363 |
if ('0.0.0' ~ ip) { echo yes } else { echo no } |
364 |
## STDOUT: |
365 |
[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+ |
366 |
yes |
367 |
no |
368 |
## END |
369 |
|
370 |
#### Matching escaped tab character |
371 |
shopt -s oil:all |
372 |
|
373 |
# BUG: need C strings in array literal |
374 |
var lines=%($'aa\tbb' $'cc\tdd') |
375 |
|
376 |
var pat = / ('a' [\t] 'b') / |
377 |
write pat=$pat |
378 |
write @lines | egrep $pat |
379 |
|
380 |
## stdout-json: "pat=(a[\t]b)\naa\tbb\n" |
381 |
|
382 |
#### Match non-ASCII byte denoted using c'\xff' |
383 |
shopt -s oil:all |
384 |
var pat = /[ c'\xff' ]/; |
385 |
|
386 |
echo $pat | od -A n -t x1 |
387 |
if (c'\xff' ~ pat) { echo yes } else { echo no } |
388 |
if (c'\xfe' ~ pat) { echo yes } else { echo no } |
389 |
|
390 |
## STDOUT: |
391 |
5b ff 5d 0a |
392 |
yes |
393 |
no |
394 |
## END |
395 |
|
396 |
#### Match non-ASCII byte denoted using \xff |
397 |
shopt -s oil:all |
398 |
var pat = /[ \xff ]/; |
399 |
|
400 |
echo $pat | od -A n -t x1 |
401 |
if (c'\xff' ~ pat) { echo yes } else { echo no } |
402 |
if (c'\xfe' ~ pat) { echo yes } else { echo no } |
403 |
|
404 |
## STDOUT: |
405 |
5b ff 5d 0a |
406 |
yes |
407 |
no |
408 |
## END |
409 |
|
410 |
#### ERE can express Unicode escapes that are in the ASCII range |
411 |
shopt -s oil:all |
412 |
var pat = /[ \u{7f} ]/; |
413 |
|
414 |
echo $pat | od -A n -t x1 |
415 |
if (c'\x7f' ~ pat) { echo yes } else { echo no } |
416 |
if (c'\x7e' ~ pat) { echo yes } else { echo no } |
417 |
|
418 |
var pat2 = /[ \u{7f} ]/; |
419 |
var pat3 = /[ \u{0007f} ]/; |
420 |
test "$pat2" = "$pat3" && echo 'equal' |
421 |
|
422 |
## STDOUT: |
423 |
5b 7f 5d 0a |
424 |
yes |
425 |
no |
426 |
equal |
427 |
## END |
428 |
|
429 |
#### ERE can't express higher Unicode escapes |
430 |
shopt -s oil:all |
431 |
var pat = /[ \u{ff} ]/; |
432 |
|
433 |
echo $pat | od -A n -t x1 |
434 |
if (c'\x7f' ~ pat) { echo yes } else { echo no } |
435 |
if (c'\x7e' ~ pat) { echo yes } else { echo no } |
436 |
|
437 |
## status: 1 |
438 |
## stdout-json: "" |
439 |
|
440 |
#### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
# Splicing a two-byte string containing a non-ASCII byte into a class is
# expected to fail with status 1 and no output.
var bytes = c'\x7f\xff'
var pat = / [ $bytes ] /
echo $pat
## status: 1
## stdout-json: ""
446 |
|
447 |
#### Matching escaped tab character |
448 |
shopt -s oil:all |
449 |
|
450 |
# BUG: need C strings in array literal |
451 |
var lines=%($'aa\tbb' $'cc\tdd') |
452 |
|
453 |
var pat = / ('a' [\t] 'b') / |
454 |
write pat=$pat |
455 |
write @lines | egrep $pat |
456 |
|
457 |
## stdout-json: "pat=(a[\t]b)\naa\tbb\n" |
458 |
|
459 |
#### Matching ] and \ and ' and " in character classes |
460 |
shopt -s oil:all |
461 |
|
462 |
# BUG: need C strings in array literal |
463 |
var lines=%( |
464 |
'backslash \' |
465 |
'rbracket ]' |
466 |
'lbracket [' |
467 |
"sq '" |
468 |
'dq "' |
469 |
) |
470 |
|
471 |
# Weird GNU quirk: ] has to come first! |
472 |
# []abc] works. But [abc\]] does NOT work. Stupid rule! |
473 |
|
474 |
var pat = / [ ']' \\ \' \" ] / |
475 |
write pat=$pat |
476 |
write @lines | egrep $pat |
477 |
|
478 |
## STDOUT: |
479 |
pat=[]\\'"] |
480 |
backslash \ |
481 |
rbracket ] |
482 |
sq ' |
483 |
dq " |
484 |
## END |
485 |
|
486 |
#### Matching literal hyphen in character classes
# A hyphen arriving through $var, ${var} or "-" is printed escaped as \- so it
# cannot form a range.
shopt -s oil:all

var literal = '-'
var pat = / [ 'a' $literal 'b' ${literal} "-" ] /
write pat=$pat
write 'c-d' 'ab' 'cd' | grep $pat
## STDOUT:
pat=[a\-b\-\-]
c-d
ab
## END
498 |
|
499 |
#### Repeated String Literal With Single Char |
500 |
shopt -s oil:all |
501 |
|
502 |
var literal = 'f' |
503 |
var pat = null |
504 |
|
505 |
setvar pat = / %start $literal+ %end / |
506 |
echo $pat |
507 |
setvar pat = / %start ($literal)+ %end / |
508 |
echo $pat |
509 |
|
510 |
if ('fff' ~ pat) { echo yes } |
511 |
if ('foo' !~ pat) { echo no } |
512 |
|
513 |
## STDOUT: |
514 |
^f+$ |
515 |
^(f)+$ |
516 |
yes |
517 |
no |
518 |
## END |
519 |
|
520 |
#### Error when unparenthesized string of more than one character is repeated |
521 |
shopt -s oil:all |
522 |
|
523 |
var literal = 'foo' |
524 |
var pat = null |
525 |
|
526 |
setvar pat = / %start $literal+ %end / |
527 |
echo $pat |
528 |
setvar pat = / %start ($literal)+ %end / |
529 |
echo $pat |
530 |
|
531 |
if ('foofoo' ~ pat) { echo yes } |
532 |
if ('foof' !~ pat) { echo no } |
533 |
|
534 |
## status: 1 |
535 |
## stdout-json: "" |
536 |
|
537 |
#### Instead of c'foo\\bar' use 'foo' \\ 'bar' |
538 |
shopt -s oil:all |
539 |
var pat = /'foo' \\ 'bar'/ |
540 |
echo $pat |
541 |
|
542 |
if (r'foo\bar' ~ pat) { echo yes } |
543 |
if (r'foo.bar' !~ pat) { echo no } |
544 |
## STDOUT: |
545 |
foo\\bar |
546 |
yes |
547 |
no |
548 |
## END |
549 |
|
550 |
#### Negation of Character Class ![a-z] |
551 |
shopt -s oil:all |
552 |
|
553 |
var pat = / ![ a-z ] / |
554 |
echo $pat |
555 |
|
556 |
if ('0' ~ pat) { echo yes } |
557 |
if ('a' !~ pat) { echo no } |
558 |
|
559 |
## STDOUT: |
560 |
[^a-z] |
561 |
yes |
562 |
no |
563 |
## END |
564 |
|
565 |
#### Posix and Perl class in class literals
# Named classes such as 'space' and 'digit' may appear inside [...] next to
# literal characters.
shopt -s oil:all

var pat = null

setvar pat = / [ space 'z' ] /
echo $pat
#setvar pat = / [ ~space 'z' ] /
#echo $pat

# PROBLEM: can't negate individual POSIX classes. They would have to be a Perl
# class to be \D or \S.
# In ERE a leading ^ negates the whole bracket expression, e.g. [^[:space:]z]
# or [^[:space:]]; there is no way to negate only [:space:] inside it.

setvar pat = / [ digit 'z' ] /
echo $pat
#setvar pat = / [ ~digit 'z' ] /
#echo $pat

## STDOUT:
[[:space:]z]
[[:digit:]z]
## END
589 |
|
590 |
#### [!d] can't be negated because it's a literal character
# Expected to fail with status 2 and no output.
setvar pat = / [ !d 'z' ] /
echo $pat
## status: 2
## stdout-json: ""
595 |
|
596 |
#### [!digit] can't be negated in POSIX ERE (but yes in Perl)
# Expected to fail with status 1 and no output.
var pat = null
setvar pat = / [ !digit 'z' ] /
echo $pat
## status: 1
## stdout-json: ""
602 |
|
603 |
#### Long Python Example |
604 |
|
605 |
# https://docs.python.org/3/reference/lexical_analysis.html#integer-literals |
606 |
|
607 |
# integer ::= decinteger | bininteger | octinteger | hexinteger |
608 |
# decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")* |
609 |
# bininteger ::= "0" ("b" | "B") (["_"] bindigit)+ |
610 |
# octinteger ::= "0" ("o" | "O") (["_"] octdigit)+ |
611 |
# hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+ |
612 |
# nonzerodigit ::= "1"..."9" |
613 |
# digit ::= "0"..."9" |
614 |
# bindigit ::= "0" | "1" |
615 |
# octdigit ::= "0"..."7" |
616 |
# hexdigit ::= digit | "a"..."f" | "A"..."F" |
617 |
|
618 |
shopt -s oil:all |
619 |
|
620 |
DecDigit = / [0-9] / |
621 |
BinDigit = / [0-1] / |
622 |
OctDigit = / [0-7] / |
623 |
HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class |
624 |
|
625 |
DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* / |
626 |
BinInt = / '0' [b B] ('_'? BinDigit)+ / |
627 |
OctInt = / '0' [o O] ('_'? OctDigit)+ / |
628 |
HexInt = / '0' [x X] ('_'? HexDigit)+ / |
629 |
|
630 |
Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end / |
631 |
|
632 |
#echo $Integer |
633 |
|
634 |
if ( '123' ~ Integer) { echo 'Y' } |
635 |
if ( 'zzz' !~ Integer) { echo 'N' } |
636 |
|
637 |
if ('123_000' ~ Integer) { echo 'Y decimal' } |
638 |
if ('000_123' !~ Integer) { echo 'N decimal' } |
639 |
|
640 |
if ( '0b100' ~ Integer) { echo 'Y binary' } |
641 |
if ( '0b102' !~ Integer) { echo 'N binary' } |
642 |
|
643 |
if ( '0o755' ~ Integer) { echo 'Y octal' } |
644 |
if ( '0o778' !~ Integer) { echo 'N octal' } |
645 |
|
646 |
if ( '0xFF' ~ Integer) { echo 'Y hex' } |
647 |
if ( '0xFG' !~ Integer) { echo 'N hex' } |
648 |
|
649 |
## STDOUT: |
650 |
Y |
651 |
N |
652 |
Y decimal |
653 |
N decimal |
654 |
Y binary |
655 |
N binary |
656 |
Y octal |
657 |
N octal |
658 |
Y hex |
659 |
N hex |
660 |
## END |
661 |
|
662 |
#### Invalid sh operation on eggex
# Applying a shell-style indexed '+=' assignment to a variable holding an eggex
# is expected to fail with status 1 and no output.
var pat = / d+ /
#pat[invalid]=1
pat[invalid]+=1
## status: 1
## stdout-json: ""
668 |
|