1 # Test out Oil's regular expression syntax.
2
3 #### /^.$/
# The literal form /^.$/ and the spelled-out form / %start dot %end / are both
# expected to print as the ERE ^.$ ; that pattern must reject '' and accept 'f'.
4 shopt -s oil:all
5 var pat = ''
6
7 setvar pat = /^.$/
8 echo pat=$pat
9
10 setvar pat = /%start dot %end/
11 echo pat=$pat
12
13 if ('' ~ pat) { # empty string: expected not to match
14 echo yes
15 } else {
16 echo no
17 }
18 # $pat is same as pat
19 if ('f' ~ pat) { # exactly one character: expected to match
20 echo yes
21 } else {
22 echo no
23 }
24
25 ## STDOUT:
26 pat=^.$
27 pat=^.$
28 no
29 yes
30 ## END
31
32
33 #### /.+/
# /.+/ is expected to print as .+ and to match 'foo' but not the empty string.
34 shopt -s oil:all
35
36 var pat = /.+/
37 echo $pat
38
39 var s = 'foo'
40 if (s ~ pat) { # the right-hand side is an eggex value held in a variable
41 echo yes
42 }
43 var empty = ''
44 if (empty ~ pat) { echo yes } else { echo no }
45 ## STDOUT:
46 .+
47 yes
48 no
49 ## END
50
51 #### Positional captures with _match
# Compares captures from bash-style [[ =~ ]] (read from BASH_REMATCH) with
# captures from an eggex match (read through _match()).
52 shopt -s oil:all
53
54 var x = 'zz 2020-08-20'
55
56 if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] {
57 argv.py "${BASH_REMATCH[@]}"
58 }
59
# Overwrite BASH_REMATCH with a marker value; the expected output shows it is
# still ['reset'] after the eggex match below.
60 setvar BASH_REMATCH = %(reset)
61
62 if (x ~ /<d+> '-' <d+>/) {
63 argv.py "${BASH_REMATCH[@]}"
64 argv.py $_match(0) $_match(1) $_match(2)
65
66 argv.py $_match() # synonym for _match(0)
67
68 # NOTE: _start() and _end() are exercised by the separate '_start() and _end()' case in this file.
69 }
70 ## STDOUT:
71 ['2020-08', '2020', '08']
72 ['reset']
73 ['2020-08', '2020', '08']
74 ['2020-08']
75 ## END
76
77 #### Named captures with _match
# Groups declared as <d+ : year> and <d+ : month> are read back by name with
# _match('year') and _match('month').
78 shopt -s oil:all
79
80 var x = 'zz 2020-08-20'
81
82 if (x ~ /<d+ : year> '-' <d+ : month>/) {
83 argv.py $_match('year') $_match('month')
84 }
85 ## STDOUT:
86 ['2020', '08']
87 ## END
88
89 #### s ~ regex and s !~ regex
# Exercises ~ and !~ with string (ERE) patterns and checks what _match()
# returns after a successful match, after a true !~ test, and after a failed ~.
90 shopt -s oil:basic
91
92 var s = 'foo'
93 if (s ~ '.([[:alpha:]]+)') { # ERE syntax
94 echo matches
95 argv.py $_match(0) $_match(1)
96 }
# The expected output shows _match() still returning the captures from the
# earlier successful match inside this !~ branch.
97 if (s !~ '[[:digit:]]+') {
98 echo "does not match"
99 argv.py $_match(0) $_match(1)
100 }
101
102 if (s ~ '[[:digit:]]+') {
103 echo "matches"
104 }
105 # After the failed ~ above, the previous captures should be cleared now.
106 # The check below expects null (not ''); open question: should this be Undef instead?
107 var x = _match(0)
108 var y = _match(1)
109 if (x == null and y == null) {
110 echo 'cleared'
111 }
112
113 ## STDOUT:
114 matches
115 ['foo', 'oo']
116 does not match
117 ['foo', 'oo']
118 cleared
119 ## END
120
121 #### _start() and _end()
# _start()/_end() with no argument report the offsets of the whole match;
# _start(1)/_end(1) report the offsets of capture group 1.
122 shopt -s oil:basic
123
124 var s = 'foo123bar'
# Whole match: the digit run '123' occupies offsets 3..6.
125 if (s ~ /digit+/) {
126 echo start=$_start() end=$_end()
127 }
128
# word+ also matches digits and is greedy, so it consumes 'foo12' and leaves
# only the final '3' (offsets 5..6) for the captured digit+ group.
129 if (s ~ / word+ <digit+> /) {
130 echo start=$_start(1) end=$_end(1)
131 }
132 ## STDOUT:
133 start=3 end=6
134 start=5 end=6
135 ## END
136
137 #### Repeat {1,3} etc.
# Repetition ranges applied to d are expected to carry over to the ERE
# unchanged: {2}, {1,3}, {1,} and {,3}.
138 var pat = null
139
140 setvar pat = /d{2}/
141 echo $pat
142 setvar pat = /d{1,3}/
143 echo $pat
144 setvar pat = /d{1,}/
145 echo $pat
146 setvar pat = /d{,3}/
147 echo $pat
148
149
150 ## STDOUT:
151 [[:digit:]]{2}
152 [[:digit:]]{1,3}
153 [[:digit:]]{1,}
154 [[:digit:]]{,3}
155 ## END
156
157
158 #### d+ digit+ !d+ !digit+
# d and digit are expected to translate to [[:digit:]], and the negated forms
# !d and !digit to [^[:digit:]].
159 shopt -s oil:all
160
161 var pat = ''
162
163 setvar pat = /d+/
164 echo $pat
165 if ('42' ~ pat) { echo yes }
166
167 var empty = ''
168 if (empty ~ pat) { echo yes } else { echo no }
169
170 setvar pat = /digit+/
171 echo $pat
172 setvar pat = /!d+/
173 echo $pat
174 setvar pat = /!digit+/
175 echo $pat
176
177
178 ## STDOUT:
179 [[:digit:]]+
180 yes
181 no
182 [[:digit:]]+
183 [^[:digit:]]+
184 [^[:digit:]]+
185 ## END
186
187 #### Alternation and sequence
# The | operator and the keyword 'or' are expected to produce the same ERE
# alternation; adjacent terms are concatenated.
188 var pat = ''
189 setvar pat = /s d+ | w*/
190 echo $pat
191 setvar pat = /s d+ or w*/
192 echo $pat
193 ## STDOUT:
194 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
195 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
196 ## END
197
198 #### Char Class Ranges
# Unquoted ranges (0-9 a-f) and quoted ranges ('0' - '9' 'a' - 'f') are both
# expected to print as [0-9a-f]+.
199 shopt -s oil:all
200
201 var pat = ''
202 setvar pat = /[0-9 a-f]+/
203 echo $pat
204 # This is equivalent
205 setvar pat = /['0' - '9' 'a' - 'f']+/
206 echo $pat
207
208 if ('0123' ~ pat) { echo yes } else { echo no }
209 if ('zzz' ~ pat) { echo yes } else { echo no }
210 if ('' ~ pat) { echo yes } else { echo no }
211 ## STDOUT:
212 [0-9a-f]+
213 [0-9a-f]+
214 yes
215 no
216 no
217 ## END
218
219 #### Char Class Set
# A quoted string inside [] contributes each of its characters to the set:
# ['abc'] is expected to print as [abc].
220 shopt -s oil:all
221 var pat = ''
222
223 # This is NOT allowed
224 # setvar pat = /[a b c]+/
225
226 setvar pat = /['abc']+/
227 echo $pat
228
229 if ('cbcb' ~ pat) { echo yes } else { echo no }
230 if ('0123' ~ pat) { echo yes } else { echo no }
231 if ('' ~ pat) { echo yes } else { echo no }
232 ## STDOUT:
233 [abc]+
234 yes
235 no
236 no
237 ## END
238
239 #### Range with escaped characters
# The pattern is piped through od so the raw bytes are visible: the expected
# dump is '[', 0x00, '-', 0x0f, ']' followed by echo's newline.
240 shopt -s oil:all
241
242 var pat = null
243
244 setvar pat = / [ \x00 - \x0f ] /
245 echo $pat | od -A n -t x1
246
247 ## STDOUT:
248 5b 00 2d 0f 5d 0a
249 ## END
250
251
252 #### Group ()
# Parentheses group an alternation; %start binds only to the first branch,
# as the expected ERE (^[[:space:]]|[[:digit:]][[:digit:]]) shows.
253 shopt -s oil:all
254 var pat = ''
255
256 setvar pat = /(%start s or d d)/
257 echo $pat
258
259 if (' foo' ~ pat) { echo yes } else { echo no }
260 if ('-00-' ~ pat) { echo yes } else { echo no }
261 if ('foo' ~ pat) { echo yes } else { echo no }
262
263 ## STDOUT:
264 (^[[:space:]]|[[:digit:]][[:digit:]])
265 yes
266 yes
267 no
268 ## END
269
270 #### Capture is acceptable as a group
# A <...> capture around an alternation is expected to print as a plain
# parenthesized ERE group.
271 shopt -s oil:all
272 var pat = /<%start s | d d>/
273 echo $pat
274 ## STDOUT:
275 (^[[:space:]]|[[:digit:]][[:digit:]])
276 ## END
277
278 #### Named Capture Decays Without Name
# When printed, the named capture <d+ : month> is expected to appear as an
# unnamed ERE group, and the pattern must still match '123'.
279 shopt -s oil:all
280 var pat = /<d+ : month>/
281 echo $pat
282
283 if ('123' ~ pat) {
284 echo yes
285 }
286
287 ## STDOUT:
288 ([[:digit:]]+)
289 yes
290 ## END
291
292 #### Named Capture With ~ Assigns Variable
# NOTE(review): the last expected line is the placeholder 'TODO MONTH'; the
# output that '= month' should produce has not been pinned down in this case.
293 shopt -s oil:all
294 var pat = /<d+ : month>/
295 echo $pat
296
297 if ('123' ~ pat) {
298 echo yes
299 = month
300 }
301 ## STDOUT:
302 ([[:digit:]]+)
303 yes
304 TODO MONTH
305 ## END
306
307 #### literal ''
# Adjacent single-quoted literals are concatenated: 'abc' 'def' is expected to
# print as abcdef, which 'abcde' does not contain and 'abcdef' does.
308 shopt -s oil:all
309 var pat = ''
310
311 setvar pat = /'abc' 'def'/
312 echo $pat
313
314 #setvar pat = /'abc' '^ + * ?'/
315 #echo $pat
316
317 if ('abcde' ~ pat) { echo yes } else { echo no }
318 if ('abcdef' ~ pat) { echo yes } else { echo no }
319
320 ## STDOUT:
321 abcdef
322 no
323 yes
324 ## END
325
326 #### double quoted, $x, and ${x}
# $x, ${x}, a double-quoted literal and a double-quoted interpolation are
# substituted in order; with x='x' and y='y' the expected pattern is xxabcxy.
327 shopt -s oil:all
328 var pat = ''
329
330 var x = 'x'
331 var y = 'y'
332 setvar pat = / $x ${x} "abc" "$x${y}"/
333 echo $pat
334
335 if ('xxabcx' ~ pat) { echo yes } else { echo no }
336 if ('xxabcxyf' ~ pat) { echo yes } else { echo no }
337
338 ## STDOUT:
339 xxabcxy
340 no
341 yes
342 ## END
343
344 #### @splice
# @d splices the eggex stored in variable d into a larger pattern; the
# expected ERE is four [[:digit:]]+ runs separated by escaped dots.
345 shopt -s oil:all
346 var d = /d+/;
347 var ip = / @d '.' @d '.' @d '.' @d /
348 echo $ip
349 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
350 if ('0.0.0' ~ ip) { echo yes } else { echo no }
351 ## STDOUT:
352 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
353 yes
354 no
355 ## END
356
357 #### splice with capital letters
# The capitalized variable D is spliced without an @ sigil; the expected ERE
# is four [[:digit:]]+ runs separated by escaped dots.
358 shopt -s oil:all
359 var D = /d+/;
360 var ip = / D '.' D '.' D '.' D /
361 echo $ip
362 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
363 if ('0.0.0' ~ ip) { echo yes } else { echo no }
364 ## STDOUT:
365 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
366 yes
367 no
368 ## END
369
370 #### Matching escaped tab character
# [\t] in an eggex is expected to become a literal tab byte inside the ERE
# bracket expression; egrep must then select only the 'aa<TAB>bb' line.
371 shopt -s oil:all
372
373 # BUG: need C strings in array literal
374 var lines=%($'aa\tbb' $'cc\tdd')
375
376 var pat = / ('a' [\t] 'b') /
377 write pat=$pat
378 write @lines | egrep $pat
379
380 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
381
382 #### Match non-ASCII byte denoted using c'\xff'
# A c'\xff' string inside [] is expected to put the raw byte 0xff into the
# bracket expression (od shows 5b ff 5d); it must match 0xff but not 0xfe.
383 shopt -s oil:all
384 var pat = /[ c'\xff' ]/;
385
386 echo $pat | od -A n -t x1
387 if (c'\xff' ~ pat) { echo yes } else { echo no }
388 if (c'\xfe' ~ pat) { echo yes } else { echo no }
389
390 ## STDOUT:
391 5b ff 5d 0a
392 yes
393 no
394 ## END
395
396 #### Match non-ASCII byte denoted using \xff
# The bare escape \xff inside [] is expected to put the raw byte 0xff into the
# bracket expression (od shows 5b ff 5d); it must match 0xff but not 0xfe.
397 shopt -s oil:all
398 var pat = /[ \xff ]/;
399
400 echo $pat | od -A n -t x1
401 if (c'\xff' ~ pat) { echo yes } else { echo no }
402 if (c'\xfe' ~ pat) { echo yes } else { echo no }
403
404 ## STDOUT:
405 5b ff 5d 0a
406 yes
407 no
408 ## END
409
410 #### ERE can express Unicode escapes that are in the ASCII range
# \u{7f} is expected to become the single byte 0x7f; leading zeros in the
# escape (\u{0007f}) must yield an identical pattern string.
411 shopt -s oil:all
412 var pat = /[ \u{7f} ]/;
413
414 echo $pat | od -A n -t x1
415 if (c'\x7f' ~ pat) { echo yes } else { echo no }
416 if (c'\x7e' ~ pat) { echo yes } else { echo no }
417
418 var pat2 = /[ \u{7f} ]/;
419 var pat3 = /[ \u{0007f} ]/;
420 test "$pat2" = "$pat3" && echo 'equal'
421
422 ## STDOUT:
423 5b 7f 5d 0a
424 yes
425 no
426 equal
427 ## END
428
429 #### ERE can't express higher Unicode escapes
# The case expects exit status 1 and empty stdout, so the failure must happen
# before the first echo below produces any output.
430 shopt -s oil:all
431 var pat = /[ \u{ff} ]/;
432
433 echo $pat | od -A n -t x1
434 if (c'\x7f' ~ pat) { echo yes } else { echo no }
435 if (c'\x7e' ~ pat) { echo yes } else { echo no }
436
437 ## status: 1
438 ## stdout-json: ""
439
440 #### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
# Interpolating a two-byte string that contains \xff into a character class is
# expected to fail with status 1 before anything is printed.
441 var bytes = c'\x7f\xff'
442 var pat = / [ $bytes ] /
443 echo $pat
444 ## status: 1
445 ## stdout-json: ""
446
447 #### Matching escaped tab character
# NOTE(review): this case has the same name, code and expected output as the
# earlier 'Matching escaped tab character' case in this file; it looks like an
# accidental duplicate - confirm whether one copy should be repurposed.
448 shopt -s oil:all
449
450 # BUG: need C strings in array literal
451 var lines=%($'aa\tbb' $'cc\tdd')
452
453 var pat = / ('a' [\t] 'b') /
454 write pat=$pat
455 write @lines | egrep $pat
456
457 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
458
459 #### Matching ] and \ and ' and " in character classes
# Builds a class containing ], backslash, single quote and double quote, then
# filters five sample lines with egrep; only 'lbracket [' must be dropped.
460 shopt -s oil:all
461
462 # NOTE(review): no C strings are used in the array below; this remark appears copied from the tab case.
463 var lines=%(
464 'backslash \'
465 'rbracket ]'
466 'lbracket ['
467 "sq '"
468 'dq "'
469 )
470
471 # Weird GNU quirk: ] has to come first!
472 # []abc] works. But [abc\]] does NOT work. Stupid rule!
473
474 var pat = / [ ']' \\ \' \" ] /
475 write pat=$pat
476 write @lines | egrep $pat
477
478 ## STDOUT:
479 pat=[]\\'"]
480 backslash \
481 rbracket ]
482 sq '
483 dq "
484 ## END
485
486 #### Matching literal hyphen in character classes
# A hyphen arriving via $literal, ${literal} or "-" is expected to be emitted
# as \- in the bracket expression rather than forming a range.
487 shopt -s oil:all
488
489 var literal = '-'
490 var pat = / [ 'a' $literal 'b' ${literal} "-" ] /
491 write pat=$pat
492 write 'c-d' 'ab' 'cd' | grep $pat
493 ## STDOUT:
494 pat=[a\-b\-\-]
495 c-d
496 ab
497 ## END
498
499 #### Repeated String Literal With Single Char
# A one-character interpolated string may take a repetition operator directly
# ($literal+ prints as f+) or inside a group (($literal)+ prints as (f)+).
500 shopt -s oil:all
501
502 var literal = 'f'
503 var pat = null
504
505 setvar pat = / %start $literal+ %end /
506 echo $pat
507 setvar pat = / %start ($literal)+ %end /
508 echo $pat
509
510 if ('fff' ~ pat) { echo yes }
511 if ('foo' !~ pat) { echo no }
512
513 ## STDOUT:
514 ^f+$
515 ^(f)+$
516 yes
517 no
518 ## END
519
520 #### Error when unparenthesized string of more than one character is repeated
# The case expects exit status 1 and empty stdout, so the failure must happen
# before the first echo; the statements after that point are not expected to
# produce output.
521 shopt -s oil:all
522
523 var literal = 'foo'
524 var pat = null
525
526 setvar pat = / %start $literal+ %end /
527 echo $pat
528 setvar pat = / %start ($literal)+ %end /
529 echo $pat
530
531 if ('foofoo' ~ pat) { echo yes }
532 if ('foof' !~ pat) { echo no }
533
534 ## status: 1
535 ## stdout-json: ""
536
537 #### Instead of c'foo\\bar' use 'foo' \\ 'bar'
# A backslash is written as the standalone term \\ between quoted literals;
# the expected ERE is foo\\bar, matching the raw string foo\bar only.
538 shopt -s oil:all
539 var pat = /'foo' \\ 'bar'/
540 echo $pat
541
542 if (r'foo\bar' ~ pat) { echo yes }
543 if (r'foo.bar' !~ pat) { echo no }
544 ## STDOUT:
545 foo\\bar
546 yes
547 no
548 ## END
549
550 #### Negation of Character Class ![a-z]
# A leading ! on a bracketed class is expected to print as [^...]:
# ![ a-z ] becomes [^a-z], which matches '0' and does not match 'a'.
551 shopt -s oil:all
552
553 var pat = / ![ a-z ] /
554 echo $pat
555
556 if ('0' ~ pat) { echo yes }
557 if ('a' !~ pat) { echo no }
558
559 ## STDOUT:
560 [^a-z]
561 yes
562 no
563 ## END
564
565 #### Posix and Perl class in class literals
# Named classes (space, digit) may appear inside [] next to quoted literals;
# they are expected to print as nested POSIX classes such as [[:space:]z].
566 shopt -s oil:all
567
568 var pat = null
569
570 setvar pat = / [ space 'z' ] /
571 echo $pat
572 #setvar pat = / [ ~space 'z' ] /
573 #echo $pat
574
575 # PROBLEM: can't negate individual POSIX classes. They would have to be a Perl
576 # class to be \D or \S.
577 # [^[:space:]z] negates the whole thing! (NOTE(review): the original comment had no ^ here - confirm intent)
578 # [^[:space:]]
579
580 setvar pat = / [ digit 'z' ] /
581 echo $pat
582 #setvar pat = / [ ~digit 'z' ] /
583 #echo $pat
584
585 ## STDOUT:
586 [[:space:]z]
587 [[:digit:]z]
588 ## END
589
590 #### [!d] can't be negated because it's a literal character
# The case expects exit status 2 and empty stdout for !d inside a bracketed class.
591 setvar pat = / [ !d 'z' ] /
592 echo $pat
593 ## status: 2
594 ## stdout-json: ""
595
596 #### [!digit] can't be negated in POSIX ERE (but yes in Perl)
# The case expects exit status 1 and empty stdout for !digit inside a
# bracketed class, i.e. the failure must happen before the echo.
597 var pat = null
598 setvar pat = / [ !digit 'z' ] /
599 echo $pat
600 ## status: 1
601 ## stdout-json: ""
602
603 #### Long Python Example
# Rebuilds Python's integer-literal grammar (quoted below) from small named
# eggex pieces, then checks one accepted and one rejected input per form.
604
605 # https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
606
607 # integer ::= decinteger | bininteger | octinteger | hexinteger
608 # decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
609 # bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
610 # octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
611 # hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
612 # nonzerodigit ::= "1"..."9"
613 # digit ::= "0"..."9"
614 # bindigit ::= "0" | "1"
615 # octdigit ::= "0"..."7"
616 # hexdigit ::= digit | "a"..."f" | "A"..."F"
617
618 shopt -s oil:all
619
# Digit classes, one per radix.
620 DecDigit = / [0-9] /
621 BinDigit = / [0-1] /
622 OctDigit = / [0-7] /
623 HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
624
# Each integer form splices in its digit class by capitalized name.
625 DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /
626 BinInt = / '0' [b B] ('_'? BinDigit)+ /
627 OctInt = / '0' [o O] ('_'? OctDigit)+ /
628 HexInt = / '0' [x X] ('_'? HexDigit)+ /
629
# Anchored at both ends so the whole input has to be an integer literal.
630 Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /
631
632 #echo $Integer
633
634 if ( '123' ~ Integer) { echo 'Y' }
635 if ( 'zzz' !~ Integer) { echo 'N' }
636
637 if ('123_000' ~ Integer) { echo 'Y decimal' }
638 if ('000_123' !~ Integer) { echo 'N decimal' }
639
640 if ( '0b100' ~ Integer) { echo 'Y binary' }
641 if ( '0b102' !~ Integer) { echo 'N binary' }
642
643 if ( '0o755' ~ Integer) { echo 'Y octal' }
644 if ( '0o778' !~ Integer) { echo 'N octal' }
645
646 if ( '0xFF' ~ Integer) { echo 'Y hex' }
647 if ( '0xFG' !~ Integer) { echo 'N hex' }
648
649 ## STDOUT:
650 Y
651 N
652 Y decimal
653 N decimal
654 Y binary
655 N binary
656 Y octal
657 N octal
658 Y hex
659 N hex
660 ## END
661
662 #### Invalid sh operation on eggex
# Applies the shell-style indexed append pat[invalid]+=1 to a variable that
# holds an eggex; the case expects exit status 1 and empty stdout.
663 var pat = / d+ /
664 #pat[invalid]=1
665 pat[invalid]+=1
666 ## status: 1
667 ## stdout-json: ""
668