1 # Spec tests for Oil's regular expression (eggex) syntax and the ERE text it prints as.
2
3 #### /^.$/
4 shopt -s oil:all
5 var pat = ''
6
7 setvar pat = /^.$/  # terse spelling: anchors and dot written as in ERE
8 echo pat=$pat
9
10 setvar pat = /%start dot %end/  # spelled-out spelling; expected to print the same ERE
11 echo pat=$pat
12
13 if ('' ~ pat) { # ERE syntax; the empty string is expected NOT to match
14 echo yes
15 } else {
16 echo no
17 }
18 # $pat is same as pat
19 if ('f' ~ pat) { # ERE syntax; a one-character string is expected to match
20 echo yes
21 } else {
22 echo no
23 }
24
25 ## STDOUT:
26 pat=^.$
27 pat=^.$
28 no
29 yes
30 ## END
31
32
33 #### /.+/
34 shopt -s oil:all
35
36 var pat = /.+/  # eggex literal assigned directly in a var declaration
37 echo $pat  # an eggex substituted into a word prints as its ERE text
38
39 var s = 'foo'
40 if (s ~ pat) { # ERE syntax; non-empty subject is expected to match
41 echo yes
42 }
43 var empty = ''
44 if (empty ~ pat) { echo yes } else { echo no }  # + needs at least one character: expects no
45 ## STDOUT:
46 .+
47 yes
48 no
49 ## END
50
51 #### Positional captures with _match
52 shopt -s oil:all
53
54 var x = 'zz 2020-08-20'
55
56 if [[ $x =~ ([[:digit:]]+)-([[:digit:]]+) ]] {  # bash-style match: fills BASH_REMATCH
57 argv.py "${BASH_REMATCH[@]}"
58 }
59
60 setvar BASH_REMATCH = %(reset)  # sentinel: shows whether the eggex match below touches BASH_REMATCH
61
62 if (x ~ /<d+> '-' <d+>/) {
63 argv.py "${BASH_REMATCH[@]}"  # expected to still print the sentinel
64 argv.py $_match(0) $_match(1) $_match(2)  # whole match, then each positional group
65
66 argv.py $_match() # synonym for _match(0); prints its own line of output
67
68 # TODO: Also test _start() and _end()
69 }
70 ## STDOUT:
71 ['2020-08', '2020', '08']
72 ['reset']
73 ['2020-08', '2020', '08']
74 ['2020-08']
75 ## END
75
76 #### Named captures with _match
77 shopt -s oil:all
78
79 var x = 'zz 2020-08-20'
80
81 if (x ~ /<d+ : year> '-' <d+ : month>/) {  # <pattern : name> labels each capture
82 argv.py $_match('year') $_match('month')  # captures looked up by name instead of position
83 }
84 ## STDOUT:
85 ['2020', '08']
86 ## END
87
88 #### Repeat {1,3} etc.
89 var pat = null
90
91 setvar pat = /d{2}/  # exact count
92 echo $pat
93 setvar pat = /d{1,3}/  # lower and upper bound
94 echo $pat
95 setvar pat = /d{1,}/  # lower bound only
96 echo $pat
97 setvar pat = /d{,3}/  # upper bound only
98 echo $pat
99
100
101 ## STDOUT:
102 [[:digit:]]{2}
103 [[:digit:]]{1,3}
104 [[:digit:]]{1,}
105 [[:digit:]]{,3}
106 ## END
107
108
109 #### d+ digit+ ~d+ ~digit+
110 shopt -s oil:all
111
112 var pat = ''
113
114 setvar pat = /d+/  # short class name
115 echo $pat
116 if ('42' ~ pat) { echo yes }
117
118 var empty = ''
119 if (empty ~ pat) { echo yes } else { echo no }  # + needs at least one digit: expects no
120
121 setvar pat = /digit+/  # long class name; expected to print the same ERE as d+
122 echo $pat
123 setvar pat = /~d+/  # ~ negates the class: expected [^[:digit:]]+
124 echo $pat
125 setvar pat = /~digit+/  # negated long name; expected to print the same ERE as ~d+
126 echo $pat
127
128
129 ## STDOUT:
130 [[:digit:]]+
131 yes
132 no
133 [[:digit:]]+
134 [^[:digit:]]+
135 [^[:digit:]]+
136 ## END
137
138 #### Alternation and sequence
139 var pat = ''
140 setvar pat = /s d+ | w*/  # the sequence 's d+' groups before the alternation
141 echo $pat
142 setvar pat = /s d+ or w*/  # 'or' spelling; expected to print the same ERE as '|'
143 echo $pat
144 ## STDOUT:
145 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
146 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
147 ## END
148
149 #### Char Class Ranges
150 shopt -s oil:all
151
152 var pat = ''
153 setvar pat = /[0-9 a-f]+/  # the space between ranges is not part of the class (see expected output)
154 echo $pat
155 # Equivalent spelling with quoted range endpoints; expected to print the same ERE
156 setvar pat = /['0' - '9' 'a' - 'f']+/
157 echo $pat
158
159 if ('0123' ~ pat) { echo yes } else { echo no }
160 if ('zzz' ~ pat) { echo yes } else { echo no }
161 if ('' ~ pat) { echo yes } else { echo no }
162 ## STDOUT:
163 [0-9a-f]+
164 [0-9a-f]+
165 yes
166 no
167 no
168 ## END
169
170 #### Char Class Set
171 shopt -s oil:all
172 var pat = ''
173
174 # This is NOT allowed (kept commented out so the case still runs)
175 # setvar pat = /[a b c]+/
176
177 setvar pat = /['abc']+/  # a quoted string inside [] contributes each of its characters
178 echo $pat
179
180 if ('cbcb' ~ pat) { echo yes } else { echo no }
181 if ('0123' ~ pat) { echo yes } else { echo no }
182 if ('' ~ pat) { echo yes } else { echo no }
183 ## STDOUT:
184 [abc]+
185 yes
186 no
187 no
188 ## END
189
190 #### Range with escaped characters
191 shopt -s oil:all
192
193 var pat = null
194
195 setvar pat = / [ \x00 - \x0f ] /  # range endpoints given as byte escapes
196 echo $pat | od -A n -t x1  # hex dump, since the printed pattern holds unprintable bytes
197
198 ## STDOUT:
199 5b 00 2d 0f 5d 0a
200 ## END
201
202
203 #### Group ()
204 shopt -s oil:all
205 var pat = ''
206
207 setvar pat = /(%start s or d d)/  # only the first branch is anchored (see expected ERE)
208 echo $pat
209
210 if (' foo' ~ pat) { echo yes } else { echo no }  # leading space: first branch
211 if ('-00-' ~ pat) { echo yes } else { echo no }  # two digits anywhere: second branch
212 if ('foo' ~ pat) { echo yes } else { echo no }  # neither branch applies
213
214 ## STDOUT:
215 (^[[:space:]]|[[:digit:]][[:digit:]])
216 yes
217 yes
218 no
219 ## END
220
221 #### Capture is acceptable as a group
222 shopt -s oil:all
223 var pat = /<%start s | d d>/  # <...> capture; expected to print as a plain parenthesized group
224 echo $pat
225 ## STDOUT:
226 (^[[:space:]]|[[:digit:]][[:digit:]])
227 ## END
228
229 #### Named Capture Decays Without Name
230 shopt -s oil:all
231 var pat = /<d+ : month>/
232 echo $pat  # the printed ERE keeps the group but drops the name 'month'
233
234 if ('123' ~ pat) {
235 echo yes
236 }
237
238 ## STDOUT:
239 ([[:digit:]]+)
240 yes
241 ## END
242
243 #### Named Capture With ~ Assigns Variable
244 shopt -s oil:all
245 var pat = /<d+ : month>/
246 echo $pat  # NOTE(review): 'TODO MONTH' in the expected output is a placeholder, not real output -- confirm the intended value
247
248 if ('123' ~ pat) {
249 echo yes
250 = month
251 }
252 ## STDOUT:
253 ([[:digit:]]+)
254 yes
255 TODO MONTH
256 ## END
257
258 #### literal ''
259 shopt -s oil:all
260 var pat = ''
261
262 setvar pat = /'abc' 'def'/  # adjacent quoted literals form a sequence: expected abcdef
263 echo $pat
264
265 #setvar pat = /'abc' '^ + * ?'/
266 #echo $pat
267
268 if ('abcde' ~ pat) { echo yes } else { echo no }  # one character short: expects no
269 if ('abcdef' ~ pat) { echo yes } else { echo no }
270
271 ## STDOUT:
272 abcdef
273 no
274 yes
275 ## END
276
277 #### double quoted, $x, and ${x}
278 shopt -s oil:all
279 var pat = ''
280
281 var x = 'x'
282 var y = 'y'
283 setvar pat = / $x ${x} "abc" "$x${y}"/  # four terms in sequence: x, x, abc, xy
284 echo $pat
285
286 if ('xxabcx' ~ pat) { echo yes } else { echo no }  # missing the trailing y: expects no
287 if ('xxabcxyf' ~ pat) { echo yes } else { echo no }  # unanchored, so the extra f is fine
288
289 ## STDOUT:
290 xxabcxy
291 no
292 yes
293 ## END
294
295 #### @splice
296 shopt -s oil:all
297 var d = /d+/;  # sub-pattern reused below
298 var ip = / @d '.' @d '.' @d '.' @d /  # @d splices the eggex held in d; '.' is a literal dot
299 echo $ip
300 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
301 if ('0.0.0' ~ ip) { echo yes } else { echo no }  # only three groups: expects no
302 ## STDOUT:
303 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
304 yes
305 no
306 ## END
307
308 #### splice with capital letters
309 shopt -s oil:all
310 var D = /d+/;  # capitalized name
311 var ip = / D '.' D '.' D '.' D /  # spliced without the @ prefix; expected ERE matches the @splice case
312 echo $ip
313 if ('0.0.0.0' ~ ip) { echo yes } else { echo no }
314 if ('0.0.0' ~ ip) { echo yes } else { echo no }
315 ## STDOUT:
316 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
317 yes
318 no
319 ## END
320
321 #### Matching escaped tab character
322 shopt -s oil:all
323
324 # BUG: need C strings in array literal
325 var lines=%($'aa\tbb' $'cc\tdd')
326
327 var pat = / ('a' [\t] 'b') /  # \t inside a class becomes a literal tab in the printed ERE
328 write pat=$pat
329 write @lines | egrep $pat  # only the aa<TAB>bb line is expected to pass the filter
330
331 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
332
333 #### Match non-ASCII byte denoted using c'\xff'
334 shopt -s oil:all
335 var pat = /[ c'\xff' ]/;  # single byte 0xff given as a C-style string term
336
337 echo $pat | od -A n -t x1  # hex dump, since the printed pattern holds a raw 0xff byte
338 if (c'\xff' ~ pat) { echo yes } else { echo no }
339 if (c'\xfe' ~ pat) { echo yes } else { echo no }  # neighbouring byte value: expects no
340
341 ## STDOUT:
342 5b ff 5d 0a
343 yes
344 no
345 ## END
346
347 #### Match non-ASCII byte denoted using \xff
348 shopt -s oil:all
349 var pat = /[ \xff ]/;  # same byte as the c'\xff' case, written as a bare escape
350
351 echo $pat | od -A n -t x1  # expected dump is identical to the c'\xff' case
352 if (c'\xff' ~ pat) { echo yes } else { echo no }
353 if (c'\xfe' ~ pat) { echo yes } else { echo no }
354
355 ## STDOUT:
356 5b ff 5d 0a
357 yes
358 no
359 ## END
360
361 #### ERE can express Unicode escapes that are in the ASCII range
362 shopt -s oil:all
363 var pat = /[ \u{7f} ]/;  # code point 0x7f; expected to print as the single byte 7f
364
365 echo $pat | od -A n -t x1
366 if (c'\x7f' ~ pat) { echo yes } else { echo no }
367 if (c'\x7e' ~ pat) { echo yes } else { echo no }
368
369 var pat2 = /[ \u{7f} ]/;
370 var pat3 = /[ \u{0007f} ]/;  # leading zeros; expected to print the same as pat2
371 test "$pat2" = "$pat3" && echo 'equal'
372
373 ## STDOUT:
374 5b 7f 5d 0a
375 yes
376 no
377 equal
378 ## END
379
380 #### ERE can't express higher Unicode escapes
381 shopt -s oil:all
382 var pat = /[ \u{ff} ]/;  # code point above 0x7f; the case expects status 1 and no output at all
383
384 echo $pat | od -A n -t x1
385 if (c'\x7f' ~ pat) { echo yes } else { echo no }
386 if (c'\x7e' ~ pat) { echo yes } else { echo no }
387
388 ## status: 1
389 ## stdout-json: ""
390
391 #### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
392 var bytes = c'\x7f\xff'  # two bytes, the second one non-ASCII
393 var pat = / [ $bytes ] /  # multi-byte string used as one class term
394 echo $pat  # the case expects status 1 and no output
395 ## status: 1
396 ## stdout-json: ""
397
398 #### Matching escaped tab character
399 shopt -s oil:all  # NOTE(review): this case repeats the earlier 'Matching escaped tab character' case line for line
400
401 # BUG: need C strings in array literal
402 var lines=%($'aa\tbb' $'cc\tdd')
403
404 var pat = / ('a' [\t] 'b') /  # \t inside a class becomes a literal tab in the printed ERE
405 write pat=$pat
406 write @lines | egrep $pat  # only the aa<TAB>bb line is expected to pass the filter
407
408 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
409
410 #### Matching ] and \ and ' and " in character classes
411 shopt -s oil:all
412
413 # BUG: need C strings in array literal
414 var lines=%(
415 'backslash \'
416 'rbracket ]'
417 'lbracket ['
418 "sq '"
419 'dq "'
420 )
421
422 # Bracket-expression quirk (seen with GNU egrep): a literal ] has to come first!
423 # []abc] works, but [abc\]] does NOT, so the ']' term leads the class below.
424
425 var pat = / [ ']' \\ \' \" ] /
426 write pat=$pat
427 write @lines | egrep $pat  # every line except 'lbracket [' is expected to pass the filter
428
429 ## STDOUT:
430 pat=[]\\'"]
431 backslash \
432 rbracket ]
433 sq '
434 dq "
435 ## END
436
437 #### Matching literal hyphen in character classes
438 shopt -s oil:all
439
440 var literal = '-'
441 var pat = / [ 'a' $literal 'b' ${literal} "-" ] /  # hyphen supplied three ways: $var, ${var}, "..."
442 write pat=$pat  # each hyphen is expected to print as \-
443 write 'c-d' 'ab' 'cd' | grep $pat  # 'cd' has none of the class members and is filtered out
444 ## STDOUT:
445 pat=[a\-b\-\-]
446 c-d
447 ab
448 ## END
449
450 #### Repeated String Literal With Single Char
451 shopt -s oil:all
452
453 var literal = 'f'
454 var pat = null
455
456 setvar pat = / %start $literal+ %end /  # repeat applied directly to a one-character string
457 echo $pat
458 setvar pat = / %start ($literal)+ %end /  # same repeat applied to a group
459 echo $pat
460
461 if ('fff' ~ pat) { echo yes }
462 if ('foo' !~ pat) { echo no }  # !~ is the negated match operator
463
464 ## STDOUT:
465 ^f+$
466 ^(f)+$
467 yes
468 no
469 ## END
470
471 #### Error when unparenthesized string of more than one character is repeated
472 shopt -s oil:all
473
474 var literal = 'foo'
475 var pat = null
476
477 setvar pat = / %start $literal+ %end /  # repeat on a multi-character string without a group
478 echo $pat  # the case expects status 1 with empty stdout, so nothing may print from here on
479 setvar pat = / %start ($literal)+ %end /
480 echo $pat
481
482 if ('foofoo' ~ pat) { echo yes }
483 if ('foof' !~ pat) { echo no }
484
485 ## status: 1
486 ## stdout-json: ""
487
488 #### Instead of c'foo\\bar' use 'foo' \\ 'bar'
489 shopt -s oil:all
490 var pat = /'foo' \\ 'bar'/  # \\ as its own term between two quoted literals
491 echo $pat  # expected to print the backslash doubled, i.e. escaped for ERE
492
493 if (r'foo\bar' ~ pat) { echo yes }
494 if (r'foo.bar' !~ pat) { echo no }  # a dot is not a backslash
495 ## STDOUT:
496 foo\\bar
497 yes
498 no
499 ## END
500
501 #### Negation of Character Class
502 shopt -s oil:all
503
504 var pat = / ~[ a-z ] /  # ~ in front of a class literal; expected to print as [^a-z]
505 echo $pat
506
507 if ('0' ~ pat) { echo yes }
508 if ('a' !~ pat) { echo no }
509
510 ## STDOUT:
511 [^a-z]
512 yes
513 no
514 ## END
515
516 #### Posix and Perl class in class literals
517 shopt -s oil:all
518
519 var pat = null
520
521 setvar pat = / [ space 'z' ] /  # named class mixed with a literal inside one class
522 echo $pat
523 #setvar pat = / [ ~space 'z' ] /
524 #echo $pat
525
526 # PROBLEM: can't negate individual POSIX classes. They would have to be a Perl
527 # class to be \D or \S.
528 # A leading ^, as in [^[:space:]z], negates the whole bracket expression,
529 # not only [:space:] (presumably the point of the original note -- confirm).
530
531 setvar pat = / [ digit 'z' ] /
532 echo $pat
533 #setvar pat = / [ ~digit 'z' ] /
534 #echo $pat
535
536 ## STDOUT:
537 [[:space:]z]
538 [[:digit:]z]
539 ## END
540
541 #### [~d] can't be negated because it's a literal character
542 setvar pat = / [ ~d 'z' ] /  # the case expects status 2 here, unlike status 1 for [ ~digit ... ]
543 echo $pat
544 ## status: 2
545 ## stdout-json: ""
546
547 #### [~digit] can't be negated in POSIX ERE (but yes in Perl)
548 var pat = null
549 setvar pat = / [ ~digit 'z' ] /  # negated named class inside a class literal
550 echo $pat  # the case expects status 1 and no output
551 ## status: 1
552 ## stdout-json: ""
553
554 #### Long Python Example
555
556 # https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
557
558 # integer ::= decinteger | bininteger | octinteger | hexinteger
559 # decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
560 # bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
561 # octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
562 # hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
563 # nonzerodigit ::= "1"..."9"
564 # digit ::= "0"..."9"
565 # bindigit ::= "0" | "1"
566 # octdigit ::= "0"..."7"
567 # hexdigit ::= digit | "a"..."f" | "A"..."F"
568
569 shopt -s oil:all
570
571 DecDigit = / [0-9] /
572 BinDigit = / [0-1] /
573 OctDigit = / [0-7] /
574 HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
575
576 DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /  # mirrors the decinteger rule above
577 BinInt = / '0' [b B] ('_'? BinDigit)+ /
578 OctInt = / '0' [o O] ('_'? OctDigit)+ /
579 HexInt = / '0' [x X] ('_'? HexDigit)+ /
580
581 Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /  # anchored at both ends: the whole subject must be one literal
582
583 #echo $Integer
584
585 if ( '123' ~ Integer) { echo 'Y' }
586 if ( 'zzz' !~ Integer) { echo 'N' }
587
588 if ('123_000' ~ Integer) { echo 'Y decimal' }
589 if ('000_123' !~ Integer) { echo 'N decimal' }  # a leading 0 may only be followed by more zeros
590
591 if ( '0b100' ~ Integer) { echo 'Y binary' }
592 if ( '0b102' !~ Integer) { echo 'N binary' }  # 2 is not a binary digit
593
594 if ( '0o755' ~ Integer) { echo 'Y octal' }
595 if ( '0o778' !~ Integer) { echo 'N octal' }  # 8 is not an octal digit
596
597 if ( '0xFF' ~ Integer) { echo 'Y hex' }
598 if ( '0xFG' !~ Integer) { echo 'N hex' }  # G is not a hex digit
599
600 ## STDOUT:
601 Y
602 N
603 Y decimal
604 N decimal
605 Y binary
606 N binary
607 Y octal
608 N octal
609 Y hex
610 N hex
611 ## END
612
613 #### Invalid sh operation on eggex
614 var pat = / d+ /
615 #pat[invalid]=1
616 pat[invalid]+=1  # sh-style indexed append on a variable holding an eggex; the case expects status 1
617 ## status: 1
618 ## stdout-json: ""
619