1 # Spec tests for Oil's regular expression syntax and the POSIX ERE strings it translates to.
2
3 #### /^.$/
4 shopt -s oil:all
5 var pat = ''
6 # Form 1: the ERE metacharacters ^ . $ written directly in the regex literal.
7 setvar pat = /^.$/
8 echo pat=$pat
9 # Form 2: the named spellings %start dot %end; expected to print the same ^.$
10 setvar pat = /%start dot %end/
11 echo pat=$pat
12
13 if ('' ~ pat) { # empty string has no character for '.', so expect "no"
14 echo yes
15 } else {
16 echo no
17 }
18 # $pat is the same as pat
19 if ('f' ~ pat) { # exactly one character, so expect "yes"
20 echo yes
21 } else {
22 echo no
23 }
24
25 ## STDOUT:
26 pat=^.$
27 pat=^.$
28 no
29 yes
30 ## END
31
32
33 #### /.+/
34 shopt -s oil:all
35 # .+ needs at least one character; it is expected to print unchanged.
36 var pat = /.+/
37 echo $pat
38
39 var s = 'foo'
40 if (s ~ pat) { # non-empty string: expect "yes"
41 echo yes
42 }
43 var empty = ''
44 if (empty ~ pat) { echo yes } else { echo no } # empty string: expect "no"
45 ## STDOUT:
46 .+
47 yes
48 no
49 ## END
50
51 #### Repeat {1,3} etc.
52 var pat = null
53 # d is expected to print as [[:digit:]]; each {m,n} form is carried over as written.
54 setvar pat = /d{2}/
55 echo $pat
56 setvar pat = /d{1,3}/
57 echo $pat
58 setvar pat = /d{1,}/
59 echo $pat
60 setvar pat = /d{,3}/
61 echo $pat
62 # NOTE(review): unlike most cases in this file, this one does not run 'shopt -s oil:all' - confirm that is intended.
63
64 ## STDOUT:
65 [[:digit:]]{2}
66 [[:digit:]]{1,3}
67 [[:digit:]]{1,}
68 [[:digit:]]{,3}
69 ## END
70
71
72 #### d+ digit+ ~d+ ~digit+
73 shopt -s oil:all
74 # d and digit are expected to print as [[:digit:]]; the ~ prefix as the negated [^[:digit:]].
75 var pat = ''
76
77 setvar pat = /d+/
78 echo $pat
79 if ('42' ~ pat) { echo yes } # digits present: expect "yes"
80
81 var empty = ''
82 if (empty ~ pat) { echo yes } else { echo no } # nothing to match: expect "no"
83 # The remaining spellings are only printed, not matched against input.
84 setvar pat = /digit+/
85 echo $pat
86 setvar pat = /~d+/
87 echo $pat
88 setvar pat = /~digit+/
89 echo $pat
90
91
92 ## STDOUT:
93 [[:digit:]]+
94 yes
95 no
96 [[:digit:]]+
97 [^[:digit:]]+
98 [^[:digit:]]+
99 ## END
100
101 #### Alternation and sequence
102 var pat = ''
103 setvar pat = /s d+ | w*/
104 echo $pat # adjacent terms (s d+) form a sequence; '|' separates the alternatives
105 setvar pat = /s d+ or w*/
106 echo $pat # 'or' is expected to print the same ERE as '|'
107 ## STDOUT:
108 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
109 [[:space:]][[:digit:]]+|[[:alpha:][:digit:]_]*
110 ## END
111
112 #### Char Class Ranges
113 shopt -s oil:all
114 # Ranges may be written bare (0-9 a-f) or with quoted endpoints; both should print [0-9a-f]+.
115 var pat = ''
116 setvar pat = /[0-9 a-f]+/
117 echo $pat
118 # Equivalent spelling with quoted endpoints
119 setvar pat = /['0' - '9' 'a' - 'f']+/
120 echo $pat
121
122 if ('0123' ~ pat) { echo yes } else { echo no } # characters in range: expect "yes"
123 if ('zzz' ~ pat) { echo yes } else { echo no } # z is outside both ranges: expect "no"
124 if ('' ~ pat) { echo yes } else { echo no } # + needs at least one character: expect "no"
125 ## STDOUT:
126 [0-9a-f]+
127 [0-9a-f]+
128 yes
129 no
130 no
131 ## END
132
133 #### Char Class Set
134 shopt -s oil:all
135 var pat = ''
136
137 # Bare letters are NOT allowed as set members:
138 # setvar pat = /[a b c]+/
139 # A quoted string inside the class supplies the members instead; expected to print [abc]+.
140 setvar pat = /['abc']+/
141 echo $pat
142
143 if ('cbcb' ~ pat) { echo yes } else { echo no } # only set members: expect "yes"
144 if ('0123' ~ pat) { echo yes } else { echo no } # no set members: expect "no"
145 if ('' ~ pat) { echo yes } else { echo no } # empty input: expect "no"
146 ## STDOUT:
147 [abc]+
148 yes
149 no
150 no
151 ## END
152
153 #### Range with escaped characters
154 shopt -s oil:all
155 # Hex escapes used as range endpoints; the result is piped through od so the raw bytes are visible.
156 var pat = null
157
158 setvar pat = / [ \x00 - \x0f ] /
159 echo $pat | od -A n -t x1
160 # Expected bytes: '[' 00 '-' 0f ']' followed by echo's newline (0a).
161 ## STDOUT:
162 5b 00 2d 0f 5d 0a
163 ## END
164
165
166 #### Group ()
167 shopt -s oil:all
168 var pat = ''
169 # Parentheses group the two alternatives; they are expected to appear as ( ... ) in the printed ERE.
170 setvar pat = /(%start s or d d)/
171 echo $pat
172 # ' foo' starts with a space, '-00-' contains two digits, 'foo' has neither.
173 if (' foo' ~ pat) { echo yes } else { echo no }
174 if ('-00-' ~ pat) { echo yes } else { echo no }
175 if ('foo' ~ pat) { echo yes } else { echo no }
176
177 ## STDOUT:
178 (^[[:space:]]|[[:digit:]][[:digit:]])
179 yes
180 yes
181 no
182 ## END
183
184 #### Capture is acceptable as a group
185 shopt -s oil:all
186 var pat = /<%start s | d d>/
187 echo $pat # the <...> capture is expected to print as a plain ( ... ) group
188 ## STDOUT:
189 (^[[:space:]]|[[:digit:]][[:digit:]])
190 ## END
191
192 #### Named Capture Decays Without Name
193 shopt -s oil:all
194 var pat = /<d+ : month>/
195 echo $pat
196 # The ': month' name does not appear in the expected ERE; only the group ( ... ) remains.
197 if ('123' ~ pat) {
198 echo yes
199 }
200
201 ## STDOUT:
202 ([[:digit:]]+)
203 yes
204 ## END
205
206 #### Named Capture With ~ Assigns Variable
207 shopt -s oil:all
208 var pat = /<d+ : month>/
209 echo $pat
210 # NOTE(review): the expected 'TODO MONTH' line below looks like a placeholder - confirm the intended output of '= month'.
211 if ('123' ~ pat) {
212 echo yes
213 = month
214 }
215 ## STDOUT:
216 ([[:digit:]]+)
217 yes
218 TODO MONTH
219 ## END
220
221 #### literal ''
222 shopt -s oil:all
223 var pat = ''
224 # Adjacent single-quoted strings form a sequence; the expected ERE is abcdef.
225 setvar pat = /'abc' 'def'/
226 echo $pat
227
228 #setvar pat = /'abc' '^ + * ?'/
229 #echo $pat
230 # 'abcde' lacks the trailing f; 'abcdef' contains the whole literal.
231 if ('abcde' ~ pat) { echo yes } else { echo no }
232 if ('abcdef' ~ pat) { echo yes } else { echo no }
233
234 ## STDOUT:
235 abcdef
236 no
237 yes
238 ## END
239
240 #### double quoted, $x, and ${x}
241 shopt -s oil:all
242 var pat = ''
243 # $x, ${x}, "abc" and "$x${y}" each contribute their string value; expected ERE is xxabcxy.
244 var x = 'x'
245 var y = 'y'
246 setvar pat = / $x ${x} "abc" "$x${y}"/
247 echo $pat
248 # 'xxabcx' is missing the final y; 'xxabcxyf' contains the full xxabcxy.
249 if ('xxabcx' ~ pat) { echo yes } else { echo no }
250 if ('xxabcxyf' ~ pat) { echo yes } else { echo no }
251
252 ## STDOUT:
253 xxabcxy
254 no
255 yes
256 ## END
257
258 #### @splice
259 shopt -s oil:all
260 var d = /d+/;
261 var ip = / @d '.' @d '.' @d '.' @d /
262 echo $ip # each @d is expected to expand to the ERE of d; the quoted '.' prints as \.
263 if ('0.0.0.0' ~ ip) { echo yes } else { echo no } # four dotted groups: expect "yes"
264 if ('0.0.0' ~ ip) { echo yes } else { echo no } # only three groups: expect "no"
265 ## STDOUT:
266 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
267 yes
268 no
269 ## END
270
271 #### splice with capital letters
272 shopt -s oil:all
273 var D = /d+/;
274 var ip = / D '.' D '.' D '.' D /
275 echo $ip # the capitalized name D is spliced without an @ prefix; same expected ERE as the @d form
276 if ('0.0.0.0' ~ ip) { echo yes } else { echo no } # four dotted groups: expect "yes"
277 if ('0.0.0' ~ ip) { echo yes } else { echo no } # only three groups: expect "no"
278 ## STDOUT:
279 [[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
280 yes
281 no
282 ## END
283
284 #### Matching escaped tab character
285 shopt -s oil:all
286
287 # BUG: need C strings in array literal
288 var lines=@($'aa\tbb' $'cc\tdd')
289 # [\t] is expected to put a literal tab byte inside the bracket expression (see stdout-json).
290 var pat = / ('a' [\t] 'b') /
291 write pat=$pat
292 write @lines | egrep $pat
293 # Only the first line contains a<TAB>b, so egrep should print just that line.
294 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
295
296 #### Match non-ASCII byte denoted using c'\xff'
297 shopt -s oil:all
298 var pat = /[ c'\xff' ]/;
299 # od shows the class holds the single raw byte 0xff between '[' and ']'.
300 echo $pat | od -A n -t x1
301 if (c'\xff' ~ pat) { echo yes } else { echo no } # same byte: expect "yes"
302 if (c'\xfe' ~ pat) { echo yes } else { echo no } # different byte: expect "no"
303
304 ## STDOUT:
305 5b ff 5d 0a
306 yes
307 no
308 ## END
309
310 #### Match non-ASCII byte denoted using \xff
311 shopt -s oil:all
312 var pat = /[ \xff ]/;
313 # The bare \xff escape inside the class is expected to yield the single raw byte 0xff.
314 echo $pat | od -A n -t x1
315 if (c'\xff' ~ pat) { echo yes } else { echo no } # same byte: expect "yes"
316 if (c'\xfe' ~ pat) { echo yes } else { echo no } # different byte: expect "no"
317
318 ## STDOUT:
319 5b ff 5d 0a
320 yes
321 no
322 ## END
323
324 #### ERE can express Unicode escapes that are in the ASCII range
325 shopt -s oil:all
326 var pat = /[ \u007f ]/;
327 # \u007f is within ASCII, so it is expected to be emitted as the single byte 0x7f.
328 echo $pat | od -A n -t x1
329 if (c'\x7f' ~ pat) { echo yes } else { echo no } # same byte: expect "yes"
330 if (c'\x7e' ~ pat) { echo yes } else { echo no } # different byte: expect "no"
331
332 ## STDOUT:
333 5b 7f 5d 0a
334 yes
335 no
336 ## END
337
338 #### ERE can't express higher Unicode escapes
339 shopt -s oil:all
340 var pat = /[ \u00ff ]/;
341 # \u00ff is outside ASCII; this case expects failure (status 1) with nothing on stdout.
342 echo $pat | od -A n -t x1
343 if (c'\x7f' ~ pat) { echo yes } else { echo no }
344 if (c'\x7e' ~ pat) { echo yes } else { echo no }
345 # Since stdout must be empty, none of the echo lines above may produce output.
346 ## status: 1
347 ## stdout-json: ""
348
349 #### non-ASCII bytes must be singleton terms, e.g. '\x7f\xff' is disallowed
350 var bytes = c'\x7f\xff'
351 var pat = / [ $bytes ] /
352 echo $pat # must not print: the case expects status 1 and empty stdout
353 ## status: 1
354 ## stdout-json: ""
355
356 #### Matching escaped tab character
357 shopt -s oil:all
358 # NOTE(review): this case is a verbatim repeat of the earlier case with the same title - confirm whether one copy can be dropped.
359 # BUG: need C strings in array literal
360 var lines=@($'aa\tbb' $'cc\tdd')
361 # [\t] is expected to put a literal tab byte inside the bracket expression (see stdout-json).
362 var pat = / ('a' [\t] 'b') /
363 write pat=$pat
364 write @lines | egrep $pat
365 # Only the first line contains a<TAB>b, so egrep should print just that line.
366 ## stdout-json: "pat=(a[\t]b)\naa\tbb\n"
367
368 #### Matching ] and \ and ' and " in character classes
369 shopt -s oil:all
370
371 # Input lines, each ending in one special character. NOTE(review): the 'BUG: need C strings in array literal' remark that stood here looks copied from the tab case (no C strings are used below) - confirm.
372 var lines=@(
373 'backslash \'
374 'rbracket ]'
375 'lbracket ['
376 "sq '"
377 'dq "'
378 )
379
380 # GNU quirk: a literal ] has to come first in a bracket expression.
381 # []abc] works, but [abc\]] does NOT.
382 # Accordingly the expected ERE lists ] first; 'lbracket [' is the only input line not expected in the output.
383 var pat = / [ ']' \\ \' \" ] /
384 write pat=$pat
385 write @lines | egrep $pat
386
387 ## STDOUT:
388 pat=[]\\'"]
389 backslash \
390 rbracket ]
391 sq '
392 dq "
393 ## END
394
395 #### Matching literal hyphen in character classes
396 shopt -s oil:all
397 # A hyphen arriving via $literal, ${literal} or "-" is expected to print as \- instead of forming a range.
398 var literal = '-'
399 var pat = / [ 'a' $literal 'b' ${literal} "-" ] /
400 write pat=$pat
401 write 'c-d' 'ab' 'cd' | grep $pat # 'c-d' and 'ab' contain a class member; 'cd' does not
402 ## STDOUT:
403 pat=[a\-b\-\-]
404 c-d
405 ab
406 ## END
407
408 #### Repeated String Literal With Single Char
409 shopt -s oil:all
410 # A one-character string can take + directly ($literal+) or inside a group (($literal)+).
411 var literal = 'f'
412 var pat = null
413
414 setvar pat = / %start $literal+ %end /
415 echo $pat
416 setvar pat = / %start ($literal)+ %end /
417 echo $pat
418 # pat now holds the grouped form, expected to print as ^(f)+$
419 if ('fff' ~ pat) { echo yes }
420 if ('foo' !~ pat) { echo no }
421
422 ## STDOUT:
423 ^f+$
424 ^(f)+$
425 yes
426 no
427 ## END
428
429 #### Error when unparenthesized string of more than one character is repeated
430 shopt -s oil:all
431 # Same script as the single-character case, but 'foo' has three characters; expected result is status 1 with empty stdout.
432 var literal = 'foo'
433 var pat = null
434
435 setvar pat = / %start $literal+ %end /
436 echo $pat
437 setvar pat = / %start ($literal)+ %end /
438 echo $pat
439 # Since stdout must be empty, no echo in this case may produce output.
440 if ('foofoo' ~ pat) { echo yes }
441 if ('foof' !~ pat) { echo no }
442
443 ## status: 1
444 ## stdout-json: ""
445
446 #### Instead of c'foo\\bar' use 'foo' \\ 'bar'
447 shopt -s oil:all
448 var pat = /'foo' \\ 'bar'/
449 echo $pat # expected ERE: foo\\bar
450 # The \\ term stands for one literal backslash: r'foo\bar' should match, r'foo.bar' should not.
451 if (r'foo\bar' ~ pat) { echo yes }
452 if (r'foo.bar' !~ pat) { echo no }
453 ## STDOUT:
454 foo\\bar
455 yes
456 no
457 ## END
458
459 #### Negation of Character Class
460 shopt -s oil:all
461 # A ~ in front of the whole class negates it; the expected ERE is [^a-z].
462 var pat = / ~[ a-z ] /
463 echo $pat
464
465 if ('0' ~ pat) { echo yes } # '0' is not in a-z: expect "yes"
466 if ('a' !~ pat) { echo no } # 'a' is in a-z, so the !~ test holds: expect "no"
467
468 ## STDOUT:
469 [^a-z]
470 yes
471 no
472 ## END
473
474 #### Posix and Perl class in class literals
475 shopt -s oil:all
476 # Named classes (space, digit) may appear inside [ ] next to quoted characters.
477 var pat = null
478
479 setvar pat = / [ space 'z' ] /
480 echo $pat
481 #setvar pat = / [ ~space 'z' ] /
482 #echo $pat
483
484 # PROBLEM: individual POSIX classes can't be negated inside a bracket expression.
485 # They would have to be Perl classes such as \D or \S.
486 # NOTE(review): presumably the point is that a leading ^, as in [^[:space:]z], negates the whole class - confirm.
487 # [^[:space:]]
488
489 setvar pat = / [ digit 'z' ] /
490 echo $pat
491 #setvar pat = / [ ~digit 'z' ] /
492 #echo $pat
493
494 ## STDOUT:
495 [[:space:]z]
496 [[:digit:]z]
497 ## END
498
499 #### [~d] can't be negated because it's a literal character
500 setvar pat = / [ ~d 'z' ] /
501 echo $pat # must not print: the case expects status 2 and empty stdout
502 ## status: 2
503 ## stdout-json: ""
504
505 #### [~digit] can't be negated in POSIX ERE (but yes in Perl)
506 var pat = null
507 setvar pat = / [ ~digit 'z' ] /
508 echo $pat # must not print: the case expects status 1 and empty stdout
509 ## status: 1
510 ## stdout-json: ""
511
512 #### Long Python Example
513 # Builds a matcher for Python integer literals from the grammar quoted below.
514 # https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
515
516 # integer ::= decinteger | bininteger | octinteger | hexinteger
517 # decinteger ::= nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")*
518 # bininteger ::= "0" ("b" | "B") (["_"] bindigit)+
519 # octinteger ::= "0" ("o" | "O") (["_"] octdigit)+
520 # hexinteger ::= "0" ("x" | "X") (["_"] hexdigit)+
521 # nonzerodigit ::= "1"..."9"
522 # digit ::= "0"..."9"
523 # bindigit ::= "0" | "1"
524 # octdigit ::= "0"..."7"
525 # hexdigit ::= digit | "a"..."f" | "A"..."F"
526
527 shopt -s oil:all
528 # Digit classes, one per *digit rule in the grammar above.
529 DecDigit = / [0-9] /
530 BinDigit = / [0-1] /
531 OctDigit = / [0-7] /
532 HexDigit = / [0-9 a-f A-F] / # note: not splicing Digit into character class
533 # Integer forms; the capitalized digit patterns are spliced in by name, and '_'? allows an optional underscore before a digit.
534 DecInt = / [1-9] ('_'? DecDigit)* | '0'+ ('_'? '0')* /
535 BinInt = / '0' [b B] ('_'? BinDigit)+ /
536 OctInt = / '0' [o O] ('_'? OctDigit)+ /
537 HexInt = / '0' [x X] ('_'? HexDigit)+ /
538 # %start and %end anchor the pattern so the whole string must be an integer literal.
539 Integer = / %start (DecInt | BinInt | OctInt | HexInt) %end /
540
541 #echo $Integer
542 # Each pair below checks one accepted literal (~) and one rejected literal (!~).
543 if ( '123' ~ Integer) { echo 'Y' }
544 if ( 'zzz' !~ Integer) { echo 'N' }
545
546 if ('123_000' ~ Integer) { echo 'Y decimal' }
547 if ('000_123' !~ Integer) { echo 'N decimal' }
548
549 if ( '0b100' ~ Integer) { echo 'Y binary' }
550 if ( '0b102' !~ Integer) { echo 'N binary' }
551
552 if ( '0o755' ~ Integer) { echo 'Y octal' }
553 if ( '0o778' !~ Integer) { echo 'N octal' }
554
555 if ( '0xFF' ~ Integer) { echo 'Y hex' }
556 if ( '0xFG' !~ Integer) { echo 'N hex' }
557
558 ## STDOUT:
559 Y
560 N
561 Y decimal
562 N decimal
563 Y binary
564 N binary
565 Y octal
566 N octal
567 Y hex
568 N hex
569 ## END