OILS
/
ysh
/
grammar.pgen2
1 |
# Grammar for Oil.
|
2 |
# Adapted from the Python 3.7 expression grammar, with several changes!
|
3 |
#
|
4 |
# TODO:
|
5 |
# - funcs in expression context, maybe disable lambdas like |x| x+1 for now?
|
6 |
# - Are tuples and heterogeneous lists in Oil or Tea?
|
7 |
#
|
8 |
# Tea also needs:
|
9 |
# - cast expressions
|
10 |
# - although cast(Int, foo) works I suppose. It feels like it has a runtime
|
11 |
# cost
|
12 |
#
|
13 |
# - What about list comprehensions?
|
14 |
# - I think this could be replaced with implicit vectorization, like
|
15 |
# @len(x) or len.(x) ? It's shorter, but it maps and doesn't filter.
|
16 |
# - Generator expressions?
|
17 |
|
18 |
# Note: trailing commas are allowed:
|
19 |
# {k: mydict,}
|
20 |
# [mylist,]
|
21 |
# mytuple,
|
22 |
# f(args,)
|
23 |
# func f(params,)
|
24 |
#
|
25 |
# Kinds used:
|
26 |
# VSub, Left, Right, Expr, Op, Arith, Char, Eof, Unknown
|
27 |
|
28 |
# Oil patch: removed @= **= //=
|
29 |
# We're missing div= and xor=, which now look weird. ^= is
|
30 |
# exponentiation. |= has a use case.
|
31 |
augassign: (
|
32 |
'+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>='
|
33 |
)
|
34 |
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
35 |
|
36 |
test: or_test ['if' or_test 'else' test] | lambdef
|
37 |
|
38 |
# We follow the same rules as Python lambdas:
|
39 |
#
|
40 |
# |x| 1, 2 == (|x| 1), 2
|
41 |
# |x| x if True else 42 == |x| (x if True else 42)
|
42 |
#
|
43 |
# Python also had a test_nocond production like this: We don't need it because
|
44 |
# we can't have multiple ifs.
|
45 |
# [x for x in range(3) if lambda x: x if 1]
|
46 |
#
|
47 |
# The zero arg syntax like || 1 annoys me -- but this also works:
|
48 |
# func() { return 1 }
|
49 |
#
|
50 |
# We used name_type_list rather than param_group because a default value like
|
51 |
# x|y (bitwise or) conflicts with the | delimiter!
|
52 |
|
53 |
lambdef: '|' [name_type_list] '|' test
|
54 |
|
55 |
or_test: and_test ('or' and_test)*
|
56 |
and_test: not_test ('and' not_test)*
|
57 |
not_test: 'not' not_test | comparison
|
58 |
comparison: range_expr (comp_op range_expr)*
|
59 |
|
60 |
# Here the beginning and end are required
|
61 |
range_expr: expr [':' expr]
|
62 |
|
63 |
# Oil patch: removed legacy <>, use === and !==
|
64 |
comp_op: (
|
65 |
'<'|'>'|'==='|'>='|'<='|'!=='|'in'|'not' 'in'|'is'|'is' 'not'|
|
66 |
'~' | '!~' | '~~' | '!~~' | '~=='
|
67 |
)
|
68 |
|
69 |
# For lists and dicts. Note: In Python this was star_expr *foo
|
70 |
splat_expr: '...' expr
|
71 |
|
72 |
expr: xor_expr ('|' xor_expr)*
|
73 |
xor_expr: and_expr ('^' and_expr)*
|
74 |
and_expr: shift_expr ('&' shift_expr)*
|
75 |
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
76 |
# Oil: add concatenation with same precedence as +
|
77 |
arith_expr: term (('+'|'-'|'++') term)*
|
78 |
# Oil patch: removed '@'
|
79 |
term: factor (('*'|'/'|'//'|'%') factor)*
|
80 |
factor: ('+'|'-'|'~') factor | power
|
81 |
# Oil patch: removed Python 3 'await'
|
82 |
power: atom trailer* ['**' factor]
|
83 |
|
84 |
testlist_comp: (test|splat_expr) ( comp_for | (',' (test|splat_expr))* [','] )
|
85 |
|
86 |
atom: (
|
87 |
'(' [testlist_comp] ')'
|
88 |
| '[' [testlist_comp] ']'
|
89 |
# Note: newlines are significant inside {}, unlike inside () and []
|
90 |
| '{' [Op_Newline] [dict] '}'
|
91 |
| '/' regex [re_flags] '/'
|
92 |
# NOTE: These atoms are allowed in typed array literals
|
93 |
| Expr_Name | Expr_Null | Expr_True | Expr_False
|
94 |
# TODO: Allow suffixes on floats and decimals?
|
95 |
# You could frame it as multiplication, so 100 M is 100 * M, where
|
96 |
# M = 1_000_000
|
97 |
| Expr_Float | Expr_DecInt | Expr_BinInt | Expr_OctInt | Expr_HexInt
|
98 |
|
99 |
| Char_OneChar # char literal \n \\ etc.
|
100 |
| Char_UBraced # char literal \u{3bc}
|
101 |
| Char_Pound # char literal #'A' etc.
|
102 |
|
103 |
| dq_string | sq_string
|
104 |
# Expr_Symbol could be %mykey
|
105 |
|
106 |
# $foo is disallowed, but $? is allowed. Should be "$foo" to indicate a
|
107 |
# string, or ${foo:-}
|
108 |
| simple_var_sub
|
109 |
| sh_command_sub | braced_var_sub
|
110 |
| sh_array_literal
|
111 |
| old_sh_array_literal
|
112 |
# Anonymous function. Is this only in Tea mode?
|
113 |
| 'func' tea_func
|
114 |
)
|
115 |
|
116 |
# Tea can run a limited form of procs. The first word must be a name, and NO
|
117 |
# BARE WORDS.
|
118 |
#
|
119 |
# Example:
|
120 |
# log "hello $name" # valid in OSH, Oil, Tea
|
121 |
# myproc $(hostname) # ditto
|
122 |
#
|
123 |
# my-proc '/' $foo # OSH and Oil
|
124 |
# run 'my-proc' '/' $foo # Tea. 'run' is similar to 'command' and 'builtin'
|
125 |
#
|
126 |
|
127 |
tea_word: (
|
128 |
dq_string | sq_string
|
129 |
| sh_command_sub | braced_var_sub | simple_var_sub
|
130 |
)
|
131 |
|
132 |
# var f = f(x)
|
133 |
trailer: (
|
134 |
'(' [arglist] ')'
|
135 |
| '[' subscriptlist ']'
|
136 |
|
137 |
# TODO:
|
138 |
# - {} %() :() trailers for passing a single arg that's a collection.
|
139 |
# - and maybe %"" too
|
140 |
# Lazy evaluation:
|
141 |
# - f %(a b c) could be f(%(a b c))
|
142 |
# - f {a=1, b=2} could be f({a=1, b=2})
|
143 |
# - although we might want that for Point {x: 1, y: 2}
|
144 |
# - f :(x, y) could be equivalent to f(:[x], :[y])
|
145 |
|
146 |
| '.' Expr_Name
|
147 |
| '->' Expr_Name
|
148 |
| '::' Expr_Name
|
149 |
)
|
150 |
|
151 |
# Oil patch: this is 'expr' instead of 'test'
|
152 |
# - 1:(3<4) doesn't make any sense.
|
153 |
# - And then this allows us to support a[3:] and a[:i] as special cases.
|
154 |
# - First class slices have to be written 0:n.
|
155 |
|
156 |
subscriptlist: subscript (',' subscript)* [',']
|
157 |
|
158 |
# TODO: Add => as low precedence operator, for Func[Str, Int => Str]
|
159 |
subscript: expr | [expr] ':' [expr]
|
160 |
|
161 |
# TODO: => should be even lower precedence here too
|
162 |
testlist: test (',' test)* [',']
|
163 |
|
164 |
# Dict syntax resembles JavaScript
|
165 |
# https://stackoverflow.com/questions/38948306/what-is-javascript-shorthand-property
|
166 |
#
|
167 |
# Examples:
|
168 |
# {age: 20} is like {'age': 20}
|
169 |
#
|
170 |
# x = 'age'
|
171 |
# d = %{[x]: 20} # Evaluate x as a variable
|
172 |
# d = %{["foo$x"]: 20} # Another expression
|
173 |
# d = %{[x, y]: 20} # Tuple key
|
174 |
# d = %{key1, key2: 123}
|
175 |
# Notes:
|
176 |
# - Value is optional when the key is a name, because it can be taken from the
|
177 |
# environment.
|
178 |
# - We don't have:
|
179 |
# - dict comprehensions. Maybe wait until LR parsing?
|
180 |
# - Splatting with **
|
181 |
|
182 |
dict_pair: (
|
183 |
Expr_Name [':' test]
|
184 |
| '[' testlist ']' ':' test
|
185 |
| sq_string ':' test
|
186 |
| dq_string ':' test
|
187 |
)
|
188 |
|
189 |
dict: dict_pair (comma_newline dict_pair)* [comma_newline]
|
190 |
|
191 |
# This is how Python implemented dict comprehensions. We can probably do the
|
192 |
# same.
|
193 |
#
|
194 |
# dictorsetmaker: ( ((test ':' test | '**' expr)
|
195 |
# (comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
196 |
# ((test | splat_expr)
|
197 |
# (comp_for | (',' (test | splat_expr))* [','])) )
|
198 |
|
199 |
# The reason that keywords are test nodes instead of NAME is that using NAME
|
200 |
# results in an ambiguity. ast.c makes sure it's a NAME.
|
201 |
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
202 |
# These need to be in a single rule to avoid grammar that is ambiguous
|
203 |
# to our LL(1) parser. Even though 'test' includes '*expr' in splat_expr,
|
204 |
# we explicitly match '*' here, too, to give it proper precedence.
|
205 |
# Illegal combinations and orderings are blocked in ast.c:
|
206 |
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
207 |
# that precede iterable unpackings are blocked; etc.
|
208 |
|
209 |
argument: (
|
210 |
test [comp_for]
|
211 |
# named arg
|
212 |
| test '=' test
|
213 |
# splat. Note we're using prefix syntax to be consistent with Python, JS,
|
214 |
# and the prefix @ operator.
|
215 |
| '...' test
|
216 |
)
|
217 |
|
218 |
# The grammar at call sites is less restrictive than at declaration sites.
|
219 |
# ... can appear anywhere. Keyword args can appear anywhere too.
|
220 |
arglist: argument (',' argument)* [','] [';' argument (',' argument)* [',']]
|
221 |
|
222 |
|
223 |
# Oil patch: test_nocond -> or_test. I believe this was trying to prevent the
|
224 |
# "double if" ambiguity here:
|
225 |
# #
|
226 |
# [x for x in range(3) if lambda x: x if 1]
|
227 |
#
|
228 |
# but Oil doesn't support "nested loops", so we don't have this problem.
|
229 |
comp_for: 'for' name_type_list 'in' or_test ['if' or_test]
|
230 |
|
231 |
|
232 |
#
|
233 |
# Expressions that are New in Oil
|
234 |
#
|
235 |
|
236 |
# Notes:
|
237 |
# - Most of these occur in 'atom' above
|
238 |
# - You can write $mystr but not mystr. It has to be (mystr)
|
239 |
array_item: (
|
240 |
Expr_Null | Expr_True | Expr_False
|
241 |
| Expr_Float | Expr_DecInt | Expr_BinInt | Expr_OctInt | Expr_HexInt
|
242 |
| dq_string | sq_string
|
243 |
| sh_command_sub | braced_var_sub | simple_var_sub
|
244 |
| '(' test ')'
|
245 |
)
|
246 |
sh_array_literal: ':|' Expr_CastedDummy Op_Pipe
|
247 |
|
248 |
# TODO: remove this
|
249 |
old_sh_array_literal: '%(' Expr_CastedDummy Right_ShArrayLiteral
|
250 |
sh_command_sub: ( '$(' | '@(' | '^(' ) Expr_CastedDummy Eof_RParen
|
251 |
|
252 |
# Note: could add c"" too
|
253 |
dq_string: (Left_DoubleQuote | Left_TDoubleQuote) Expr_CastedDummy Right_DoubleQuote
|
254 |
sq_string: (
|
255 |
Left_SingleQuote | Left_RSingleQuote | Left_DollarSingleQuote |
|
256 |
Left_TSingleQuote | Left_RTSingleQuote | Left_DollarTSingleQuote
|
257 |
) Expr_CastedDummy Right_SingleQuote
|
258 |
|
259 |
braced_var_sub: '${' Expr_CastedDummy Right_DollarBrace
|
260 |
|
261 |
simple_var_sub: (
|
262 |
# This is everything in Kind.VSub except VSub_Name, which is braced: ${foo}
|
263 |
#
|
264 |
# Note: we could allow $foo and $0, but disallow the rest in favor of ${@}
|
265 |
# and ${-}? Meh it's too inconsistent.
|
266 |
VSub_DollarName | VSub_Number
|
267 |
| VSub_Bang | VSub_At | VSub_Pound | VSub_Dollar | VSub_Star | VSub_Hyphen
|
268 |
| VSub_QMark
|
269 |
# NOTE: $? should be STATUS because it's an integer.
|
270 |
)
|
271 |
|
272 |
#
|
273 |
# Assignment / Type Variables
|
274 |
#
|
275 |
# Several differences vs. Python:
|
276 |
#
|
277 |
# - no yield expression on RHS
|
278 |
# - no star expressions on either side (Python 3) *x, y = 2, *b
|
279 |
# - no multiple assignments like: var x = y = 3
|
280 |
# - type annotation syntax is more restrictive # a: (1+2) = 3 is OK in python
|
281 |
# - We're validating the lvalue here, instead of doing it in the "transformer".
|
282 |
# We have the 'var' prefix which helps.
|
283 |
|
284 |
# name_type use cases:
|
285 |
# for x Int, y Int
|
286 |
# [x for x Int, y Int in ...]
|
287 |
# var x Int, y Int = 3, 5
|
288 |
# func(x Int, y Int)
|
289 |
name_type: Expr_Name [type_expr]
|
290 |
name_type_list: name_type (',' name_type)*
|
291 |
|
292 |
type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
|
293 |
|
294 |
# NOTE: Eof_RParen and Eof_Backtick aren't allowed because we don't want 'var'
|
295 |
# in command subs.
|
296 |
end_stmt: '}' | ';' | Op_Newline | Eof_Real
|
297 |
|
298 |
oil_var_decl: name_type_list '=' testlist end_stmt
|
299 |
|
300 |
# Note: the following production is more precise, but it's ambiguous :(
|
301 |
#oil_setvar: place augassign testlist end_stmt
|
302 |
# | place_list '=' testlist end_stmt
|
303 |
|
304 |
# Note: for Oil (not Tea), we could accept [':'] expr for setvar :out = 'foo'
|
305 |
place_list: expr (',' expr)*
|
306 |
oil_place_mutation: place_list (augassign | '=') testlist end_stmt
|
307 |
|
308 |
# For $stringfunc(x, y=1) and @arrayfunc(a, b='s')
|
309 |
oil_arglist: '(' [arglist] ')'
|
310 |
|
311 |
#
|
312 |
# Other Entry Points
|
313 |
#
|
314 |
|
315 |
# for if (x > 0) etc.
|
316 |
oil_expr: '(' testlist ')'
|
317 |
|
318 |
# Example: for (a Int, b Int in expr) { ... }
|
319 |
oil_for: '(' name_type_list 'in' testlist ')'
|
320 |
|
321 |
# e.g. return 1 + 2 * 3
|
322 |
command_expr: testlist end_stmt
|
323 |
|
324 |
# $[d->key] etc.
|
325 |
oil_expr_sub: testlist ']'
|
326 |
|
327 |
# Signatures for proc and func.
|
328 |
|
329 |
# Note: 'proc name-with-hyphens' is allowed, so we can't parse the name in
|
330 |
# expression mode.
|
331 |
ysh_proc: (
|
332 |
[ '('
|
333 |
[ param_group ] # word params, with defaults
|
334 |
[ ';' [ param_group ] ] # positional typed params, with defaults
|
335 |
[ ';' [ param_group ] ] # named params, with defaults
|
336 |
[ ';' [ param_group ] ] # optional block param, with no type or default
|
337 |
|
338 |
# This causes a pgen2 error? It doesn't know which branch to take
|
339 |
# So we have the extra {block} syntax
|
340 |
#[ ';' Expr_Name ] # optional block param, with no type or default
|
341 |
')'
|
342 |
]
|
343 |
'{' # opening { for pgen2
|
344 |
)
|
345 |
|
346 |
# YSH entry point
|
347 |
ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
|
348 |
|
349 |
param: Expr_Name [type_expr] ['=' expr]
|
350 |
|
351 |
# This is an awkward way of writing that '...' has to come last.
|
352 |
param_group: (
|
353 |
(param ',')*
|
354 |
[ (param | '...' Expr_Name) [','] ]
|
355 |
)
|
356 |
|
357 |
type_expr_list: type_expr (',' type_expr)*
|
358 |
|
359 |
# Note: It may make sense to have ; here, for named params only!
|
360 |
data_params: (param ',')* [ param [','] ]
|
361 |
|
362 |
# zero params allowed for consistency with func and class?
|
363 |
tea_data: Expr_Name '(' [data_params] ')'
|
364 |
|
365 |
# e.g. Nullary %Token or Nullary(x Int)
|
366 |
variant_type: Expr_Symbol | '(' data_params ')'
|
367 |
variant: Expr_Name [ variant_type ]
|
368 |
|
369 |
# for dict, tea_enum
|
370 |
comma_newline: ',' [Op_Newline] | Op_Newline
|
371 |
# for tea_func, tea_class
|
372 |
semi_newline: ';' [Op_Newline] | Op_Newline
|
373 |
|
374 |
#
|
375 |
# Experimental "Tea" stuff
|
376 |
#
|
377 |
|
378 |
tea_enum: (
|
379 |
Expr_Name '{' [Op_Newline]
|
380 |
# note: braces can be empty
|
381 |
[ variant (comma_newline variant)* [comma_newline] ]
|
382 |
'}'
|
383 |
)
|
384 |
|
385 |
suite: '{' [Op_Newline] [func_items] '}'
|
386 |
|
387 |
func_item: (
|
388 |
('var' | 'const') name_type_list '=' testlist # oil_var_decl
|
389 |
|
390 |
# TODO: if/switch, with, try/except/throw, etc.
|
391 |
| 'while' test suite
|
392 |
| 'for' name_type_list 'in' test suite
|
393 |
|
394 |
# In Python, imports, assert, etc. also at this 'small_stmt' level
|
395 |
| 'break' | 'continue' | 'return' [testlist]
|
396 |
|
397 |
# TODO: accept setvar for consistency with Oil?
|
398 |
| 'set' place_list (augassign | '=') testlist # oil_place_mutation
|
399 |
# x f(x) etc.
|
400 |
#
|
401 |
# And x = 1. Python uses the same "hack" to fit within pgen2. It also
|
402 |
# supports a = b = 1, which we don't want.
|
403 |
#
|
404 |
# And echo 'hi' 'there'
|
405 |
#
|
406 |
# TODO: expr_to_ast needs to validate this
|
407 |
| testlist (['=' testlist] | tea_word*)
|
408 |
)
|
409 |
|
410 |
# we want to avoid requiring newline or ; before }
|
411 |
func_items: func_item (semi_newline func_item)* [semi_newline]
|
412 |
|
413 |
# This is anonymous
|
414 |
tea_func: (
|
415 |
'(' [param_group] [';' param_group] ')' [type_expr_list]
|
416 |
suite
|
417 |
)
|
418 |
named_func: Expr_Name tea_func
|
419 |
|
420 |
# TODO: Methods differ from functions:
|
421 |
# super() can be the first arg
|
422 |
# shortcut initializer: Parser(this.lexer) { }
|
423 |
# abstract, override, virtual
|
424 |
# should we allow annotations, like 'public' or 'export'?
|
425 |
#
|
426 |
# No field initializers for now. Later C++ versions allow it.
|
427 |
#
|
428 |
# Annotations:
|
429 |
#
|
430 |
# func Parse() Int
|
431 |
# [override const abstract] {
|
432 |
# } ?
|
433 |
|
434 |
class_item: (
|
435 |
('virtual' | 'override' | 'func' | 'abstract' ) Expr_Name tea_func
|
436 |
# Member declaration
|
437 |
| 'var' name_type_list
|
438 |
)
|
439 |
|
440 |
# Note: we could restrict separators to newlines.
|
441 |
# But then you couldn't do class Foo { var a; var b }
|
442 |
class_items: class_item (semi_newline class_item)* [semi_newline]
|
443 |
|
444 |
tea_class: Expr_Name [':' Expr_Name ] '{' [Op_Newline] [class_items] '}'
|
445 |
|
446 |
# 'import' can't use 'semi_newline' because ending with an unknown number of
|
447 |
# tokens doesn't compose with our CommandParser.
|
448 |
end_import: ';' | Op_Newline
|
449 |
|
450 |
import_name: Expr_Name ['as' Expr_Name]
|
451 |
# One or more imported names separated by comma_newline. A trailing separator
# is allowed, matching 'dict', 'func_items' and 'class_items'. The previous
# trailing '[import_name]' accepted two adjacent names with no separator and
# rejected a trailing comma or newline before the closing ')'.
import_names: import_name (comma_newline import_name)* [comma_newline]
|
452 |
|
453 |
# TODO: Should we have a simpler Oil string literal?
|
454 |
tea_import: sq_string [ 'as' Expr_Name ] ['(' [Op_Newline] [import_names] ')'] end_import
|
455 |
|
456 |
# Top level:
|
457 |
# declarations of constants -- with const only?
|
458 |
# maybe only const?
|
459 |
# use, data, enum, class, func. That's it? OK.
|
460 |
|
461 |
end_outer: ';' [Op_Newline] | Op_Newline | Eof_Real
|
462 |
|
463 |
module_item: (
|
464 |
# oil_var_decl, but no mutation
|
465 |
('var' | 'const') name_type_list '=' testlist end_outer
|
466 |
| 'import' tea_import # TODO: needs Eof_Real
|
467 |
# Also 'export'
|
468 |
| 'class' tea_class end_outer
|
469 |
| 'data' tea_data end_outer
|
470 |
| 'enum' tea_enum end_outer
|
471 |
| 'func' Expr_Name tea_func end_outer
|
472 |
|
473 |
# Might need: typedef? Or typealias?
|
474 |
)
|
475 |
|
476 |
# Eof_Real either after newline or before newline are both valid
|
477 |
tea_module: [Op_Newline] module_item* [Eof_Real]
|
478 |
|
479 |
|
480 |
#
|
481 |
# Regex Sublanguage
|
482 |
#
|
483 |
|
484 |
char_literal: Char_OneChar | Char_Hex | Char_UBraced
|
485 |
|
486 |
# we allow a-z A-Z 0-9 as ranges, but otherwise they have to be quoted
|
487 |
# The parser enforces that they are single strings
|
488 |
range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
|
489 |
|
490 |
# digit or a-z
|
491 |
# We have to do further validation of ranges later.
|
492 |
class_literal_term: (
|
493 |
# NOTE: range_char has sq_string
|
494 |
range_char ['-' range_char ]
|
495 |
# splice a literal set of characters
|
496 |
| '@' Expr_Name
|
497 |
| '!' Expr_Name
|
498 |
# Reserved for [[.collating sequences.]] (Unicode)
|
499 |
| '.' Expr_Name
|
500 |
# Reserved for [[=character equivalents=]] (Unicode)
|
501 |
| '=' Expr_Name
|
502 |
# TODO: Do these char classes actually work in bash/awk/egrep/sed/etc.?
|
503 |
|
504 |
)
|
505 |
class_literal: '[' class_literal_term+ ']'
|
506 |
|
507 |
# NOTE: Here is an example of where you can put ^ in the middle of a pattern in
|
508 |
# Python, and it matters!
|
509 |
# >>> r = re.compile('.f[a-z]*', re.DOTALL|re.MULTILINE)
|
510 |
# >>> r.findall('z\nfoo\nbeef\nfood\n')
|
511 |
# ['\nfoo', 'ef', '\nfood']
|
512 |
# >>> r = re.compile('.^f[a-z]*', re.DOTALL|re.MULTILINE)
|
513 |
# r.findall('z\nfoo\nbeef\nfood\n')
|
514 |
# ['\nfoo', '\nfood']
|
515 |
|
516 |
re_atom: (
|
517 |
char_literal
|
518 |
# builtin regex like 'digit' or a regex reference like 'D'
|
519 |
| Expr_Name
|
520 |
# %begin or %end
|
521 |
| Expr_Symbol
|
522 |
| class_literal
|
523 |
# !digit or ![a-f]. Note ! %boundary could be \B in Python, but ERE
|
524 |
# doesn't have anything like that
|
525 |
| '!' (Expr_Name | class_literal)
|
526 |
|
527 |
# syntactic space for Perl-style backtracking
|
528 |
# !!REF 1 !!REF name
|
529 |
# !!AHEAD(d+) !!BEHIND(d+) !!NOT_AHEAD(d+) !!NOT_BEHIND(d+)
|
530 |
#
|
531 |
# Note: !! conflicts with history
|
532 |
| '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
|
533 |
|
534 |
# Splice another expression
|
535 |
| '@' Expr_Name
|
536 |
# any %start %end are preferred
|
537 |
| '.' | '^' | '$'
|
538 |
# In a language-independent spec, backslashes are disallowed within 'sq'.
|
539 |
# Write it with char literals outside strings: 'foo' \\ 'bar' \n
|
540 |
#
|
541 |
# No double-quoted strings because you can write "x = $x" with 'x = ' @x
|
542 |
| sq_string
|
543 |
|
544 |
# grouping (non-capturing in Perl; capturing in ERE although < > is preferred)
|
545 |
| '(' regex ')'
|
546 |
# capturing group, with optional name
|
547 |
| '<' regex [':' name_type] '>'
|
548 |
|
549 |
# Might want this obscure conditional construct. Can't use C-style ternary
|
550 |
# because '?' is a regex operator.
|
551 |
#| '{' regex 'if' regex 'else' regex '}'
|
552 |
|
553 |
# Others:
|
554 |
# PCRE has (?R ) for recursion? That could be !RECURSE()
|
555 |
# Note: .NET has && in character classes, making it a recursive language
|
556 |
)
|
557 |
|
558 |
# e.g. a{3} a{3,4} a{3,} a{,4} but not a{,}
|
559 |
repeat_range: (
|
560 |
Expr_DecInt [',']
|
561 |
| ',' Expr_DecInt
|
562 |
| Expr_DecInt ',' Expr_DecInt
|
563 |
)
|
564 |
|
565 |
repeat_op: (
|
566 |
'+' | '*' | '?'
|
567 |
# In PCRE, ?? *? +? {}? is lazy/nongreedy and ?+ *+ ++ {}+ is "possessive"
|
568 |
# We use N and P modifiers within {}.
|
569 |
# a{N +} a{P ?} a{P 3,4} a{P ,4}
|
570 |
| '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
|
571 |
)
|
572 |
|
573 |
re_alt: (re_atom [repeat_op])+
|
574 |
|
575 |
regex: [re_alt] (('|'|'or') re_alt)*
|
576 |
|
577 |
# Patterns are the start of a case arm. I.e.,
|
578 |
#
|
579 |
# case (foo) {
|
580 |
# (40 + 2) | (0) { echo number }
|
581 |
# ^^^^^^^^^^^^^^-- This is the pattern
|
582 |
# }
|
583 |
#
|
584 |
# Due to limitations created from pgen2/cmd_parser interactions, we also parse
|
585 |
# the leading '{' token of the case arm body in pgen2. We do this to help pgen2
|
586 |
# figure out when to transfer control back to the cmd_parser. For more details
|
587 |
# see #oil-dev > Dev Friction / Smells.
|
588 |
#
|
589 |
# case (foo) {
|
590 |
# (40 + 2) | (0) { echo number }
|
591 |
# ^-- End of pattern/beginning of case arm body
|
592 |
# }
|
593 |
|
594 |
ysh_case_pat: (
|
595 |
'(' (pat_else | pat_exprs)
|
596 |
| pat_eggex
|
597 |
) [Op_Newline] '{'
|
598 |
|
599 |
pat_else: 'else' ')'
|
600 |
pat_exprs: expr ')' [Op_Newline] ('|' [Op_Newline] '(' expr ')' [Op_Newline])*
|
601 |
pat_eggex: '/' regex [re_flags] '/'
|
602 |
|
603 |
# e.g. /digit+ ; multiline !ignorecase/
|
604 |
#
|
605 |
# This can express translation preferences:
|
606 |
#
|
607 |
# / d+ ; %ERE / is '[[:digit:]]+'
|
608 |
# / d+ ; %python / is '\d+'
|
609 |
# / d+ ; ignorecase %python / is '(?i)\d+'
|
610 |
|
611 |
re_flag: ['!'] Expr_Name | Expr_Symbol
|
612 |
re_flags: ';' re_flag+
|
613 |
|
614 |
# Syntax reserved for PCRE/Python, but that's not in ERE:
|
615 |
#
|
616 |
# non-greedy a{N *}
|
617 |
# non-capturing :( digit+ )
|
618 |
# backtracking !!REF 1 !!AHEAD(d+)
|
619 |
#
|
620 |
# Legacy syntax:
|
621 |
#
|
622 |
# ^ and $ instead of %start and %end
|
623 |
# < and > instead of %start_word and %end_word
|
624 |
# . instead of dot
|
625 |
# | instead of 'or'
|