1 """expr_to_ast.py."""
2 from __future__ import print_function
3
4 from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str
5 from _devbuild.gen.syntax_asdl import (
6 Token,
7 loc,
8 loc_t,
9 DoubleQuoted,
10 SingleQuoted,
11 SimpleVarSub,
12 BracedVarSub,
13 CommandSub,
14 ShArrayLiteral,
15 command,
16 command_t,
17 expr,
18 expr_e,
19 expr_t,
20 expr_context_e,
21 re,
22 re_t,
23 re_repeat,
24 re_repeat_t,
25 class_literal_term,
26 class_literal_term_t,
27 PosixClass,
28 PerlClass,
29 NameType,
30 place_expr,
31 place_expr_e,
32 place_expr_t,
33 Comprehension,
34 Subscript,
35 Attribute,
36 proc_sig,
37 proc_sig_t,
38 Param,
39 RestParam,
40 NamedArg,
41 ArgList,
42 Variant,
43 variant_type,
44 variant_type_t,
45 pat,
46 pat_t,
47 TypeExpr,
48 )
49 from _devbuild.gen import grammar_nt
50 from core.error import p_die
51 from frontend import lexer
52 from mycpp import mylib
53 from mycpp.mylib import log
54 from ysh import expr_parse
55
56 from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
57 if TYPE_CHECKING:
58 from pgen2.grammar import Grammar
59 from pgen2.pnode import PNode
60
61 _ = log
62
63 PERL_CLASSES = {
64 'd': 'd',
65 'w': 'w',
66 'word': 'w',
67 's': 's',
68 }
69 # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
70 POSIX_CLASSES = [
71 'alnum',
72 'cntrl',
73 'lower',
74 'space',
75 'alpha',
76 'digit',
77 'print',
78 'upper',
79 'blank',
80 'graph',
81 'punct',
82 'xdigit',
83 ]
84 # NOTE: There are also things like \p{Greek} that we could put in the
85 # "non-sigil" namespace.
86
87 RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
88
89 # Copied from pgen2/token.py to avoid dependency.
90 NT_OFFSET = 256
91
92
93 if mylib.PYTHON:
94
95 def MakeGrammarNames(oil_grammar):
96 # type: (Grammar) -> Dict[int, str]
97
98 # TODO: Break this dependency
99 from frontend import lexer_def
100
101 names = {}
102
103 #from _devbuild.gen.id_kind_asdl import _Id_str
104 # This is a dictionary
105
106 # _Id_str()
107
108 for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
109 # Hm some are out of range
110 #assert k < 256, (k, id_name)
111
112 # HACK: Cut it off at 256 now! Expr/Arith/Op doesn't go higher than
113 # that. TODO: Change NT_OFFSET? That might affect C code though.
114 # Best to keep everything fed to pgen under 256. This only affects
115 # pretty printing.
116 if k < 256:
117 names[k] = id_name
118
119 for k, v in oil_grammar.number2symbol.items():
120 # eval_input == 256. Remove?
121 assert k >= 256, (k, v)
122 names[k] = v
123
124 return names
125
126
127 def ISNONTERMINAL(x):
128 # type: (int) -> bool
129 return x >= NT_OFFSET
130
131
132 class Transformer(object):
133 """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
134
135 pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
136 so this "transformer" is the equivalent.
137
138 Files to refer to when modifying this function:
139
140 ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
141 frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
142
143 Related examples:
144
145 opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
146 Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
147
148 Other:
149 frontend/parse_lib.py (turn on print_parse_tree)
150
151 Public methods:
152 Expr, VarDecl
153 atom, trailer, etc. are private, named after productions in grammar.pgen2.
154 """
155
156 def __init__(self, gr):
157 # type: (Grammar) -> None
158 self.number2symbol = gr.number2symbol
159 if mylib.PYTHON:
160 names = MakeGrammarNames(gr)
161 # print raw nodes
162 self.p_printer = expr_parse.ParseTreePrinter(names)
163
164 def _LeftAssoc(self, p_node):
165 # type: (PNode) -> expr_t
166 """For an associative binary operation.
167
168 Examples:
169 xor_expr: and_expr ('xor' and_expr)*
170 term: factor (('*'|'/'|'%'|'div') factor)*
171
172 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
173 """
174 # Note: Compare the iteractive com_binary() method in
175 # opy/compiler2/transformer.py.
176
177 # Examples:
178 # - The PNode for '3 - 1' will have 3 children
179 # - The PNode for '3 - 1 - 2' will have 5 children
180
181 #self.p_printer.Print(p_node)
182
183 i = 1 # index of the operator
184 n = p_node.NumChildren()
185
186 left = self.Expr(p_node.GetChild(0))
187 while i < n:
188 op = p_node.GetChild(i)
189 right = self.Expr(p_node.GetChild(i + 1))
190
191 # create a new left node
192 left = expr.Binary(op.tok, left, right)
193 i += 2
194
195 return left
196
197 def _Trailer(self, base, p_trailer):
198 # type: (expr_t, PNode) -> expr_t
199 """
200 Trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
201 | '.' NAME | '->' NAME | '::' NAME
202 )
203 """
204 op_tok = p_trailer.GetChild(0).tok
205
206 if op_tok.id == Id.Op_LParen:
207 lparen = op_tok
208 rparen = p_trailer.GetChild(-1).tok
209 arglist = ArgList(lparen, [], [], rparen)
210 if p_trailer.NumChildren() == 2: # ()
211 return expr.FuncCall(base, arglist)
212
213 p = p_trailer.GetChild(1) # the X in ( X )
214 assert p.typ == grammar_nt.arglist # f(x, y)
215 self._Arglist(p, arglist)
216 return expr.FuncCall(base, arglist)
217
218 if op_tok.id == Id.Op_LBracket:
219 p_args = p_trailer.GetChild(1)
220 assert p_args.typ == grammar_nt.subscriptlist
221 n = p_args.NumChildren()
222 if n > 1:
223 p_die("Only 1 subscript is accepted", p_args.GetChild(1).tok)
224
225 a = p_args.GetChild(0)
226 return Subscript(base, self._Subscript(a))
227
228 if op_tok.id in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_DColon):
229 attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
230 return Attribute(base, op_tok, attr, expr_context_e.Store)
231
232 raise AssertionError(Id_str(op_tok.id))
233
234 def _DictPair(self, p_node):
235 # type: (PNode) -> Tuple[expr_t, expr_t]
236 """dict_pair: ( Expr_Name [':' test] |
237
238 '[' testlist ']' ':' test )
239 """
240 assert p_node.typ == grammar_nt.dict_pair
241
242 typ = p_node.GetChild(0).typ
243
244 if ISNONTERMINAL(typ): # for sq_string
245 # Note: Could inline these cases instead of going through self.Expr.
246 if typ == grammar_nt.sq_string:
247 key = self.Expr(p_node.GetChild(0)) # type: expr_t
248 elif typ == grammar_nt.dq_string:
249 key = self.Expr(p_node.GetChild(0))
250
251 value = self.Expr(p_node.GetChild(2))
252 return key, value
253
254 tok0 = p_node.GetChild(0).tok
255 id_ = tok0.id
256
257 if id_ == Id.Expr_Name:
258 key = expr.Const(tok0)
259 if p_node.NumChildren() >= 3:
260 value = self.Expr(p_node.GetChild(2))
261 else:
262 value = expr.Implicit
263
264 if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
265 key = self.Expr(p_node.GetChild(1))
266 value = self.Expr(p_node.GetChild(4))
267 return key, value
268
269 return key, value
270
271 def _Dict(self, p_node):
272 # type: (PNode) -> expr.Dict
273 """Parse tree to LST
274
275 dict: dict_pair (',' dict_pair)* [',']
276 dict2: dict_pair (comma_newline dict_pair)* [comma_newline]
277 """
278 if not ISNONTERMINAL(p_node.typ):
279 assert p_node.tok.id == Id.Op_RBrace
280 return expr.Dict([], [])
281
282 #assert p_node.typ == grammar_nt.dict
283
284 keys = [] # type: List[expr_t]
285 values = [] # type: List[expr_t]
286
287 n = p_node.NumChildren()
288 for i in xrange(0, n, 2):
289 key, value = self._DictPair(p_node.GetChild(i))
290 keys.append(key)
291 values.append(value)
292
293 return expr.Dict(keys, values)
294
295 def _Tuple(self, parent):
296 # type: (PNode) -> expr_t
297
298 n = parent.NumChildren()
299
300 # (x) -- not a tuple
301 if n == 1:
302 return self.Expr(parent.GetChild(0))
303
304 # x, and (x,) aren't allowed
305 if n == 2:
306 p_die('Invalid trailing comma', parent.GetChild(1).tok)
307
308 elts = [] # type: List[expr_t]
309 for i in xrange(0, n, 2): # skip commas
310 p_node = parent.GetChild(i)
311 elts.append(self.Expr(p_node))
312
313 return expr.Tuple(elts, expr_context_e.Store) # unused expr_context_e
314
315 def _TestlistComp(self, p_node, id0):
316 # type: (PNode, Id_t) -> expr_t
317 """Parse tree to LST
318
319 testlist_comp:
320 (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
321 """
322 assert p_node.typ == grammar_nt.testlist_comp
323 n = p_node.NumChildren()
324 if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
325 elt = self.Expr(p_node.GetChild(0))
326 comp = self._CompFor(p_node.GetChild(1))
327 if id0 == Id.Op_LParen:
328 return expr.GeneratorExp(elt, [comp])
329 if id0 == Id.Op_LBracket:
330 return expr.ListComp(elt, [comp])
331 raise AssertionError()
332
333 if id0 == Id.Op_LParen:
334 if n == 1: # parenthesized expression like (x+1) or (x)
335 return self.Expr(p_node.GetChild(0))
336
337 # (1,) (1, 2) etc.
338 if p_node.GetChild(1).tok.id == Id.Arith_Comma:
339 return self._Tuple(p_node)
340
341 raise NotImplementedError('testlist_comp')
342
343 if id0 == Id.Op_LBracket:
344 elts = [] # type: List[expr_t]
345 for i in xrange(0, n, 2): # skip commas
346 elts.append(self.Expr(p_node.GetChild(i)))
347
348 return expr.List(elts,
349 expr_context_e.Store) # unused expr_context_e
350
351 raise AssertionError(Id_str(id0))
352
353 def _Atom(self, parent):
354 # type: (PNode) -> expr_t
355 """Handle alternatives of 'atom' where there's more than one child."""
356
357 tok = parent.GetChild(0).tok
358 id_ = tok.id
359 n = parent.NumChildren()
360
361 if id_ == Id.Op_LParen:
362 # atom: '(' [yield_expr|testlist_comp] ')' | ...
363 if n == 2: # () is a tuple
364 assert parent.GetChild(
365 1).tok.id == Id.Op_RParen, parent.GetChild(1)
366 return expr.Tuple([], expr_context_e.Store)
367
368 return self._TestlistComp(parent.GetChild(1), id_)
369
370 if id_ == Id.Op_LBracket:
371 # atom: ... | '[' [testlist_comp] ']' | ...
372
373 if n == 2: # []
374 assert parent.GetChild(
375 1).tok.id == Id.Op_RBracket, parent.GetChild(1)
376 return expr.List([],
377 expr_context_e.Store) # unused expr_context_e
378
379 return self._TestlistComp(parent.GetChild(1), id_)
380
381 if id_ == Id.Op_LBrace:
382 # atom: ... | '{' [Op_Newline] [dict] '}'
383 i = 1
384 if parent.GetChild(i).tok.id == Id.Op_Newline:
385 i += 1
386 return self._Dict(parent.GetChild(i))
387
388 if id_ == Id.Arith_Slash:
389 r = self._Regex(parent.GetChild(1))
390 flags = [] # type: List[Token]
391 # TODO: Parse translation preference.
392 trans_pref = None # type: Token
393 return expr.RegexLiteral(
394 parent.GetChild(0).tok, r, flags, trans_pref)
395
396 if id_ == Id.Expr_Func:
397 # STUB. This should really be a Func, not Lambda.
398 return expr.Lambda([], expr.Implicit)
399
400 raise NotImplementedError(Id_str(id_))
401
402 def _NameTypeList(self, p_node):
403 # type: (PNode) -> List[NameType]
404 """name_type_list: name_type (',' name_type)*"""
405 assert p_node.typ == grammar_nt.name_type_list
406 results = [] # type: List[NameType]
407
408 n = p_node.NumChildren()
409 for i in xrange(0, n, 2): # was children[::2]
410 p = p_node.GetChild(i)
411
412 if p.NumChildren() == 2:
413 typ = self._TypeExpr(p.GetChild(1))
414 else:
415 typ = None
416
417 node = NameType(p.GetChild(0).tok, typ)
418 results.append(node)
419 return results
420
421 def _CompFor(self, p_node):
422 # type: (PNode) -> Comprehension
423 """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
424 lhs = self._NameTypeList(p_node.GetChild(1)) # Python calls this target
425 iterable = self.Expr(p_node.GetChild(3))
426
427 if p_node.NumChildren() >= 6:
428 cond = self.Expr(p_node.GetChild(5))
429 else:
430 cond = None
431
432 return Comprehension(lhs, iterable, cond)
433
434 def _CompareChain(self, parent):
435 # type: (PNode) -> expr_t
436 """comparison: expr (comp_op expr)*"""
437 cmp_ops = [] # type: List[Token]
438 comparators = [] # type: List[expr_t]
439 left = self.Expr(parent.GetChild(0))
440
441 i = 1
442 n = parent.NumChildren()
443 while i < n:
444 p = parent.GetChild(i)
445 op = p.GetChild(0).tok
446 if p.NumChildren() == 2:
447 # Blame the first token, and change its type
448 if op.id == Id.Expr_Not: # not in
449 op.id = Id.Node_NotIn
450 elif op.id == Id.Expr_Is: # is not
451 op.id = Id.Node_IsNot
452 else:
453 raise AssertionError()
454 else:
455 # is, <, ==, etc.
456 pass
457
458 cmp_ops.append(op)
459 i += 1
460 comparators.append(self.Expr(parent.GetChild(i)))
461 i += 1
462 return expr.Compare(left, cmp_ops, comparators)
463
464 def _Subscript(self, parent):
465 # type: (PNode) -> expr_t
466 """subscript: expr | [expr] ':' [expr]"""
467 typ0 = parent.GetChild(0).typ
468
469 n = parent.NumChildren()
470
471 if ISNONTERMINAL(typ0):
472 if n == 3: # a[1:2]
473 lower = self.Expr(parent.GetChild(0))
474 upper = self.Expr(parent.GetChild(2))
475 elif n == 2: # a[1:]
476 lower = self.Expr(parent.GetChild(0))
477 upper = None
478 else: # a[1]
479 return self.Expr(parent.GetChild(0))
480 else:
481 assert parent.GetChild(0).tok.id == Id.Arith_Colon
482 lower = None
483 if n == 1: # a[:]
484 upper = None
485 else: # a[:3]
486 upper = self.Expr(parent.GetChild(1))
487 return expr.Slice(lower, upper)
488
489 def Expr(self, pnode):
490 # type: (PNode) -> expr_t
491 """Transform expressions (as opposed to statements)."""
492 typ = pnode.typ
493 tok = pnode.tok
494
495 if ISNONTERMINAL(typ):
496
497 #
498 # Oil Entry Points / Additions
499 #
500
501 if typ == grammar_nt.oil_expr: # for if/while
502 # oil_expr: '(' testlist ')'
503 return self.Expr(pnode.GetChild(1))
504
505 if typ == grammar_nt.command_expr:
506 # return_expr: testlist end_stmt
507 return self.Expr(pnode.GetChild(0))
508
509 #
510 # Python-like Expressions / Operators
511 #
512
513 if typ == grammar_nt.atom:
514 if pnode.NumChildren() == 1:
515 return self.Expr(pnode.GetChild(0))
516 return self._Atom(pnode)
517
518 if typ == grammar_nt.testlist:
519 # testlist: test (',' test)* [',']
520 return self._Tuple(pnode)
521
522 if typ == grammar_nt.test:
523 # test: or_test ['if' or_test 'else' test] | lambdef
524 if pnode.NumChildren() == 1:
525 return self.Expr(pnode.GetChild(0))
526
527 # TODO: Handle lambdef
528
529 test = self.Expr(pnode.GetChild(2))
530 body = self.Expr(pnode.GetChild(0))
531 orelse = self.Expr(pnode.GetChild(4))
532 return expr.IfExp(test, body, orelse)
533
534 if typ == grammar_nt.lambdef:
535 # lambdef: '|' [name_type_list] '|' test
536
537 n = pnode.NumChildren()
538 if n == 4:
539 params = self._NameTypeList(pnode.GetChild(1))
540 else:
541 params = []
542
543 body = self.Expr(pnode.GetChild(n - 1))
544 return expr.Lambda(params, body)
545
546 #
547 # Operators with Precedence
548 #
549
550 if typ == grammar_nt.or_test:
551 # or_test: and_test ('or' and_test)*
552 return self._LeftAssoc(pnode)
553
554 if typ == grammar_nt.and_test:
555 # and_test: not_test ('and' not_test)*
556 return self._LeftAssoc(pnode)
557
558 if typ == grammar_nt.not_test:
559 # not_test: 'not' not_test | comparison
560 if pnode.NumChildren() == 1:
561 return self.Expr(pnode.GetChild(0))
562
563 op_tok = pnode.GetChild(0).tok # not
564 return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
565
566 elif typ == grammar_nt.comparison:
567 if pnode.NumChildren() == 1:
568 return self.Expr(pnode.GetChild(0))
569
570 return self._CompareChain(pnode)
571
572 elif typ == grammar_nt.range_expr:
573 n = pnode.NumChildren()
574 if n == 1:
575 return self.Expr(pnode.GetChild(0))
576
577 if n == 3:
578 return expr.Range(self.Expr(pnode.GetChild(0)),
579 self.Expr(pnode.GetChild(2)))
580
581 raise AssertionError(n)
582
583 elif typ == grammar_nt.expr:
584 # expr: xor_expr ('|' xor_expr)*
585 return self._LeftAssoc(pnode)
586
587 if typ == grammar_nt.xor_expr:
588 # xor_expr: and_expr ('xor' and_expr)*
589 return self._LeftAssoc(pnode)
590
591 if typ == grammar_nt.and_expr: # a & b
592 # and_expr: shift_expr ('&' shift_expr)*
593 return self._LeftAssoc(pnode)
594
595 elif typ == grammar_nt.shift_expr:
596 # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
597 return self._LeftAssoc(pnode)
598
599 elif typ == grammar_nt.arith_expr:
600 # arith_expr: term (('+'|'-') term)*
601 return self._LeftAssoc(pnode)
602
603 elif typ == grammar_nt.term:
604 # term: factor (('*'|'/'|'div'|'mod') factor)*
605 return self._LeftAssoc(pnode)
606
607 elif typ == grammar_nt.factor:
608 # factor: ('+'|'-'|'~') factor | power
609 # the power would have already been reduced
610 if pnode.NumChildren() == 1:
611 return self.Expr(pnode.GetChild(0))
612
613 assert pnode.NumChildren() == 2
614 op = pnode.GetChild(0)
615 e = pnode.GetChild(1)
616
617 assert isinstance(op.tok, Token)
618 return expr.Unary(op.tok, self.Expr(e))
619
620 elif typ == grammar_nt.power:
621 # power: atom trailer* ['**' factor]
622
623 node = self.Expr(pnode.GetChild(0))
624 if pnode.NumChildren() == 1: # No trailers
625 return node
626
627 # Support a->startswith(b) and mydict.key
628 n = pnode.NumChildren()
629 i = 1
630 while i < n and ISNONTERMINAL(pnode.GetChild(i).typ):
631 node = self._Trailer(node, pnode.GetChild(i))
632 i += 1
633
634 if i != n: # ['**' factor]
635 op_tok = pnode.GetChild(i).tok
636 assert op_tok.id == Id.Arith_DStar, op_tok
637 factor = self.Expr(pnode.GetChild(i + 1))
638 node = expr.Binary(op_tok, node, factor)
639
640 return node
641
642 elif typ == grammar_nt.oil_expr_sub:
643 return self.Expr(pnode.GetChild(0))
644
645 #
646 # Oil Lexer Modes
647 #
648
649 elif typ == grammar_nt.sh_array_literal:
650 return cast(ShArrayLiteral, pnode.GetChild(1).tok)
651
652 elif typ == grammar_nt.old_sh_array_literal:
653 return cast(ShArrayLiteral, pnode.GetChild(1).tok)
654
655 elif typ == grammar_nt.sh_command_sub:
656 return cast(CommandSub, pnode.GetChild(1).tok)
657
658 elif typ == grammar_nt.braced_var_sub:
659 return cast(BracedVarSub, pnode.GetChild(1).tok)
660
661 elif typ == grammar_nt.dq_string:
662 return cast(DoubleQuoted, pnode.GetChild(1).tok)
663
664 elif typ == grammar_nt.sq_string:
665 return cast(SingleQuoted, pnode.GetChild(1).tok)
666
667 elif typ == grammar_nt.simple_var_sub:
668 tok = pnode.GetChild(0).tok
669
670 if tok.id == Id.VSub_DollarName: # $foo is disallowed
671 bare = tok.tval[1:]
672 p_die(
673 'In expressions, remove $ and use `%s`, or sometimes "$%s"'
674 % (bare, bare), tok)
675
676 # $? is allowed
677 return SimpleVarSub(tok, lexer.TokenSliceLeft(tok, 1))
678
679 else:
680 nt_name = self.number2symbol[typ]
681 raise AssertionError("PNode type %d (%s) wasn't handled" %
682 (typ, nt_name))
683
684 else: # Terminals should have a token
685 id_ = tok.id
686
687 if id_ == Id.Expr_Name:
688 return expr.Var(tok)
689
690 if id_ in (Id.Expr_DecInt, Id.Expr_BinInt, Id.Expr_OctInt,
691 Id.Expr_HexInt, Id.Expr_Float):
692 return expr.Const(tok)
693
694 if id_ in (Id.Expr_Null, Id.Expr_True, Id.Expr_False,
695 Id.Char_OneChar, Id.Char_UBraced, Id.Char_Pound):
696 return expr.Const(tok)
697
698 raise NotImplementedError(Id_str(id_))
699
700 def _ArrayItem(self, p_node):
701 # type: (PNode) -> expr_t
702 assert p_node.typ == grammar_nt.array_item
703
704 child0 = p_node.GetChild(0)
705 typ0 = child0.typ
706 if ISNONTERMINAL(typ0):
707 return self.Expr(child0)
708 else:
709 if child0.tok.id == Id.Op_LParen: # (x+1)
710 return self.Expr(p_node.GetChild(1))
711 return self.Expr(child0) # $1 ${x} etc.
712
713 def _PlaceList(self, p_node):
714 # type: (PNode) -> List[place_expr_t]
715 """place_list: expr (',' expr)*"""
716 assert p_node.typ == grammar_nt.place_list
717 places = [] # type: List[place_expr_t]
718 n = p_node.NumChildren()
719 for i in xrange(0, n, 2): # was children[::2]
720 p = p_node.GetChild(i)
721 e = self.Expr(p)
722 UP_e = e
723 tag = e.tag()
724 if tag == expr_e.Var: # COMPATIBILITY hack
725 e = cast(expr.Var, UP_e)
726 places.append(place_expr.Var(e.name))
727 elif tag in (place_expr_e.Var, place_expr_e.Subscript,
728 place_expr_e.Attribute):
729 places.append(cast(place_expr_t, UP_e))
730 else:
731 # This blame mechanism seems to work. Otherwise we don't have a method
732 # to blame an arbitrary expr_t.
733 blame = cast(loc_t,
734 p.tok) if p.tok else loc.Missing # type: loc_t
735 p_die("Can't assign to this expression", blame)
736 return places
737
738 def MakeVarDecl(self, p_node):
739 # type: (PNode) -> command.VarDecl
740 """oil_var_decl: name_type_list '=' testlist end_stmt."""
741 typ = p_node.typ
742 assert typ == grammar_nt.oil_var_decl
743
744 #log('len(children) = %d', len(children))
745 lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
746 # This syntax is confusing, and different than JavaScript
747 # var x, y = 1, 2
748 if len(lhs) > 1:
749 eq_tok = p_node.GetChild(1).tok
750 p_die('Only one variable can be initialized', eq_tok)
751
752 rhs = self.Expr(p_node.GetChild(2))
753
754 # The caller should fill in the keyword token.
755 return command.VarDecl(None, lhs, rhs)
756
757 def MakePlaceMutation(self, p_node):
758 # type: (PNode) -> command.PlaceMutation
759 """Parse tree to LST
760
761 oil_place_mutation: place_list (augassign | '=') testlist end_stmt
762 """
763 typ = p_node.typ
764 assert typ == grammar_nt.oil_place_mutation
765
766 place_list = self._PlaceList(p_node.GetChild(0)) # could be a tuple
767 op_tok = p_node.GetChild(1).tok
768 if len(place_list) > 1 and op_tok.id != Id.Arith_Equal:
769 p_die('Multiple assignment must use =', op_tok)
770 rhs = self.Expr(p_node.GetChild(2))
771 return command.PlaceMutation(None, place_list, op_tok, rhs)
772
773 def OilForExpr(self, pnode):
774 # type: (PNode) -> Tuple[List[NameType], expr_t]
775 typ = pnode.typ
776
777 if typ == grammar_nt.oil_for:
778 # oil_for: '(' lvalue_list 'in' testlist ')'
779 lhs = self._NameTypeList(pnode.GetChild(1)) # could be a tuple
780 iterable = self.Expr(pnode.GetChild(3))
781 return lhs, iterable
782
783 nt_name = self.number2symbol[typ]
784 raise AssertionError("PNode type %d (%s) wasn't handled" %
785 (typ, nt_name))
786
787 def YshCasePattern(self, pnode):
788 # type: (PNode) -> pat_t
789 assert pnode.typ == grammar_nt.ysh_case_pat, pnode
790
791 pattern = pnode.GetChild(0)
792 typ = pattern.typ
793 if typ == Id.Op_LParen:
794 # pat_expr or pat_else
795 pattern = pnode.GetChild(1)
796 typ = pattern.typ
797
798 if typ == grammar_nt.pat_else:
799 return pat.Else
800 elif typ == grammar_nt.pat_exprs:
801 exprs = [] # type: List[expr_t]
802 for i in xrange(pattern.NumChildren()):
803 child = pattern.GetChild(i)
804 if child.typ == grammar_nt.expr:
805 expr = self.Expr(child)
806 exprs.append(expr)
807 return pat.YshExprs(exprs)
808
809 elif typ == grammar_nt.pat_eggex:
810 # pat_eggex
811 re = self._Regex(pattern.GetChild(1))
812 return pat.Eggex(re)
813
814 raise NotImplementedError()
815
816 def _Argument(self, p_node, do_named, arglist):
817 # type: (PNode, bool, ArgList) -> None
818 """Parse tree to LST
819
820 argument: (
821 test [comp_for]
822 | test '=' test # named arg
823 | '...' test # var args
824 )
825 """
826 pos_args = arglist.pos_args
827 named_args = arglist.named_args
828
829 assert p_node.typ == grammar_nt.argument, p_node
830 n = p_node.NumChildren()
831 if n == 1:
832 arg = self.Expr(p_node.GetChild(0))
833 pos_args.append(arg)
834 return
835
836 if n == 2:
837 # Note: We allow multiple spreads, just like Julia. They are
838 # concatenated as in lists and dicts.
839 if p_node.GetChild(0).tok.id == Id.Expr_Ellipsis:
840 spread_expr = self.Expr(p_node.GetChild(1))
841 if do_named:
842 # Implicit spread with name = None
843 named_args.append(NamedArg(None, spread_expr))
844 else:
845 pos_args.append(
846 expr.Spread(spread_expr, expr_context_e.Store))
847 return
848
849 if p_node.GetChild(1).typ == grammar_nt.comp_for:
850 elt = self.Expr(p_node.GetChild(0))
851 comp = self._CompFor(p_node.GetChild(1))
852 arg = expr.GeneratorExp(elt, [comp])
853 pos_args.append(arg)
854 return
855
856 raise AssertionError()
857
858 if n == 3:
859 n1 = NamedArg(p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
860 named_args.append(n1)
861 return
862
863 raise NotImplementedError()
864
865 def _Arglist(self, parent, arglist):
866 # type: (PNode, ArgList) -> None
867 """Parse tree to LST
868
869 arglist:
870 argument (',' argument)* [',']
871 [';' argument (',' argument)* [','] ]
872 """
873 do_named = False
874 for i in xrange(parent.NumChildren()):
875 p_child = parent.GetChild(i)
876 if ISNONTERMINAL(p_child.typ):
877 self._Argument(p_child, do_named, arglist)
878 elif p_child.tok.id == Id.Op_Semi:
879 do_named = True
880
881 def ToArgList(self, pnode, arglist):
882 # type: (PNode, ArgList) -> None
883 """Transform arg lists.
884
885 oil_arglist: '(' [arglist] ')'
886 """
887 if pnode.NumChildren() == 2: # f()
888 return
889
890 assert pnode.NumChildren() == 3
891 p = pnode.GetChild(1) # the X in '( X )'
892
893 assert p.typ == grammar_nt.arglist
894 self._Arglist(p, arglist)
895
896 def _TypeExpr(self, pnode):
897 # type: (PNode) -> TypeExpr
898 """
899 type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
900 """
901 assert pnode.typ == grammar_nt.type_expr, pnode.typ
902
903 #self.p_printer.Print(pnode)
904
905 ty = TypeExpr.CreateNull() # don't allocate children
906
907 ty.tok = pnode.GetChild(0).tok
908 ty.name = ty.tok.tval # TODO: TokenVal()
909
910 n = pnode.NumChildren()
911 if n == 1:
912 return ty
913
914 ty.params = []
915 i = 2
916 while i < n:
917 p = self._TypeExpr(pnode.GetChild(i))
918 ty.params.append(p)
919 i += 2 # skip comma
920
921 return ty
922
923 def _TypeExprList(self, pnode):
924 # type: (PNode) -> List[TypeExpr]
925 """
926 For return value annotation?
927 """
928 assert pnode.typ == grammar_nt.type_expr_list, pnode.typ
929 return None
930
931 def _Param(self, pnode):
932 # type: (PNode) -> Param
933 """
934 param: Expr_Name [type_expr] ['=' expr]
935 """
936 assert pnode.typ == grammar_nt.param
937
938 name_tok = pnode.GetChild(0).tok
939 n = pnode.NumChildren()
940
941 assert name_tok.id == Id.Expr_Name, name_tok
942
943 default_val = None # type: expr_t
944 type_ = None # type: TypeExpr
945
946 #self.p_printer.Print(pnode)
947
948 if n == 1:
949 # proc p(a)
950 pass
951
952 elif n == 2:
953 # proc p(a Int)
954 type_ = self._TypeExpr(pnode.GetChild(1))
955
956 elif n == 3:
957 # proc p(a = 3)
958 default_val = self.Expr(pnode.GetChild(2))
959
960 elif n == 4:
961 # proc p(a Int = 3)
962 type_ = self._TypeExpr(pnode.GetChild(1))
963 default_val = self.Expr(pnode.GetChild(3))
964
965 return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
966
967 def _ParamGroup(self, p_node):
968 # type: (PNode) -> Tuple[List[Param], Optional[RestParam]]
969 """
970 param_group:
971 (param ',')*
972 [ (param | '...' Expr_Name) [,] ]
973 """
974 assert p_node.typ == grammar_nt.param_group, p_node
975
976 #self.p_printer.Print(p_node)
977
978 params = [] # type: List[Param]
979 rest_of = None # type: Optional[RestParam]
980
981 n = p_node.NumChildren()
982 i = 0
983 while i < n:
984 child = p_node.GetChild(i)
985 if child.typ == grammar_nt.param:
986 params.append(self._Param(child))
987 elif child.tok.id == Id.Expr_Ellipsis:
988 tok = p_node.GetChild(i + 1).tok
989 rest_of = RestParam(tok, lexer.TokenVal(tok))
990 i += 2
991 #log('i %d n %d', i, n)
992
993 return params, rest_of
994
995 def Proc(self, p_node):
996 # type: (PNode) -> proc_sig_t
997 """
998 ysh_proc: (
999 [ '('
1000 [ param_group ] # word params, with defaults
1001 [ ';' [ param_group ] ] # positional typed params, with defaults
1002 [ ';' [ param_group ] ] # named params, with defaults
1003 [ ';' Expr_Name ] # optional block param, with no type or default
1004 ')'
1005 ]
1006 '{' # opening { for pgen2
1007 )
1008 """
1009 typ = p_node.typ
1010 assert typ == grammar_nt.ysh_proc
1011
1012 #self.p_printer.Print(p_node)
1013
1014 n = p_node.NumChildren()
1015 if n == 1: # proc f {
1016 return proc_sig.Open
1017
1018 if n == 3: # proc f () {
1019 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1020
1021 # proc f( three param groups, and block group )
1022 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1023
1024 # Word args
1025 i = 1
1026 child = p_node.GetChild(i)
1027 if child.typ == grammar_nt.param_group:
1028 sig.word_params, sig.rest_of_words = self._ParamGroup(p_node.GetChild(i))
1029 i += 2
1030 else:
1031 i += 1
1032
1033 # Validate word args
1034 for word in sig.word_params:
1035 if word.type:
1036 if word.type.name not in ('Str', 'Ref'):
1037 p_die('Word params may only have type Str or Ref',
1038 word.type.tok)
1039 if word.type.params is not None:
1040 p_die('Unexpected type parameters', word.type.tok)
1041
1042 #log('i %d n %d', i, n)
1043 if i >= n:
1044 return sig
1045
1046 # Positional args
1047 child = p_node.GetChild(i)
1048 if child.typ == grammar_nt.param_group:
1049 sig.pos_params, sig.rest_of_pos = (
1050 self._ParamGroup(p_node.GetChild(i)))
1051 i += 2
1052 else:
1053 i += 1
1054
1055 #log('i %d n %d', i, n)
1056 if i >= n:
1057 return sig
1058
1059 # Keyword args
1060 child = p_node.GetChild(i)
1061 if child.typ == grammar_nt.param_group:
1062 sig.named_params, sig.rest_of_named = (
1063 self._ParamGroup(p_node.GetChild(i)))
1064 i += 2
1065 else:
1066 i += 1
1067
1068 #log('i %d n %d', i, n)
1069 if i >= n:
1070 return sig
1071
1072 child = p_node.GetChild(i)
1073 if child.typ == grammar_nt.param_group:
1074 params, rest = self._ParamGroup(p_node.GetChild(i))
1075 if len(params) > 1:
1076 p_die('Only 1 block param is allowed', params[1].blame_tok)
1077 if rest:
1078 p_die("Rest param isn't allowed for blocks", rest.blame_tok)
1079
1080 if len(params) > 0:
1081 if params[0].type:
1082 if params[0].type.name != 'Command':
1083 p_die('Block param must have type Command',
1084 params[0].type.tok)
1085 if params[0].type.params is not None:
1086 p_die('Unexpected type parameters', params[0].type.tok)
1087
1088 sig.block_param = RestParam(params[0].blame_tok, params[0].name)
1089
1090 return sig
1091
1092 def func_item(self, node):
1093 # type: (PNode) -> command_t
1094 """Parse tree to LST
1095
1096 func_item: (
1097 ('var' | 'const') name_type_list '=' testlist # oil_var_decl.
1098
1099 # TODO: for, if/switch, with, break/continue/return, try/throw, etc.
1100 | 'while' test suite
1101 | 'for' name_type_list 'in' test suite
1102 | flow_stmt
1103 | 'set' place_list (augassign | '=') testlist # oil_place_mutation
1104 # x f(x) etc.
1105 #
1106 # And x = 1. Python uses the same "hack" to fit within pgen2. It
1107 # also supports a = b = 1, which we don't want.
1108 #
1109 # And echo 'hi' 'there'
1110 #
1111 # TODO: expr_to_ast needs to validate this
1112 | testlist (['=' testlist] | tea_word*)
1113 )
1114 """
1115 if node.tok.id == Id.Expr_While:
1116 return command.While(self.Expr(node.GetChild(1)),
1117 self._Suite(node.GetChild(2)))
1118 elif node.tok.id == Id.Expr_For:
1119 return command.For(self._NameTypeList(node.GetChild(1)),
1120 self.Expr(node.GetChild(3)),
1121 self._Suite(node.GetChild(4)))
1122 elif node.tok.id == Id.Expr_Break:
1123 return command.Break
1124 elif node.tok.id == Id.Expr_Continue:
1125 return command.Continue
1126 elif node.tok.id == Id.Expr_Return:
1127 # 'return' [testlist]
1128 if node.NumChildren() == 1:
1129 return command.Return(None)
1130 else:
1131 return command.Return(self.Expr(node.GetChild(1)))
1132 elif node.tok.id == Id.Expr_Name:
1133 # TODO: turn echo 'hi' into AST
1134 return command.NoOp
1135 else:
1136 raise NotImplementedError(Id_str(node.tok.id))
1137
1138 def func_items(self, pnode):
1139 # type: (PNode) -> List[command_t]
1140 """func_items: func_item (semi_newline func_item)* [semi_newline]"""
1141 # Rewrite of
1142 # return [self.func_item(item) for item in pnode.children[::2]]
1143 # Unfortunately mycpp doesn't support the stride.
1144
1145 result = [] # type: List[command_t]
1146 n = pnode.NumChildren()
1147 for i in xrange(0, n, 2):
1148 result.append(self.func_item(pnode.GetChild(i)))
1149 return result
1150
1151 def _Suite(self, pnode):
1152 # type: (PNode) -> command.CommandList
1153 """Parse tree to LST
1154
1155 suite: '{' [Op_Newline] [func_items] '}'
1156 """
1157 n = pnode.NumChildren()
1158
1159 if n == 2: # {}
1160 return command.CommandList([])
1161
1162 if n == 3:
1163 if pnode.GetChild(1).typ == grammar_nt.func_items: # { func_items }
1164 items_index = 1
1165 else:
1166 return command.CommandList([])
1167
1168 if n == 4: # { Op_Newline func_items }
1169 items_index = 2
1170
1171 return command.CommandList(self.func_items(pnode.GetChild(items_index)))
1172
1173 def YshFunc(self, p_node, out):
1174 # type: (PNode, command.Func) -> None
1175 """Parse tree to LST
1176
1177 ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
1178 """
1179 assert p_node.typ == grammar_nt.ysh_func
1180
1181 #self.p_printer.Print(p_node)
1182
1183 out.name = p_node.GetChild(0).tok
1184
1185 n = p_node.NumChildren()
1186 i = 2 # after (
1187
1188 child = p_node.GetChild(i)
1189 if child.typ == grammar_nt.param_group:
1190 out.pos_params, out.rest_of_pos = self._ParamGroup(child)
1191 i += 2 # skip past ;
1192 else:
1193 i += 1
1194
1195 if i >= n:
1196 return
1197
1198 child = p_node.GetChild(i)
1199 if child.typ == grammar_nt.param_group:
1200 out.named_params, out.rest_of_named = self._ParamGroup(child)
1201
1202 def TeaFunc(self, pnode, out):
1203 # type: (PNode, command.TeaFunc) -> None
1204 """Parse tree to LST
1205
1206 tea_func:
1207 '(' [param_group] [';' param_group] ')' [type_expr_list] suite
1208 """
1209 assert pnode.typ == grammar_nt.tea_func
1210 assert pnode.GetChild(0).tok.id == Id.Op_LParen # proc foo(
1211
1212 # TODO: Simplify this in the style of YshFunc() above
1213
1214 pos = 1
1215 typ2 = pnode.GetChild(pos).typ
1216 if ISNONTERMINAL(typ2):
1217 assert typ2 == grammar_nt.param_group, pnode.GetChild(
1218 pos) # f(x, y)
1219 # every other one is a comma
1220 out.pos_params, out.pos_splat = self._ParamGroup(
1221 pnode.GetChild(pos))
1222 pos += 1
1223
1224 id_ = pnode.GetChild(pos).tok.id
1225 if id_ == Id.Op_RParen: # f()
1226 pos += 1
1227 elif id_ == Id.Op_Semi: # f(; a)
1228 out.named_params, out.named_splat = self._ParamGroup(
1229 pnode.GetChild(pos + 1))
1230 pos += 3
1231
1232 if pnode.GetChild(pos).typ == grammar_nt.type_expr_list:
1233 out.return_types = self._TypeExprList(pnode.GetChild(pos))
1234 pos += 1
1235
1236 out.body = self._Suite(pnode.GetChild(pos))
1237
1238 def NamedFunc(self, pnode, out):
1239 # type: (PNode, command.TeaFunc) -> None
1240 """named_func: Expr_Name tea_func."""
1241 assert pnode.typ == grammar_nt.named_func
1242
1243 out.name = pnode.GetChild(0).tok
1244 self.TeaFunc(pnode.GetChild(1), out)
1245
1246 def _DataParams(self, p_node):
1247 # type: (PNode) -> List[Param]
1248 """data_params: (param ',')* [ param [','] ]"""
1249 params = [] # type: List[Param]
1250
1251 n = p_node.NumChildren()
1252 for i in xrange(0, n, 2):
1253 params.append(self._Param(p_node.GetChild(i)))
1254
1255 return params
1256
1257 def Data(self, pnode, out):
1258 # type: (PNode, command.Data) -> None
1259 """tea_data: Expr_Name '(' [data_params] ')'."""
1260 assert pnode.typ == grammar_nt.tea_data
1261
1262 out.name = pnode.GetChild(0).tok
1263
1264 assert pnode.GetChild(1).tok.id == Id.Op_LParen # data foo(
1265 #print(pnode)
1266 if ISNONTERMINAL(pnode.GetChild(2).typ):
1267 out.params = self._DataParams(pnode.GetChild(2))
1268
1269 def _VariantType(self, pnode):
1270 # type: (PNode) -> variant_type_t
1271 """variant_type: Expr_Symbol | '(' data_params ')'."""
1272 n = pnode.NumChildren()
1273 if n == 1:
1274 return variant_type.Ref(pnode.GetChild(0).tok)
1275 else:
1276 assert n == 3, pnode
1277 return variant_type.Anon(self._DataParams(pnode.GetChild(1)))
1278
1279 def _Variant(self, pnode):
1280 # type: (PNode) -> Variant
1281 """Variant: Expr_Name [ variant_type ]"""
1282 assert pnode.typ == grammar_nt.variant, pnode
1283 t = None # type: variant_type_t
1284 if pnode.NumChildren() == 2:
1285 t = self._VariantType(pnode.GetChild(1))
1286 return Variant(pnode.GetChild(0).tok, t)
1287
1288 def Enum(self, pnode, out):
1289 # type: (PNode, command.Enum) -> None
1290 """Parse tree to LST
1291
1292 tea_enum:
1293 Expr_Name '{' [Op_Newline]
1294 (variant variant_end)* [ variant [variant_end] ]
1295 '}'
1296 """
1297 assert pnode.typ == grammar_nt.tea_enum
1298
1299 out.name = pnode.GetChild(0).tok
1300
1301 assert pnode.GetChild(1).tok.id == Id.Op_LBrace # enum op {
1302
1303 start = 2
1304 if pnode.GetChild(start).tok.id == Id.Op_Newline:
1305 start = 3
1306
1307 n = pnode.NumChildren()
1308 for i in xrange(start, n - 1, 2): # skip commas
1309 p_node = pnode.GetChild(i)
1310 out.variants.append(self._Variant(p_node))
1311
1312 def Class(self, pnode, out):
1313 # type: (PNode, command.Class) -> None
1314 """tea_class: Expr_Name [':' Expr_Name ] '{' class_items '}'."""
1315 assert pnode.typ == grammar_nt.tea_class
1316
1317 out.name = pnode.GetChild(0).tok
1318
1319 #assert children[1].tok.id == Id.Op_LBrace # enum op {
1320 return
1321 #n = len(children)
1322 #for i in xrange(2, n-1, 2): # skip commas
1323 # p_node = children[i]
1324 # out.variants.append(self._Variant(p_node))
1325
1326 def Import(self, pnode, out):
1327 # type: (PNode, command.Import) -> None
1328 """Parse tree to LST
1329
1330 tea_import: (
1331 sq_string ['as' Expr_Name] (import_name ',')* [ import_name [','] ]
1332 )
1333 """
1334 assert pnode.typ == grammar_nt.tea_import
1335
1336 typ = pnode.GetChild(0).typ
1337 if ISNONTERMINAL(typ):
1338 if typ == grammar_nt.sq_string:
1339 sq_part = cast(SingleQuoted, pnode.GetChild(0).GetChild(1).tok)
1340 out.path = sq_part
1341
1342 #
1343 # Regex Language
1344 #
1345
1346 def _RangeChar(self, p_node):
1347 # type: (PNode) -> Token
1348 """An endpoint of a range (single char)
1349
1350 range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
1351 a-z 0-9 'a'-'z' \x00-\xff
1352 """
1353 assert p_node.typ == grammar_nt.range_char, p_node
1354 typ = p_node.GetChild(0).typ
1355 if ISNONTERMINAL(typ):
1356 # 'a' in 'a'-'b'
1357 if typ == grammar_nt.sq_string:
1358 sq_part = cast(SingleQuoted, p_node.GetChild(0).GetChild(1).tok)
1359 tokens = sq_part.tokens
1360 if len(tokens
1361 ) > 1: # Can happen with multiline single-quoted strings
1362 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1363 if len(tokens[0].tval) > 1:
1364 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1365 return tokens[0]
1366
1367 if typ == grammar_nt.char_literal:
1368 tok = p_node.GetChild(0).GetChild(0).tok
1369 return tok
1370
1371 raise NotImplementedError()
1372 else:
1373 # Expr_Name or Expr_DecInt
1374 tok = p_node.tok
1375 if tok.id in (Id.Expr_Name, Id.Expr_DecInt):
1376 # For the a in a-z, 0 in 0-9
1377 if len(tok.tval) != 1:
1378 p_die(RANGE_POINT_TOO_LONG, tok)
1379 return tok
1380
1381 raise NotImplementedError()
1382
1383 def _NonRangeChars(self, p_node):
1384 # type: (PNode) -> class_literal_term_t
1385 """\" \u1234 '#'."""
1386 assert p_node.typ == grammar_nt.range_char, p_node
1387 typ = p_node.GetChild(0).typ
1388 if ISNONTERMINAL(typ):
1389 p_child = p_node.GetChild(0)
1390 if typ == grammar_nt.sq_string:
1391 return cast(SingleQuoted, p_child.GetChild(1).tok)
1392
1393 if typ == grammar_nt.char_literal:
1394 return class_literal_term.CharLiteral(p_node.GetChild(0).tok)
1395
1396 raise NotImplementedError()
1397 else:
1398 # Look up PerlClass and PosixClass
1399 return self._NameInClass(None, p_node.GetChild(0).tok)
1400
1401 def _ClassLiteralTerm(self, p_node):
1402 # type: (PNode) -> class_literal_term_t
1403 """Parse tree to LST
1404
1405 class_literal_term:
1406 range_char ['-' range_char ]
1407 | '@' Expr_Name # splice
1408 | '!' Expr_Name # negate char class
1409 ...
1410 """
1411 assert p_node.typ == grammar_nt.class_literal_term, p_node
1412
1413 first = p_node.GetChild(0)
1414 typ = first.typ
1415
1416 if ISNONTERMINAL(typ):
1417 n = p_node.NumChildren()
1418
1419 if n == 1 and typ == grammar_nt.range_char:
1420 return self._NonRangeChars(p_node.GetChild(0))
1421
1422 # 'a'-'z' etc.
1423 if n == 3 and p_node.GetChild(1).tok.id == Id.Arith_Minus:
1424 start = self._RangeChar(p_node.GetChild(0))
1425 end = self._RangeChar(p_node.GetChild(2))
1426 return class_literal_term.Range(start, end)
1427
1428 else:
1429 if first.tok.id == Id.Expr_At:
1430 tok1 = p_node.GetChild(1).tok
1431 return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
1432
1433 if first.tok.id == Id.Expr_Bang:
1434 return self._NameInClass(
1435 p_node.GetChild(0).tok,
1436 p_node.GetChild(1).tok)
1437
1438 raise AssertionError(p_node.GetChild(0).tok.id)
1439
1440 nt_name = self.number2symbol[typ]
1441 raise NotImplementedError(nt_name)
1442
1443 def _ClassLiteral(self, p_node):
1444 # type: (PNode) -> List[class_literal_term_t]
1445 """class_literal: '[' class_literal_term+ ']'."""
1446 assert p_node.typ == grammar_nt.class_literal
1447 # skip [ and ]
1448 terms = [] # type: List[class_literal_term_t]
1449 for i in xrange(1, p_node.NumChildren() - 1):
1450 terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
1451
1452 return terms
1453
1454 def _NameInRegex(self, negated_tok, tok):
1455 # type: (Token, Token) -> re_t
1456 tok_str = tok.tval
1457 if tok_str == 'dot':
1458 if negated_tok:
1459 p_die("Can't negate this symbol", tok)
1460 return tok
1461
1462 if tok_str in POSIX_CLASSES:
1463 return PosixClass(negated_tok, tok_str)
1464
1465 perl = PERL_CLASSES.get(tok_str)
1466 if perl is not None:
1467 return PerlClass(negated_tok, perl)
1468
1469 if tok_str[0].isupper(): # e.g. HexDigit
1470 return re.Splice(tok, lexer.TokenVal(tok))
1471
1472 p_die("%r isn't a character class" % tok_str, tok)
1473
1474 def _NameInClass(self, negated_tok, tok):
1475 # type: (Token, Token) -> class_literal_term_t
1476 """Like the above, but 'dot' doesn't mean anything.
1477
1478 And `d` is a literal 'd', not `digit`.
1479 """
1480 tok_str = tok.tval
1481
1482 # A bare, unquoted character literal. In the grammar, this is expressed as
1483 # range_char without an ending.
1484
1485 # d is NOT 'digit', it's a literal 'd'!
1486 if len(tok_str) == 1:
1487 # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
1488 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
1489
1490 if negated_tok: # [~d] is not allowed, only [~digit]
1491 p_die("Can't negate this symbol", tok)
1492 return class_literal_term.CharLiteral(tok)
1493
1494 # digit, word, but not d, w, etc.
1495 if tok_str in POSIX_CLASSES:
1496 return PosixClass(negated_tok, tok_str)
1497
1498 perl = PERL_CLASSES.get(tok_str)
1499 if perl is not None:
1500 return PerlClass(negated_tok, perl)
1501 p_die("%r isn't a character class" % tok_str, tok)
1502
1503 def _ReAtom(self, p_atom):
1504 # type: (PNode) -> re_t
1505 """re_atom: ( char_literal."""
1506 assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
1507
1508 typ = p_atom.GetChild(0).typ
1509
1510 if ISNONTERMINAL(typ):
1511 p_child = p_atom.GetChild(0)
1512 if typ == grammar_nt.class_literal:
1513 return re.CharClassLiteral(False, self._ClassLiteral(p_child))
1514
1515 if typ == grammar_nt.sq_string:
1516 return cast(SingleQuoted, p_child.GetChild(1).tok)
1517
1518 if typ == grammar_nt.char_literal:
1519 return p_atom.GetChild(0).tok
1520
1521 raise NotImplementedError(typ)
1522
1523 else:
1524 tok = p_atom.GetChild(0).tok
1525
1526 # Special punctuation
1527 if tok.id in (Id.Expr_Dot, Id.Arith_Caret, Id.Expr_Dollar):
1528 return tok
1529
1530 # TODO: d digit can turn into PosixClass and PerlClass right here!
1531 # It's parsing.
1532 if tok.id == Id.Expr_Name:
1533 return self._NameInRegex(None, tok)
1534
1535 if tok.id == Id.Expr_Symbol:
1536 # Validate symbols here, like we validate PerlClass, etc.
1537 if tok.tval in ('%start', '%end', 'dot'):
1538 return tok
1539 p_die("Unexpected token %r in regex" % tok.tval, tok)
1540
1541 if tok.id == Id.Expr_At:
1542 # | '@' Expr_Name
1543 tok = p_atom.GetChild(1).tok
1544 return re.Splice(tok, lexer.TokenVal(tok))
1545
1546 if tok.id == Id.Expr_Bang:
1547 # | '!' (Expr_Name | class_literal)
1548 # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
1549 n = p_atom.NumChildren()
1550 if n == 2:
1551 typ = p_atom.GetChild(1).typ
1552 if ISNONTERMINAL(typ):
1553 return re.CharClassLiteral(
1554 True, self._ClassLiteral(p_atom.GetChild(1)))
1555 else:
1556 return self._NameInRegex(tok, p_atom.GetChild(1).tok)
1557 else:
1558 # Note: !! conflicts with shell history
1559 p_die(
1560 "Backtracking with !! isn't implemented (requires Python/PCRE)",
1561 p_atom.GetChild(1).tok)
1562
1563 if tok.id == Id.Op_LParen:
1564 # | '(' regex ')'
1565
1566 # Note: in ERE (d+) is the same as <d+>. That is, Group becomes
1567 # Capture.
1568 return re.Group(self._Regex(p_atom.GetChild(1)))
1569
1570 if tok.id == Id.Arith_Less:
1571 # | '<' regex [':' name_type] '>'
1572
1573 regex = self._Regex(p_atom.GetChild(1))
1574
1575 n = p_atom.NumChildren()
1576 if n == 5:
1577 # TODO: Add type expression
1578 # YES
1579 # < d+ '.' d+ : ratio Float >
1580 # < d+ : month Int >
1581 # INVALID
1582 # < d+ : month List[int] >
1583 name_tok = p_atom.GetChild(3).GetChild(0).tok
1584
1585 # TODO: is it possible to output the capture name <-> index mapping
1586 # here for POSIX ERE?
1587
1588 else:
1589 name_tok = None
1590
1591 return re.Capture(regex, name_tok)
1592
1593 if tok.id == Id.Arith_Colon:
1594 # | ':' '(' regex ')'
1595 raise NotImplementedError(Id_str(tok.id))
1596
1597 raise NotImplementedError(Id_str(tok.id))
1598
1599 def _RepeatOp(self, p_repeat):
1600 # type: (PNode) -> re_repeat_t
1601 assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
1602
1603 tok = p_repeat.GetChild(0).tok
1604 id_ = tok.id
1605 # a+
1606 if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
1607 return re_repeat.Op(tok)
1608
1609 if id_ == Id.Op_LBrace:
1610 p_range = p_repeat.GetChild(1)
1611 assert p_range.typ == grammar_nt.repeat_range, p_range
1612
1613 # repeat_range: (
1614 # Expr_DecInt [',']
1615 # | ',' Expr_DecInt
1616 # | Expr_DecInt ',' Expr_DecInt
1617 # )
1618
1619 n = p_range.NumChildren()
1620 if n == 1: # {3}
1621 return re_repeat.Num(p_range.GetChild(0).tok)
1622
1623 if n == 2:
1624 if p_range.GetChild(0).tok.id == Id.Expr_DecInt: # {,3}
1625 return re_repeat.Range(p_range.GetChild(0).tok, None)
1626 else: # {1,}
1627 return re_repeat.Range(None, p_range.GetChild(1).tok)
1628
1629 if n == 3: # {1,3}
1630 return re_repeat.Range(
1631 p_range.GetChild(0).tok,
1632 p_range.GetChild(2).tok)
1633
1634 raise AssertionError(n)
1635
1636 raise AssertionError(id_)
1637
1638 def _Regex(self, p_node):
1639 # type: (PNode) -> re_t
1640 typ = p_node.typ
1641
1642 if typ == grammar_nt.regex:
1643 # regex: [re_alt] (('|'|'or') re_alt)*
1644
1645 if p_node.NumChildren() == 1:
1646 return self._Regex(p_node.GetChild(0))
1647
1648 # NOTE: We're losing the | and or operators
1649 alts = [] # type: List[re_t]
1650 n = p_node.NumChildren()
1651 for i in xrange(0, n, 2): # was children[::2]
1652 c = p_node.GetChild(i)
1653 alts.append(self._Regex(c))
1654 return re.Alt(alts)
1655
1656 if typ == grammar_nt.re_alt:
1657 # re_alt: (re_atom [repeat_op])+
1658 i = 0
1659 n = p_node.NumChildren()
1660 seq = [] # type: List[re_t]
1661 while i < n:
1662 r = self._ReAtom(p_node.GetChild(i))
1663 i += 1
1664 if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
1665 repeat_op = self._RepeatOp(p_node.GetChild(i))
1666 r = re.Repeat(r, repeat_op)
1667 i += 1
1668 seq.append(r)
1669
1670 if len(seq) == 1:
1671 return seq[0]
1672 else:
1673 return re.Seq(seq)
1674
1675 nt_name = self.number2symbol[typ]
1676 raise NotImplementedError(nt_name)
1677
1678
1679 # vim: sw=4