1 """expr_parse.py."""
2 from __future__ import print_function
3
4 from _devbuild.gen.syntax_asdl import (loc, Token, DoubleQuoted, SingleQuoted,
5 CommandSub, ShArrayLiteral, CompoundWord,
6 word_part_t, word_e)
7 from _devbuild.gen.id_kind_asdl import Id, Kind, Id_str
8 from _devbuild.gen.types_asdl import lex_mode_e
9
10 from core import ui
11 from core.error import p_die
12 from frontend import consts
13 from frontend import reader
14 from mycpp import mylib
15 from mycpp.mylib import log, tagswitch
16 from osh import braces
17 from osh import word_
18 from pgen2 import parse
19 from pgen2.pnode import PNodeAllocator
20
21 _ = log
22
23 from typing import TYPE_CHECKING, Any, Dict, Tuple, List, cast, Optional
24 if TYPE_CHECKING:
25 from frontend.lexer import Lexer
26 from frontend.parse_lib import ParseContext
27 from pgen2.grammar import Grammar
28 from pgen2.pnode import PNode
29
if mylib.PYTHON:

    class ParseTreePrinter(object):
        """Prints a tree of PNode instances."""

        def __init__(self, names):
            # type: (Dict[int, str]) -> None
            self.names = names
            self.f = mylib.Stdout()

        def _Print(self, pnode, indent, i):
            # type: (PNode, int, int) -> None

            ind = '  ' * indent  # two spaces per level of depth
            # NOTE:
            # - why isn't 'tok' None for PRODUCTIONS?  There is some
            #   redundancy to get rid of.
            if pnode.tok:
                if isinstance(pnode.tok, Token):
                    v = pnode.tok.tval
                else:
                    # e.g. CommandSub for x = $(echo hi)
                    v = repr(pnode.tok)
            else:
                v = '-'
            self.f.write('%s%d %s %s\n' % (ind, i, self.names[pnode.typ], v))
            if pnode.children is not None:
                for i, child in enumerate(pnode.children):
                    self._Print(child, indent + 1, i)

        def Print(self, pnode):
            # type: (PNode) -> None
            self._Print(pnode, 0, 0)

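# A minimal debugging sketch (hypothetical 'names' dict mapping both token Ids
# and nonterminal numbers to strings, and 'root' as returned by
# ExprParser.Parse below):
#
#   printer = ParseTreePrinter(names)
#   printer.Print(root)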

def _Classify(gr, tok, tea_keywords):
    # type: (Grammar, Token, bool) -> int

    # We have to match up what ParserGenerator.make_grammar() did when
    # calling make_label() and make_first().  See classify() in
    # opy/pgen2/driver.py.

    # Special case for top-level Tea keywords like data/enum/class, etc.
    # TODO: Do this more elegantly at grammar build time.
    if tea_keywords and tok.id == Id.Expr_Name:
        if tok.tval in gr.keywords:
            #log('NEW %r', gr.keywords[tok.val])
            return gr.keywords[tok.tval]

    # This handles 'x'.
    if tok.id in gr.tokens:
        return gr.tokens[tok.id]

    if tok.id == Id.Unknown_DEqual:
        p_die('Use === to be exact, or ~== to convert types', tok)

    if tok.id == Id.Unknown_Tok:
        type_str = ''
    else:
        type_str = ' (%s)' % ui.PrettyId(tok.id)
    p_die('Unexpected token in expression mode%s' % type_str, tok)

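# Roughly: for an expression like 'x + 1', the name, operator, and number
# tokens are all looked up in gr.tokens, and the resulting integer ilabels
# are what pgen2's addtoken() consumes.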

# Newlines are ignored between these pairs.
_OTHER_BALANCE = {
    # Parenthesized expressions (tuples) and func/proc parameter lists
    Id.Op_LParen: 1,
    Id.Op_RParen: -1,
    Id.Op_LBracket: 1,
    Id.Op_RBracket: -1,

    # Dicts are {}, and the grammar respects Op_Newline.
}
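# For example, in a multi-line list like
#
#   var x = [
#     1,
#     2,
#   ]
#
# the [ raises the balance to 1, so the newlines after it are skipped rather
# than pushed to pgen2, and the matching ] drops the balance back to 0.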


def _PushOilTokens(parse_ctx, gr, p, lex, tea_keywords):
    # type: (ParseContext, Grammar, parse.Parser, Lexer, bool) -> Token
    """Push tokens onto pgen2's parser.

    Returns the last token so it can be reused/seen by the CommandParser.
    """
    #log('keywords = %s', gr.keywords)
    #log('tokens = %s', gr.tokens)

    last_token = None  # type: Optional[Token]
    prev_was_newline = False

    balance = 0  # to ignore newlines

    while True:
        if last_token:  # e.g. left over from WordParser
            tok = last_token
            #log('last_token = %s', last_token)
            last_token = None
        else:
            tok = lex.Read(lex_mode_e.Expr)
            #log('tok = %s', tok)

        # Comments and whitespace.  Newlines aren't ignored.
        if consts.GetKind(tok.id) == Kind.Ignored:
            continue

        # For multiline lists, maps, etc.
        if tok.id == Id.Op_Newline:
            if balance > 0:
                #log('*** SKIPPING NEWLINE')
                continue
            # Eliminate duplicate newline tokens.  It makes the grammar
            # simpler, and it's consistent with CPython's lexer and our own
            # WordParser.
            if prev_was_newline:
                continue
            prev_was_newline = True
        else:
            prev_was_newline = False

        balance += _OTHER_BALANCE.get(tok.id, 0)
        #log('BALANCE after seeing %s = %d', tok.id, balance)

        if tok.id == Id.Op_LParen:
            # For nesting inside $()
            lex.PushHint(Id.Op_RParen, Id.Op_RParen)

        #if tok.id == Id.Expr_Name and tok.val in KEYWORDS:
        #  tok.id = KEYWORDS[tok.val]
        #  log('Replaced with %s', tok.id)

        assert tok.id < 256, Id_str(tok.id)

        ilabel = _Classify(gr, tok, tea_keywords)
        #log('tok = %s, ilabel = %d', tok, ilabel)

        if p.addtoken(tok.id, tok, ilabel):
            return tok

        #
        # Mutually recursive calls into the command/word parsers.
        #
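        # Each branch below parses a sub-language (array literal, command sub,
        # string literal, etc.) and gets back an AST node.  pgen2 only accepts
        # tokens, so the node is cast to Token and pushed under the special
        # Id.Expr_CastedDummy label; expr_to_ast casts it back to the real
        # type when walking the parse tree.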

        if tok.id in (Id.Left_ColonPipe,
                      Id.Left_PercentParen):  # :| %( LEGACY!
            left_tok = tok
            if tok.id == Id.Left_PercentParen:
                lex.PushHint(Id.Op_RParen, Id.Right_ShArrayLiteral)

            # Blame the opening token
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)
            words = []  # type: List[CompoundWord]
            close_tok = None  # type: Optional[Token]
            done = False
            while not done:
                w = w_parser.ReadWord(lex_mode_e.ShCommand)
                with tagswitch(w) as case:
                    if case(word_e.Operator):
                        tok = cast(Token, w)
                        if tok.id == Id.Right_ShArrayLiteral:
                            if left_tok.id != Id.Left_PercentParen:
                                p_die('Expected | to close', left_tok)
                            close_tok = tok
                            done = True  # can't use break here
                        elif tok.id == Id.Op_Pipe:
                            if left_tok.id != Id.Left_ColonPipe:
                                p_die('Expected ) to close', left_tok)
                            close_tok = tok
                            done = True  # can't use break here
                        elif tok.id == Id.Op_Newline:  # internal newlines allowed
                            continue
                        else:
                            p_die('Unexpected token in array literal',
                                  loc.Word(w))

                    elif case(word_e.Compound):
                        words.append(cast(CompoundWord, w))

                    else:
                        raise AssertionError()

            words2 = braces.BraceDetectAll(words)
            words3 = word_.TildeDetectAll(words2)

            typ = Id.Expr_CastedDummy

            lit_part = ShArrayLiteral(left_tok, words3, close_tok)
            opaque = cast(Token, lit_part)  # HACK for expr_to_ast
            done = p.addtoken(typ, opaque, gr.tokens[typ])
            assert not done  # can't end the expression

            # Now push the closing )
            ilabel = _Classify(gr, close_tok, tea_keywords)
            done = p.addtoken(tok.id, close_tok, ilabel)
            assert not done  # can't end the expression

            continue

        # $( @( ^(
        if tok.id in (Id.Left_DollarParen, Id.Left_AtParen,
                      Id.Left_CaretParen):

            left_token = tok

            lex.PushHint(Id.Op_RParen, Id.Eof_RParen)
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            c_parser = parse_ctx.MakeParserForCommandSub(
                line_reader, lex, Id.Eof_RParen)
            node = c_parser.ParseCommandSub()
            # A little gross: Copied from osh/word_parse.py
            right_token = c_parser.w_parser.cur_token

            cs_part = CommandSub(left_token, node, right_token)

            typ = Id.Expr_CastedDummy
            opaque = cast(Token, cs_part)  # HACK for expr_to_ast
            done = p.addtoken(typ, opaque, gr.tokens[typ])
            assert not done  # can't end the expression

            # Now push the closing )
            ilabel = _Classify(gr, right_token, tea_keywords)
            done = p.addtoken(right_token.id, right_token, ilabel)
            assert not done  # can't end the expression

            continue

        # " and """
        if tok.id in (Id.Left_DoubleQuote, Id.Left_TDoubleQuote):
            left_token = tok
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)

            parts = []  # type: List[word_part_t]
            last_token = w_parser.ReadDoubleQuoted(left_token, parts)
            expr_dq_part = DoubleQuoted(left_token, parts, last_token)

            typ = Id.Expr_CastedDummy
            opaque = cast(Token, expr_dq_part)  # HACK for expr_to_ast
            done = p.addtoken(typ, opaque, gr.tokens[typ])
            assert not done  # can't end the expression

            continue

        # ${
        if tok.id == Id.Left_DollarBrace:
            left_token = tok
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)

            part, last_token = w_parser.ReadBracedVarSub(left_token)

            # It's casted word_part.BracedVarSub -> dummy -> expr.BracedVarSub!
            typ = Id.Expr_CastedDummy
            opaque = cast(Token, part)  # HACK for expr_to_ast
            done = p.addtoken(typ, opaque, gr.tokens[typ])
            assert not done  # can't end the expression

            continue

        # 'x' r'x' $'x' and '''x''' r'''x''' $'''x'''
        if tok.id in (Id.Left_SingleQuote, Id.Left_RSingleQuote,
                      Id.Left_DollarSingleQuote, Id.Left_TSingleQuote,
                      Id.Left_RTSingleQuote, Id.Left_DollarTSingleQuote):
            if tok.id in (Id.Left_DollarSingleQuote,
                          Id.Left_DollarTSingleQuote):
                sq_mode = lex_mode_e.SQ_C
            else:
                sq_mode = lex_mode_e.SQ_Raw

            left_token = tok
            line_reader = reader.DisallowedLineReader(parse_ctx.arena, tok)
            w_parser = parse_ctx.MakeWordParser(lex, line_reader)

            tokens = []  # type: List[Token]
            last_token = w_parser.ReadSingleQuoted(sq_mode, left_token,
                                                   tokens, True)

            sq_part = SingleQuoted(left_token, tokens, last_token)

            typ = Id.Expr_CastedDummy
            opaque = cast(Token, sq_part)  # HACK for expr_to_ast
            done = p.addtoken(typ, opaque, gr.tokens[typ])
            assert not done  # can't end the expression
            continue

    else:
        # We never broke out -- EOF is too soon (how can this happen???)
        raise parse.ParseError("incomplete input", tok.id, tok)


class ExprParser(object):
    """A wrapper around a pgen2 parser."""

    def __init__(self, parse_ctx, gr, tea_keywords):
        # type: (ParseContext, Grammar, bool) -> None
        self.parse_ctx = parse_ctx
        self.gr = gr
        self.tea_keywords = tea_keywords
        # Reused multiple times.
        self.push_parser = parse.Parser(gr)
        self.pnode_alloc = None  # type: Optional[PNodeAllocator]

    def Parse(self, lexer, start_symbol):
        # type: (Lexer, int) -> Tuple[PNode, Token]

        # Reuse the parser
        self.push_parser.setup(start_symbol, self.pnode_alloc)
        try:
            last_token = _PushOilTokens(self.parse_ctx, self.gr,
                                        self.push_parser, lexer,
                                        self.tea_keywords)
        except parse.ParseError as e:
            #log('ERROR %s', e)
            # TODO:
            # - Describe what lexer mode we're in (Invalid syntax in regex)
            # - Maybe say where the mode started
            # - Id.Unknown_Tok could say "This character is invalid"

            # ParseError has a "too much input" case, but I haven't been able
            # to tickle it.  Maybe it's because of the Eof tokens?

            p_die(
                'Syntax error in expression (near %s)' % ui.PrettyId(e.tok.id),
                e.tok)

        return self.push_parser.rootnode, last_token


class ctx_PNodeAllocator(object):

    def __init__(self, ep):
        # type: (ExprParser) -> None
        self.expr_parser = ep
        self.expr_parser.pnode_alloc = PNodeAllocator()

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.expr_parser.pnode_alloc.Clear()
        self.expr_parser.pnode_alloc = None
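

# A minimal usage sketch (hypothetical 'parse_ctx', 'gr', 'lexer', and
# 'start_symbol'; in practice the start symbol comes from the generated
# grammar's nonterminal numbers):
#
#   e_parser = ExprParser(parse_ctx, gr, False)
#   with ctx_PNodeAllocator(e_parser):
#       pnode, last_token = e_parser.Parse(lexer, start_symbol)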