"""
parse_lib.py - Consolidate various parser instantiations here.
"""

from _devbuild.gen.id_kind_asdl import Id_t
from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
                                       ArgList, command, pat_t)
from _devbuild.gen.types_asdl import lex_mode_e
from _devbuild.gen import grammar_nt

from core import state
from frontend import lexer
from frontend import reader

from ysh import expr_parse
from ysh import expr_to_ast
from ysh.expr_parse import ctx_PNodeAllocator
from osh import tdop
from osh import arith_parse
from osh import cmd_parse
from osh import word_parse
from mycpp import mylib
from mycpp.mylib import log

_ = log

from typing import Any, List, Tuple, Dict, TYPE_CHECKING
if TYPE_CHECKING:
    from core.alloc import Arena
    from core.util import _DebugFile
    from core import optview
    from frontend.lexer import Lexer
    from frontend.reader import _Reader
    from osh.tdop import TdopParser
    from osh.word_parse import WordParser
    from osh.cmd_parse import CommandParser
    from pgen2.grammar import Grammar


class _BaseTrail(object):
    """Base class has members, but no-ops for methods."""

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable
        # name completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more
        # than one line!
        self.tokens = []  # type: List[Token]

        self.alias_words = [
        ]  # type: List[CompoundWord]  # words INSIDE an alias expansion
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None

            # Note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln(' words:')
            for w in self.words:
                w.PrettyPrint(f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' redirects:')
            for r in self.redirects:
                r.PrettyPrint(f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' tokens:')
            for p in self.tokens:
                p.PrettyPrint(f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' alias_words:')
            for w in self.alias_words:
                w.PrettyPrint(f=debug_f)  # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)


class ctx_Alias(object):
    """Used by CommandParser so we know to be ready for the FIRST alias word.

    For example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    We do NOT want SetLatestWords or AppendToken to be active, because we
    don't need other tokens from 'ls -l'.

    It would also probably cause bugs in history expansion, e.g. 'echo !1'
    should refer to the first word the user typed, not the first word after
    alias expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        trail._expanding_alias = True
        self.trail = trail

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.trail._expanding_alias = False
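

# A minimal usage sketch (hypothetical caller; the real one is in
# osh/cmd_parse.py):
#
#     with ctx_Alias(parse_ctx.trail):
#         node = ParseExpandedAlias()  # hypothetical helper
#
# While the context is active, SetLatestWords() stores words in alias_words
# instead of words, and AppendToken() drops tokens entirely.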


class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        del self.words[:]
        del self.redirects[:]
        # The other ones don't need to be reset?
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            self.alias_words = words  # Save these separately
            return
        self.words = words
        self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        if self._expanding_alias:  # We don't want tokens inside aliases
            return
        self.tokens.append(token)
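

# By default, ParseContext holds the no-op _BaseTrail, so normal parsing pays
# nothing for this machinery.  A minimal sketch of how interactive completion
# might install the real Trail (hypothetical wiring; see the completion code
# for the real setup):
#
#     trail = Trail()
#     parse_ctx.Init_Trail(trail)
#     ...parse a partial command line...
#     trail.words, trail.tokens  # inspect what the parser left behind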


if TYPE_CHECKING:
    AliasesInFlight = List[Tuple[str, int]]


class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 oil_grammar,
                 one_pass_parse=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.oil_grammar = oil_grammar
        self.one_pass_parse = one_pass_parse

        # NOTE: The transformer is really a pure function.
        if oil_grammar:
            self.tr = expr_to_ast.Transformer(oil_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't
        perform better.
        """
        # Take the Arena from the LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser,
                                           lx, line_reader)
        return c_parser
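
    # A minimal usage sketch (hypothetical driver code; the shell's startup
    # does the real wiring):
    #
    #     parse_ctx = ParseContext(arena, parse_opts, aliases, oil_grammar)
    #     line_reader = reader.StringLineReader('echo hi', arena)
    #     c_parser = parse_ctx.MakeOshParser(line_reader)
    #     node = c_parser.ParseLogicalLine()  # assumed CommandParser entry point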

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeOilOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith)  # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)
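
    # A minimal sketch of MakeArithParser() in use, e.g. for the subscript in
    # a[x+1]=foo (hypothetical caller; the real one is in osh/cmd_parse.py):
    #
    #     a_parser = parse_ctx.MakeArithParser('x+1')
    #     anode = a_parser.Parse()  # assumed TdopParser entry point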

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        return expr_parse.ExprParser(self, self.oil_grammar, False)

    def _TeaParser(self):
        # type: () -> expr_parse.ExprParser
        return expr_parse.ExprParser(self, self.oil_grammar, True)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.VarDecl, Token]
        """var mylist = [1, 2, 3]"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.oil_var_decl)

            if 0:
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token  # VarDecl didn't fill this in

            return ast_node, last_token
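
    # The Parse* methods below share one pattern: run the pgen2-based
    # ExprParser to get a parse tree (pnode), optionally pretty-print it when
    # debugging (the 'if 0' blocks), then have the Transformer convert it to
    # an ASDL node.  ctx_PNodeAllocator appears to reclaim the pnodes when the
    # 'with' block exits, which is why the transformation happens inside it.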

    def ParsePlaceMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.PlaceMutation, Token]
        """setvar d['a'] += 1"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer,
                                               grammar_nt.oil_place_mutation)
            if 0:
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakePlaceMutation(pnode)
            ast_node.keyword = kw_token  # PlaceMutation didn't fill this in

            return ast_node, last_token

    def ParseYshArgList(self, lx, out):
        # type: (Lexer, ArgList) -> None
        """$f(x, y)"""

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, grammar_nt.oil_arglist)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.ToArgList(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ... }, while, etc."""

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0:
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

            return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern
        and the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

            return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, command.Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)

            return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, command.Func) -> None
        """func f(x Int, y Int = 0, ...args; z Int = 3, ...named)"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)

    def ParseTeaFunc(self, lexer, out):
        # type: (Lexer, command.TeaFunc) -> Token
        """func f(x Int, y Int = 0, ...args; z Int = 3, ...named) { x = 42 }"""
        e_parser = self._TeaParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.named_func)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.NamedFunc(pnode, out)

            return last_token

    def ParseDataType(self, lexer, out):
        # type: (Lexer, command.Data) -> Token
        """data Point(x Int, y Int)"""
        e_parser = self._TeaParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.tea_data)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.Data(pnode, out)

            return last_token

    def ParseEnum(self, lexer, out):
        # type: (Lexer, command.Enum) -> Token
        """enum cflow { Break, Continue, Return(status Int) }"""
        e_parser = self._TeaParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.tea_enum)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.Enum(pnode, out)

            return last_token

    def ParseClass(self, lexer, out):
        # type: (Lexer, command.Class) -> Token
        """class Lexer { var Token; func Next() { echo } }"""
        e_parser = self._TeaParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.tea_class)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.Class(pnode, out)

            return last_token

    def ParseImport(self, lexer, out):
        # type: (Lexer, command.Import) -> Token
        """use 'foo/bar' as spam, Foo, Z as Y"""
        e_parser = self._TeaParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.tea_import)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.Import(pnode, out)

            return last_token

    if mylib.PYTHON:

        def ParseTeaModule(self, line_reader):
            # type: (_Reader) -> None
            """An entire .tea file."""
            line_lexer = lexer.LineLexer(self.arena)
            lx = lexer.Lexer(line_lexer, line_reader)

            e_parser = self._TeaParser()
            with ctx_PNodeAllocator(e_parser):
                pnode, last_token = e_parser.Parse(lx, grammar_nt.tea_module)

                if 1:
                    self.p_printer.Print(pnode)

                #out = command.Use()  # TODO: make a node
                #self.tr.TeaModule(pnode, out)

                return None


# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)