# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""

from mycpp.mylib import log
_ = log

from typing import TYPE_CHECKING, Optional, Any, List
from pgen2.pnode import PNode, PNodeAllocator

if TYPE_CHECKING:
    from _devbuild.gen.syntax_asdl import Token
    from pgen2.grammar import Grammar, dfa_t

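# Rough shape of the grammar tables consumed by addtoken() below (a sketch
# inferred from how they are indexed here, not an authoritative spec):
#
#   grammar.dfas:   maps a symbol number to a dfa_t
#   dfa_t:          a (states, first) pair, where states[i] is the list of arcs
#                   leaving state i and first is the set of label indices that
#                   can begin the symbol
#   arc:            a (label index, next state) pair
#   grammar.labels: maps a label index to a type; values below 256 are terminal
#                   token types, values of 256 and above are symbol numbers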


class ParseError(Exception):
    """Exception to signal the parser is stuck."""

    def __init__(self, msg, type_, tok):
        # type: (str, int, Token) -> None
        self.msg = msg
        self.type = type_
        self.tok = tok

    def __repr__(self):
        # type: () -> str
        return "%s: type=%d, tok=%r" % (self.msg, self.type, self.tok)


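# Each stack entry pairs the DFA for the symbol currently being parsed (and the
# current state within that DFA) with the partially built PNode for that symbol.
# When a DFA reaches an accepting state, the entry is popped and its node is
# attached to the entry below it (see Parser.pop()).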
class _StackItem(object):
    def __init__(self, dfa, state, node):
        # type: (dfa_t, int, PNode) -> None
        self.dfa = dfa
        self.state = state
        self.node = node


class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar)              # create instance
    p.setup(start, pnode_alloc)      # prepare for parsing
    <for each input token>:
        if p.addtoken(...):          # parse a token; may raise ParseError
            break
    root = p.rootnode                # root of the parse tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    parse tree can then be retrieved from the rootnode instance
    variable.  When a syntax error occurs, addtoken() raises the
    ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).
    """
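    # A minimal driving sketch (hypothetical variable names; tokenizing the
    # input and computing each token's ilabel are up to the caller, see
    # driver.py):
    #
    #   pnode_alloc = PNodeAllocator()
    #   p = Parser(grammar)
    #   p.setup(start_symbol, pnode_alloc)  # start_symbol: a grammar symbol number
    #   for typ, tok, ilabel in classified_tokens:
    #       if p.addtoken(typ, tok, ilabel):
    #           break  # the start symbol is complete
    #   root = p.rootnode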

    def __init__(self, grammar):
        # type: (Grammar) -> None
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        Concrete syntax tree nodes are PNode instances allocated from a
        PNodeAllocator.  Each node carries a type (a token or symbol
        number), the Token it was created from (None for the start
        symbol), and, for symbols, a list of child nodes; see
        pgen2.pnode for details.
        """
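        # Illustration (hypothetical; exact shapes depend on the grammar):
        # after a successful parse, self.rootnode is the PNode for the start
        # symbol, and its descendants are the PNodes for the matched tokens
        # and nested symbols.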
        self.grammar = grammar
        self.rootnode = None  # type: Optional[PNode]
        self.stack = []  # type: List[_StackItem]
        self.pnode_alloc = None  # type: Optional[PNodeAllocator]

    def setup(self, start, pnode_alloc):
        # type: (int, PNodeAllocator) -> None
        """Prepare for parsing.

        This *must* be called before starting to parse.

        start is the grammar symbol number to begin parsing at
        (typically the grammar's start symbol), and pnode_alloc is the
        PNodeAllocator used to create parse tree nodes.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the given start symbol.
        """
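        # Reuse sketch: after one parse completes, calling setup() again (with
        # the same or a different start symbol) discards the old stack and
        # rootnode, so the same instance can parse another program.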
        self.pnode_alloc = pnode_alloc
        newnode = self.pnode_alloc.NewPNode(start, None)
        # Each stack entry is a _StackItem holding (dfa, state, node).
        self.stack = [_StackItem(self.grammar.dfas[start], 0, newnode)]
        self.rootnode = None

    def addtoken(self, typ, opaque, ilabel):
        # type: (int, Token, int) -> bool
        """Add a token; return True iff this is the end of the program."""
        # Loop until the token is shifted; may raise exceptions.

        # Andy NOTE: This is not linear time, i.e. not a constant amount of
        # work per token?  Is it O(n^2), as the ANTLR paper says?  Do the
        # "accelerators" in pgen.c have anything to do with it?

        while True:
            top = self.stack[-1]
            states, _ = top.dfa
            state = top.state

            arcs = states[state]
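            # Each arc is a (label index, next state) pair; looking the label
            # index up in self.grammar.labels yields a type: values below 256
            # are terminal token types, values of 256 and above are nonterminal
            # symbol numbers.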

            # Look for a state with this label
            found = False
            for ilab, newstate in arcs:
                t = self.grammar.labels[ilab]
                if ilabel == ilab:
                    # Look it up in the list of labels
                    assert t < 256, t
                    # Shift a token; we're done with it
                    self.shift(typ, opaque, newstate)
                    # Pop while we are in an accept-only state
                    state = newstate

                    while True:
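                        # An accept-only state has exactly one arc, (0, state):
                        # the only transition is the accepting self-loop, so the
                        # symbol on top of the stack is complete and can be
                        # popped.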
                        # mycpp: rewrite of tuple-in-list comparison
                        if len(states[state]) != 1:
                            break

                        s0, s1 = states[state][0]
                        if s0 != 0 or s1 != state:
                            break

                        self.pop()
                        if len(self.stack) == 0:
                            # Done parsing!
                            return True
                        top = self.stack[-1]
                        states, _ = top.dfa
                        state = top.state

                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    _, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, opaque, self.grammar.dfas[t], newstate)
                        found = True
                        break  # To continue the outer while loop

            if not found:
                # Note: this condition was rewritten for the mycpp translation.
                # if (0, state) in arcs:
                #   ...
                # else:
                #   ...
                found2 = False
                for left, right in arcs:
                    if left == 0 and right == state:
                        # An accepting state; pop it and try something else
                        self.pop()
                        if len(self.stack) == 0:
                            # Done parsing, but another token came in
                            raise ParseError("too much input", typ, opaque)
                        found2 = True

                if not found2:
                    # No transition was found for this token
                    raise ParseError("bad input", typ, opaque)

    def shift(self, typ, opaque, newstate):
        # type: (int, Token, int) -> None
        """Shift a token. (Internal)"""
        top = self.stack[-1]
        newnode = self.pnode_alloc.NewPNode(typ, opaque)
        top.node.AddChild(newnode)
        self.stack[-1].state = newstate

    def push(self, typ, opaque, newdfa, newstate):
        # type: (int, Token, dfa_t, int) -> None
        """Push a nonterminal. (Internal)"""
        top = self.stack[-1]
        newnode = self.pnode_alloc.NewPNode(typ, opaque)
        self.stack[-1].state = newstate
        self.stack.append(_StackItem(newdfa, 0, newnode))

    def pop(self):
        # type: () -> None
        """Pop a nonterminal. (Internal)"""
        top = self.stack.pop()
        newnode = top.node
        if len(self.stack):
            top2 = self.stack[-1]
            top2.node.AddChild(newnode)
        else:
            self.rootnode = newnode