1 #!/usr/bin/env python2
2 """
3 grammar_gen.py - Use pgen2 to generate tables from Oil's grammar.
4 """
5 from __future__ import print_function
6
7 import os
8 import sys
9
10 from _devbuild.gen.id_kind_asdl import Id, Kind
11 from _devbuild.gen.syntax_asdl import source
12
13 from core import alloc
14 from core import optview
15 from mycpp.mylib import log
16 from frontend import lexer
17 from frontend import lexer_def
18 from frontend import reader
19 from pgen2 import parse, pgen
20
21
class OilTokenDef(object):
    """Maps token labels in the grammar to integer Id values for pgen2.

    Wraps three lookup tables (all str -> int):
      ops: operator strings specific to the expression grammar
      more_ops: additional operator / left-sub tokens
      keyword_ops: keyword strings like 'xor'
    """

    def __init__(self, ops, more_ops, keyword_ops):
        self.ops = ops
        self.more_ops = more_ops
        self.keyword_ops = keyword_ops

    def GetTerminalNum(self, label):
        """e.g. translate Expr_Name in the grammar to 178."""
        id_ = getattr(Id, label)
        #log('Id %s = %d', id_, id_)
        assert id_ < 256, id_  # pgen2 terminals must fit in a byte
        return id_

    def GetKeywordNum(self, s):
        """e.g 'xor' -> Id.Expr_Xor.

        Python doesn't have this, but Oil does. Returns None if not
        found.
        """
        id_ = self.keyword_ops.get(s)
        if id_ is None:
            return None
        assert id_ < 256, id_
        return id_

    def GetOpNum(self, op_str):
        """Look up an operator string in ops, falling back to more_ops.

        Args:
          op_str: '>='

        Returns:
          Integer for '>=' or Id.Arith_GreatEqual

        Raises:
          KeyError: if op_str is in neither table.
        """
        # BUG FIX: this used `self.ops.get(op_str) or self.more_ops[op_str]`,
        # which wrongly falls through to more_ops (or raises KeyError) when
        # the Id value is 0, since 0 is falsy.  Test membership instead.
        if op_str in self.ops:
            id_ = self.ops[op_str]
        else:
            id_ = self.more_ops[op_str]  # Fail if not there
        assert id_ < 256, id_
        return id_
58
59
def MakeOilLexer(code_str, arena):
    """Build a Lexer that scans code_str, registering a synthetic source file.

    Args:
      code_str: the program text to tokenize
      arena: alloc.Arena for line and source bookkeeping

    Returns:
      A lexer.Lexer instance.
    """
    arena.PushSource(source.MainFile('pgen2_main'))
    str_reader = reader.StringLineReader(code_str, arena)
    return lexer.Lexer(lexer.LineLexer(arena), str_reader)
66
67
def _BuildTokenDef():
    """Construct the OilTokenDef used at grammar BUILD time.

    Combines three tables derived from lexer_def:
      OPS: multi-char operators specific to the expression grammar
      more_ops: arithmetic pairs, left subs like ${ or @{, and expr words
      keyword_ops: keywords like 'for' and 'in'
    """
    OPS = {
        '!': Id.Expr_Bang,
        '.': Id.Expr_Dot,
        '->': Id.Expr_RArrow,
        '::': Id.Expr_DColon,
        '//': Id.Expr_DSlash,
        '++': Id.Arith_DPlus,
        '!~': Id.Expr_NotTilde,
        '~~': Id.Expr_DTilde,
        '!~~': Id.Expr_NotDTilde,
        '~==': Id.Expr_TildeDEqual,
        '===': Id.Expr_TEqual,
        '!==': Id.Expr_NotDEqual,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
    }

    # Note: We have two lists of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lexer_def.EXPR_OPS:
        assert token_str not in OPS, token_str
        OPS[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (lexer_def.ID_SPEC.LexerPairs(Kind.Arith) +
               lexer_def.YSH_LEFT_SUBS + lexer_def.YSH_LEFT_UNQUOTED +
               lexer_def.EXPR_WORDS)
    more_ops = {}
    for _, token_str, id_ in triples:
        if token_str in more_ops:
            # Duplicate tokens indicate a lexer_def bug; dump the table.
            import pprint
            raise AssertionError(
                '%r %s' % (token_str, pprint.pformat(more_ops, indent=2)))
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lexer_def.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:  # debugging: dump all three tables
        from pprint import pprint
        pprint(OPS)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    return OilTokenDef(OPS, more_ops, keyword_ops)


def main(argv):
    """Entry point.  Usage: grammar_gen.py ACTION ARGS...

    Actions:
      py GRAMMAR OUT_DIR      - write marshalled tables and _nt.py
      cpp GRAMMAR OUT_DIR     - write _nt.h and _tables.cc
      parse GRAMMAR START STR - parse STR and print the tree
      stdlib-test             - demo of Python's builtin parser module

    Returns an integer exit status; raises RuntimeError on a bad action.
    """
    action = argv[1]
    argv = argv[2:]

    # Used at grammar BUILD time.
    tok_def = _BuildTokenDef()

    if action == 'py':  # generate the grammar and parse it
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find: it supplies its own token definitions.
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_py = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_py, 'w') as out_f:
            gr.dump_nonterminals_py(out_f)

        log('%s -> (ysh/grammar_gen) -> %s/%s{.marshal,_nt.py}', grammar_path,
            out_dir, basename)

        #gr.report()

    elif action == 'cpp':  # generate the grammar and parse it
        grammar_path = argv[0]
        out_dir = argv[1]

        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        nonterm_h = os.path.join(out_dir, basename + '_nt.h')
        with open(nonterm_h, 'w') as out_f:
            gr.dump_nonterminals_cpp(out_f)

        grammar_cpp_path = os.path.join(out_dir, basename + '_tables.cc')
        with open(grammar_cpp_path, 'w') as src_f:
            gr.dump_cpp(src_f)

        if 0:  # debugging
            log('%s -> (ysh/grammar_gen) -> %s/%s._nt.h', grammar_path,
                out_dir, basename)

    elif action == 'parse':  # generate the grammar and parse it
        # Remove build dependency
        from frontend import parse_lib
        from ysh import expr_parse
        from ysh import expr_to_ast

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = optview.Parse([], [])
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr, False)
        try:
            with expr_parse.ctx_PNodeAllocator(p):
                pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = expr_to_ast.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            elif start_symbol == 'ysh_case_pat':
                ast_node = tr.YshCasePattern(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is. It doesn't use semantic
        # actions to prune on the fly!

        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        raise RuntimeError('Invalid action %r' % action)
232
233
if __name__ == '__main__':
    # RuntimeError (e.g. an invalid action) is reported as a fatal error;
    # any other exception propagates with a traceback.
    try:
        status = main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        status = 1
    sys.exit(status)