OILS
/
frontend
/
lexer_def_test.py
1 |
#!/usr/bin/env python2
|
2 |
"""
|
3 |
lexer_def_test.py: Tests for lexer_def.py
|
4 |
"""
|
5 |
from __future__ import print_function
|
6 |
|
7 |
import re
|
8 |
import unittest
|
9 |
|
10 |
from _devbuild.gen.id_kind_asdl import Id, Id_str, Kind
|
11 |
from _devbuild.gen.types_asdl import lex_mode_e
|
12 |
from core.test_lib import Tok
|
13 |
from mycpp.mylib import log
|
14 |
from core import test_lib
|
15 |
from frontend import lexer_def
|
16 |
from frontend import consts
|
17 |
from frontend import match
|
18 |
|
19 |
_ = log
|
20 |
|
21 |
|
22 |
def _InitLexer(s):
    # type: (str) -> lexer.Lexer
    """Construct a full Lexer over the string s, backed by a throwaway arena."""
    arena = test_lib.MakeArena('<lex_test.py>')
    _, lx = test_lib.InitLexer(s, arena)
    return lx
26 |
|
27 |
|
28 |
class AsdlTest(unittest.TestCase):
    """Smoke test: the generated ASDL enums import and print without error."""

    def testLexMode(self):
        print(lex_mode_e.DQ)
31 |
|
32 |
|
33 |
# Two-line shell command used by LexerTest.testRead.  The leading backslash
# avoids an initial blank line; the trailing newline after 'ls /home/' is
# significant — testRead expects an Op_Newline token for each line.
CMD = """\
ls /
ls /home/
"""
37 |
|
38 |
|
39 |
class LexerTest(unittest.TestCase):
    """Tests for the main Lexer, one lex mode at a time."""

    def assertTokensEqual(self, left, right):
        """Fail unless test_lib.TokensEqual considers the two tokens equal."""
        self.assertTrue(test_lib.TokensEqual(left, right),
                        'Expected %r, got %r' % (left, right))

    def _assertReadSequence(self, lx, pairs):
        """Read one token per (lex_mode, expected_token) pair and compare."""
        for mode, expected in pairs:
            t = lx.Read(mode)
            self.assertTokensEqual(expected, t)

    def _printAllTokens(self, case, mode):
        """Lex `case` in `mode`, printing every token through Eof_Real."""
        print()
        print('--- %s ---' % case)
        print()

        lx = _InitLexer(case)
        while True:
            t = lx.Read(mode)
            print(t)
            if t.id == Id.Eof_Real:
                break

    def testRead(self):
        lx = _InitLexer(CMD)
        sh = lex_mode_e.ShCommand
        self._assertReadSequence(lx, [
            # Line one
            (sh, Tok(Id.Lit_Chars, 'ls')),
            (sh, Tok(Id.WS_Space, None)),
            (sh, Tok(Id.Lit_Chars, '/')),
            (sh, Tok(Id.Op_Newline, None)),
            # Line two
            (sh, Tok(Id.Lit_Chars, 'ls')),
            (sh, Tok(Id.WS_Space, None)),
            (sh, Tok(Id.Lit_Chars, '/home/')),
            (sh, Tok(Id.Op_Newline, None)),
            (sh, Tok(Id.Eof_Real, '')),
            # Reading past the end keeps returning EOF
            (sh, Tok(Id.Eof_Real, '')),
        ])

    def testMode_VSub_ArgUnquoted(self):
        lx = _InitLexer("'hi'")
        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
        # NOTE(review): no assertion yet; this only exercises the mode.
        print(t)

    def testMode_ExtGlob(self):
        lx = _InitLexer('@(foo|bar)')
        self._assertReadSequence(lx, [
            (lex_mode_e.ShCommand, Tok(Id.ExtGlob_At, '@(')),
            (lex_mode_e.ExtGlob, Tok(Id.Lit_Chars, 'foo')),
            (lex_mode_e.ExtGlob, Tok(Id.Op_Pipe, None)),
            (lex_mode_e.ExtGlob, Tok(Id.Lit_Chars, 'bar')),
            (lex_mode_e.ExtGlob, Tok(Id.Op_RParen, None)),
        ])

        # Individual cases: each extended glob prefix, plus a literal '$'.
        for code, expected in [
            ('@(', Tok(Id.ExtGlob_At, '@(')),
            ('*(', Tok(Id.ExtGlob_Star, '*(')),
            ('?(', Tok(Id.ExtGlob_QMark, '?(')),
            ('$', Tok(Id.Lit_Other, '$')),
        ]:
            lx = _InitLexer(code)
            self.assertTokensEqual(expected, lx.Read(lex_mode_e.ExtGlob))

    def testMode_BashRegex(self):
        lx = _InitLexer('(foo|bar)')
        self._assertReadSequence(lx, [
            (lex_mode_e.BashRegex, Tok(Id.Lit_Other, '(')),
            (lex_mode_e.BashRegex, Tok(Id.Lit_Chars, 'foo')),
            (lex_mode_e.BashRegex, Tok(Id.Lit_Other, '|')),
        ])

    def testMode_DBracket(self):
        lx = _InitLexer('-z foo')
        t = lx.Read(lex_mode_e.DBracket)
        self.assertTokensEqual(Tok(Id.BoolUnary_z, '-z'), t)
        # The token's Id should map back to the BoolUnary kind.
        self.assertEqual(Kind.BoolUnary, consts.GetKind(t.id))

    def testMode_DollarSq(self):
        lx = _InitLexer(r'foo bar\n \x00 \000 \u0065')

        t = lx.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(Tok(Id.Char_Literals, 'foo bar'), t)

        t = lx.Read(lex_mode_e.SQ_C)
        print(t)
        self.assertTokensEqual(Tok(Id.Char_OneChar, r'\n'), t)

    def testMode_Backtick(self):
        for case in [r'echo \" \\ hi`', r'`', r'']:
            self._printAllTokens(case, lex_mode_e.Backtick)

    def testMode_Printf(self):
        for case in [r'hello %s\n', r'%% percent %%\377']:
            self._printAllTokens(case, lex_mode_e.PrintfOuter)

        # Now test the Printf_Percent mode
        for case in [r'-3.3f', r'03d']:
            self._printAllTokens(case, lex_mode_e.PrintfPercent)

    def testMode_Expr(self):
        self._printAllTokens(r'@[ ]', lex_mode_e.Expr)

    def testLookPastSpace(self):
        # The usage pattern we care about: Read past the function name,
        # then look ahead to see what comes after '(' or whitespace.
        lx = _InitLexer('fun()')
        self._assertReadSequence(lx, [
            (lex_mode_e.ShCommand, Tok(Id.Lit_Chars, 'fun')),
            (lex_mode_e.ShCommand, Tok(Id.Op_LParen, None)),
        ])
        self.assertEqual(Id.Op_RParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

        lx = _InitLexer('fun ()')
        self._assertReadSequence(lx, [
            (lex_mode_e.ShCommand, Tok(Id.Lit_Chars, 'fun')),
            (lex_mode_e.ShCommand, Tok(Id.WS_Space, None)),
        ])
        self.assertEqual(Id.Op_LParen,
                         lx.LookPastSpace(lex_mode_e.ShCommand))

    def testPushHint(self):
        # Extglob use case: hint that the next ')' closes an extended glob.
        lx = _InitLexer('@()')
        lx.PushHint(Id.Op_RParen, Id.Right_ExtGlob)
        self._assertReadSequence(lx, [
            (lex_mode_e.ShCommand, Tok(Id.ExtGlob_At, '@(')),
            (lex_mode_e.ShCommand, Tok(Id.Right_ExtGlob, None)),
            (lex_mode_e.ShCommand, Tok(Id.Eof_Real, '')),
        ])

    def testEmitCompDummy(self):
        lx = _InitLexer('echo ')
        lx.EmitCompDummy()
        self._assertReadSequence(lx, [
            (lex_mode_e.ShCommand, Tok(Id.Lit_Chars, 'echo')),
            (lex_mode_e.ShCommand, Tok(Id.WS_Space, None)),
            # Right before EOF
            (lex_mode_e.ShCommand, Tok(Id.Lit_CompDummy, '')),
            (lex_mode_e.ShCommand, Tok(Id.Eof_Real, '')),
        ])
280 |
|
281 |
|
282 |
class LineLexerTest(unittest.TestCase):
    """Tests for the lower-level LineLexer, which lexes one line at a time."""

    def setUp(self):
        self.arena = test_lib.MakeArena('<lex_test.py>')

    def assertTokensEqual(self, left, right):
        # Compare only id and tval, not spans.
        self.assertEqual(left.id, right.id,
                         '%s != %s' % (Id_str(left.id), Id_str(right.id)))
        self.assertEqual(left.tval, right.tval)

    def testReadOuter(self):
        line_lexer = test_lib.InitLineLexer('\n', self.arena)
        self.assertTokensEqual(Tok(Id.Op_Newline, None),
                               line_lexer.Read(lex_mode_e.ShCommand))

    def testRead_VSub_ArgUnquoted(self):
        line_lexer = test_lib.InitLineLexer("'hi'", self.arena)
        t = line_lexer.Read(lex_mode_e.VSub_ArgUnquoted)
        self.assertEqual(Id.Left_SingleQuote, t.id)

    def testLookPastSpace(self):
        # Lines always end with '\n'
        line_lexer = test_lib.InitLineLexer('', self.arena)
        self.assertEqual(Id.Unknown_Tok,
                         line_lexer.LookPastSpace(lex_mode_e.ShCommand))

        # Each case: read the first word, then look past any spaces.
        # The last two use the cursor rather than real lookahead.
        cases = [
            ('foo', 'foo', Id.Unknown_Tok),
            ('foo bar', 'foo', Id.Lit_Chars),
            ('fun(', 'fun', Id.Op_LParen),
            ('fun (', 'fun', Id.Op_LParen),
        ]
        for line, first_word, expected_id in cases:
            line_lexer = test_lib.InitLineLexer(line, self.arena)
            self.assertTokensEqual(Tok(Id.Lit_Chars, first_word),
                                   line_lexer.Read(lex_mode_e.ShCommand))
            self.assertEqual(expected_id,
                             line_lexer.LookPastSpace(lex_mode_e.ShCommand))
329 |
|
330 |
|
331 |
class RegexTest(unittest.TestCase):
    """Tests of raw regex patterns used by the lexer definitions."""

    def testNul(self):
        # A character class containing NUL matches only NUL.
        nul_pat = re.compile(r'[\0]')
        self.assertEqual(False, bool(nul_pat.match('x')))
        self.assertEqual(True, bool(nul_pat.match('\0')))

        # The catch-all (last) ECHO_E_DEF rule: matches 'x' but not NUL.
        _, pat_str, _ = lexer_def.ECHO_E_DEF[-1]
        print('P %r' % pat_str)
        last_echo_e_pat = re.compile(pat_str)
        self.assertEqual(True, bool(last_echo_e_pat.match('x')))
        self.assertEqual(False, bool(last_echo_e_pat.match('\0')))
342 |
|
343 |
|
344 |
class OtherLexerTest(unittest.TestCase):
    """Tests for the special-purpose lexers in frontend/match.py.

    Fix: replaced the deprecated unittest alias self.assert_() (deprecated
    since Python 2.7, removed in 3.12) with assertIn/assertNotIn, which are
    supported on both Python 2.7 and 3.x and give clearer failure messages.
    """

    def testEchoLexer(self):
        CASES = [
            r'newline \n NUL \0 octal \0377 hex \x00',
            r'unicode \u0065 \U00000065',
            r'\d \e \f \g',
        ]
        for s in CASES:
            lex = match.EchoLexer(s)
            print(lex.Tokens())

    def testPS1Lexer(self):
        print(list(match.Ps1Tokens(r'foo')))
        print(list(match.Ps1Tokens(r'\h \w \$')))

    def testHistoryLexer(self):
        print(list(match.HistoryTokens(r'echo hi')))

        print(list(match.HistoryTokens(r'echo !! !* !^ !$')))

        # No history operator with \ escape
        tokens = list(match.HistoryTokens(r'echo \!!'))
        print(tokens)
        self.assertNotIn(Id.History_Op,
                         [tok_type for tok_type, _ in tokens])

        print(list(match.HistoryTokens(r'echo !3...')))
        print(list(match.HistoryTokens(r'echo !-5...')))
        print(list(match.HistoryTokens(r'echo !x/foo.py bar')))

        print('---')

        # No history operator in single quotes
        tokens = list(match.HistoryTokens(r"echo '!!' $'!!' "))
        print(tokens)
        self.assertNotIn(Id.History_Op,
                         [tok_type for tok_type, _ in tokens])

        # No history operator in incomplete single quotes
        tokens = list(match.HistoryTokens(r"echo '!! "))
        print(tokens)
        self.assertNotIn(Id.History_Op,
                         [tok_type for tok_type, _ in tokens])

        # Quoted single quote, and then a History operator
        tokens = list(match.HistoryTokens(r"echo \' !! "))
        print(tokens)
        # YES operator
        self.assertIn(Id.History_Op,
                      [tok_type for tok_type, _ in tokens])

    def testHistoryDoesNotConflict(self):
        # https://github.com/oilshell/oil/issues/264
        #
        # Bash has a bunch of hacks to suppress the conflict between ! for history
        # and:
        #
        # 1. [!abc] globbing
        # 2. ${!foo} indirect expansion
        # 3. $!x -- the PID
        # 4. !(foo|bar) -- extended glob
        #
        # I guess [[ a != b ]] doesn't match the pattern in bash.

        three_other = [Id.History_Other, Id.History_Other, Id.History_Other]
        two_other = [Id.History_Other, Id.History_Other]
        CASES = [
            (r'[!abc]', three_other),
            (r'${!indirect}', three_other),
            (r'$!x', three_other),  # didn't need a special case
            (r'!(foo|bar)', two_other),  # didn't need a special case
        ]

        for s, expected_types in CASES:
            tokens = list(match.HistoryTokens(s))
            print(tokens)
            actual_types = [id_ for id_, val in tokens]

            self.assertNotIn(Id.History_Search, actual_types, tokens)

            self.assertEqual(expected_types, actual_types)

    def testBraceRangeLexer(self):
        CASES = [
            'a..z',
            '100..300',
            '-300..-100..1',
            '1.3',  # invalid
            'aa',
        ]
        for s in CASES:
            lex = match.BraceRangeLexer(s)
            print(lex.Tokens())
433 |
|
434 |
|
435 |
# Entry point: discover and run every TestCase defined in this module.
if __name__ == '__main__':
    unittest.main()