1# testlex.py
2
3import unittest
4try:
5    import StringIO
6except ImportError:
7    import io as StringIO
8
9import sys
10sys.path.insert(0,"..")
11sys.tracebacklimit = 0
12
13import ply.lex
14
def check_expected(result, expected):
    """Tell whether *result* matches *expected* line by line.

    Both arguments are split into lines.  They match when the line counts
    are equal and every line of *result* ends with the corresponding line
    of *expected*, so any leading text on a result line is ignored.

    On Python 3, ``str`` arguments are first encoded to ASCII bytes so that
    text and bytes inputs can be compared with each other.

    Returns True on a match and False otherwise.
    """
    if sys.version_info[0] >= 3:
        # Normalise both sides to bytes; values that are not str pass through.
        result, expected = [
            text.encode('ascii') if isinstance(text, str) else text
            for text in (result, expected)
        ]

    actual_lines = result.splitlines()
    wanted_lines = expected.splitlines()

    # A differing number of lines is a mismatch regardless of content.
    return len(actual_lines) == len(wanted_lines) and all(
        got.endswith(want) for got, want in zip(actual_lines, wanted_lines)
    )
32
def run_import(module):
    """Import *module* by name and then remove it from ``sys.modules``.

    The import is performed by executing an ``import`` statement built from
    the given name.  Whatever the import raises propagates to the caller,
    and in that case the ``sys.modules`` entry is not touched here.
    """
    statement = "".join(("import ", module))
    exec(statement)
    # Drop the cached entry so a later import of the same name is not
    # satisfied from sys.modules.
    sys.modules.pop(module)
37
38# Tests related to errors and warnings when building lexers
class LexErrorWarningTests(unittest.TestCase):
    """Check the messages written to stderr when lexer modules are imported.

    Each test imports one module through ``run_import`` and compares the
    captured ``sys.stderr`` text with the expected diagnostics using
    ``check_expected``.  Most modules are expected to raise ``SyntaxError``
    on import; a few are expected to import cleanly and only emit a message.
    """

    def setUp(self):
        # Remember the streams that are installed right now (a test runner may
        # already have replaced them) and capture output in memory instead.
        self._saved_streams = (sys.stderr, sys.stdout)
        sys.stderr = StringIO.StringIO()
        sys.stdout = StringIO.StringIO()

    def tearDown(self):
        # Put back exactly what setUp found rather than sys.__stderr__ /
        # sys.__stdout__, which would discard a runner's own redirection.
        sys.stderr, sys.stdout = self._saved_streams

    def _assert_stderr(self, expected):
        """Fail unless the captured stderr text matches *expected*."""
        captured = sys.stderr.getvalue()
        # The message carries the captured text because this file sets
        # sys.tracebacklimit to 0, so a bare failure would say very little.
        self.assertTrue(check_expected(captured, expected),
                        "unexpected stderr output: %r" % (captured,))

    def _assert_build_fails(self, module, expected):
        """Import *module*, require SyntaxError, then check stderr."""
        self.assertRaises(SyntaxError, run_import, module)
        self._assert_stderr(expected)

    def _assert_build_warns(self, module, expected):
        """Import *module*, which must not raise, then check stderr."""
        run_import(module)
        self._assert_stderr(expected)

    def test_lex_doc1(self):
        self._assert_build_fails(
            "lex_doc1",
            "lex_doc1.py:18: No regular expression defined for rule 't_NUMBER'\n")

    def test_lex_dup1(self):
        self._assert_build_fails(
            "lex_dup1",
            "lex_dup1.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n")

    def test_lex_dup2(self):
        self._assert_build_fails(
            "lex_dup2",
            "lex_dup2.py:22: Rule t_NUMBER redefined. Previously defined on line 18\n")

    def test_lex_dup3(self):
        self._assert_build_fails(
            "lex_dup3",
            "lex_dup3.py:20: Rule t_NUMBER redefined. Previously defined on line 18\n")

    def test_lex_empty(self):
        self._assert_build_fails(
            "lex_empty",
            "No rules of the form t_rulename are defined\n"
            "No rules defined for state 'INITIAL'\n")

    def test_lex_error1(self):
        self._assert_build_warns(
            "lex_error1",
            "No t_error rule is defined\n")

    def test_lex_error2(self):
        self._assert_build_fails(
            "lex_error2",
            "Rule 't_error' must be defined as a function\n")

    def test_lex_error3(self):
        self._assert_build_fails(
            "lex_error3",
            "lex_error3.py:20: Rule 't_error' requires an argument\n")

    def test_lex_error4(self):
        self._assert_build_fails(
            "lex_error4",
            "lex_error4.py:20: Rule 't_error' has too many arguments\n")

    def test_lex_ignore(self):
        self._assert_build_fails(
            "lex_ignore",
            "lex_ignore.py:20: Rule 't_ignore' must be defined as a string\n")

    def test_lex_ignore2(self):
        self._assert_build_warns(
            "lex_ignore2",
            "t_ignore contains a literal backslash '\\'\n")

    def test_lex_re1(self):
        self._assert_build_fails(
            "lex_re1",
            "Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis\n")

    def test_lex_re2(self):
        self._assert_build_fails(
            "lex_re2",
            "Regular expression for rule 't_PLUS' matches empty string\n")

    def test_lex_re3(self):
        self._assert_build_fails(
            "lex_re3",
            "Invalid regular expression for rule 't_POUND'. unbalanced parenthesis\n"
            "Make sure '#' in rule 't_POUND' is escaped with '\\#'\n")

    def test_lex_rule1(self):
        self._assert_build_fails(
            "lex_rule1",
            "t_NUMBER not defined as a function or string\n")

    def test_lex_rule2(self):
        self._assert_build_fails(
            "lex_rule2",
            "lex_rule2.py:18: Rule 't_NUMBER' requires an argument\n")

    def test_lex_rule3(self):
        self._assert_build_fails(
            "lex_rule3",
            "lex_rule3.py:18: Rule 't_NUMBER' has too many arguments\n")

    def test_lex_state1(self):
        self._assert_build_fails(
            "lex_state1",
            "states must be defined as a tuple or list\n")

    def test_lex_state2(self):
        self._assert_build_fails(
            "lex_state2",
            "Invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive')\n"
            "Invalid state specifier 'example'. Must be a tuple (statename,'exclusive|inclusive')\n")

    def test_lex_state3(self):
        self._assert_build_fails(
            "lex_state3",
            "State name 1 must be a string\n"
            "No rules defined for state 'example'\n")

    def test_lex_state4(self):
        self._assert_build_fails(
            "lex_state4",
            "State type for state comment must be 'inclusive' or 'exclusive'\n")

    def test_lex_state5(self):
        self._assert_build_fails(
            "lex_state5",
            "State 'comment' already defined\n")

    def test_lex_state_noerror(self):
        self._assert_build_warns(
            "lex_state_noerror",
            "No error rule is defined for exclusive state 'comment'\n")

    def test_lex_state_norule(self):
        self._assert_build_fails(
            "lex_state_norule",
            "No rules defined for state 'example'\n")

    def test_lex_token1(self):
        self._assert_build_fails(
            "lex_token1",
            "No token list is defined\n"
            "Rule 't_NUMBER' defined for an unspecified token NUMBER\n"
            "Rule 't_PLUS' defined for an unspecified token PLUS\n"
            "Rule 't_MINUS' defined for an unspecified token MINUS\n")

    def test_lex_token2(self):
        self._assert_build_fails(
            "lex_token2",
            "tokens must be a list or tuple\n"
            "Rule 't_NUMBER' defined for an unspecified token NUMBER\n"
            "Rule 't_PLUS' defined for an unspecified token PLUS\n"
            "Rule 't_MINUS' defined for an unspecified token MINUS\n")

    def test_lex_token3(self):
        self._assert_build_fails(
            "lex_token3",
            "Rule 't_MINUS' defined for an unspecified token MINUS\n")

    def test_lex_token4(self):
        self._assert_build_fails(
            "lex_token4",
            "Bad token name '-'\n")

    def test_lex_token5(self):
        # Unlike the other cases, this module is expected to raise
        # ply.lex.LexError and the check is made on the exception text.
        try:
            run_import("lex_token5")
        except ply.lex.LexError:
            # sys.exc_info() keeps the handler valid on old and new Pythons.
            error = sys.exc_info()[1]
        else:
            # Report a clean failure instead of a NameError further down.
            self.fail("importing lex_token5 did not raise ply.lex.LexError")
        self.assertTrue(
            check_expected(str(error),
                           "lex_token5.py:19: Rule 't_NUMBER' returned an unknown token type 'NUM'"),
            "unexpected LexError text: %r" % (str(error),))

    def test_lex_token_dup(self):
        self._assert_build_warns(
            "lex_token_dup",
            "Token 'MINUS' multiply defined\n")

    def test_lex_literal1(self):
        self._assert_build_fails(
            "lex_literal1",
            "Invalid literal '**'. Must be a single character\n")

    def test_lex_literal2(self):
        self._assert_build_fails(
            "lex_literal2",
            "Invalid literals specification. literals must be a sequence of characters\n")
256
257import os
258import subprocess
259import shutil
260
261# Tests related to various build options associated with lexers
class LexBuildOptionTests(unittest.TestCase):
    """Exercise lexer build options, including generated table modules.

    The tests import a module through ``run_import`` and compare the text it
    printed on ``sys.stdout`` with the expected token dump.  The optimize
    tests additionally check that a table module file was written, then run
    the same script in a child interpreter with ``-O`` / ``-OO`` and check
    both its output and the presence of a ``.pyo`` file.
    """

    # Token dump expected from lex_module, lex_object, lex_closure and the
    # lex_optimize* modules.
    _CALC_TOKENS = ("(NUMBER,3,1,0)\n"
                    "(PLUS,'+',1,1)\n"
                    "(NUMBER,4,1,2)\n")

    # Token dump expected from lex_opt_alias.
    _ALIAS_TOKENS = ("(NUMBER,3,1,0)\n"
                     "(+,'+',1,1)\n"
                     "(NUMBER,4,1,2)\n")

    # Token dump expected from lex_many_tokens.
    _MANY_TOKENS = ("(TOK34,'TOK34:',1,0)\n"
                    "(TOK143,'TOK143:',1,7)\n"
                    "(TOK269,'TOK269:',1,15)\n"
                    "(TOK372,'TOK372:',1,23)\n"
                    "(TOK452,'TOK452:',1,31)\n"
                    "(TOK561,'TOK561:',1,39)\n"
                    "(TOK999,'TOK999:',1,47)\n")

    def setUp(self):
        # Remember the streams that are installed right now (a test runner may
        # already have replaced them) and capture output in memory instead.
        self._saved_streams = (sys.stderr, sys.stdout)
        sys.stderr = StringIO.StringIO()
        sys.stdout = StringIO.StringIO()

    def tearDown(self):
        # Put back exactly what setUp found rather than sys.__stderr__ /
        # sys.__stdout__, which would discard a runner's own redirection.
        sys.stderr, sys.stdout = self._saved_streams
        self._remove_lexdir()

    # -- helpers ---------------------------------------------------------

    def _remove_lexdir(self):
        """Delete the ``lexdir`` tree; a missing tree is not an error."""
        try:
            shutil.rmtree("lexdir")
        except OSError:
            pass

    def _remove_table_files(self, table):
        """Delete ``table``.py/.pyc/.pyo, ignoring files that cannot be removed."""
        for suffix in (".py", ".pyc", ".pyo"):
            try:
                os.remove(table + suffix)
            except OSError:
                pass

    def _assert_output(self, actual, expected, what):
        """Fail unless *actual* matches *expected*; *what* names the source."""
        # The message carries the text because this file sets
        # sys.tracebacklimit to 0, so a bare failure would say very little.
        self.assertTrue(check_expected(actual, expected),
                        "unexpected output from %s: %r" % (what, actual))

    def _assert_exists(self, path):
        """Fail unless *path* exists on disk."""
        self.assertTrue(os.path.exists(path), "%s was not created" % path)

    def _run_script(self, flag, script):
        """Run *script* in a child interpreter with *flag*; return its stdout."""
        child = subprocess.Popen([sys.executable, flag, script],
                                 stdout=subprocess.PIPE)
        # communicate() reads the pipe to EOF, closes it and waits for the
        # child, so the process is reaped before the caller inspects files.
        return child.communicate()[0]

    def _check_import_output(self, module, expected):
        """Import *module* and compare what it printed with *expected*."""
        run_import(module)
        self._assert_output(sys.stdout.getvalue(), expected, module)

    def _check_table_build(self, module, table, expected, flags=("-O", "-OO")):
        """Check table generation for *module* in-process and under *flags*.

        *table* is the path of the generated table module without extension.
        After importing *module*, ``table``.py must exist.  Then, for every
        interpreter flag, ``module``.py is run as a script, its output is
        checked, and ``table``.pyo must exist; that file is removed again so
        the next run has to produce it afresh.
        """
        self._check_import_output(module, expected)
        self._assert_exists(table + ".py")

        script = module + ".py"
        for flag in flags:
            output = self._run_script(flag, script)
            self._assert_output(output, expected, "%s %s" % (flag, script))
            # NOTE(review): a sibling ".pyo" is where Python 2 writes optimized
            # bytecode; Python 3 caches it under __pycache__ (and dropped the
            # .pyo suffix in 3.5) -- confirm this check on the interpreters
            # this suite is meant to support.
            self._assert_exists(table + ".pyo")
            os.remove(table + ".pyo")

    # -- tests -----------------------------------------------------------

    def test_lex_module(self):
        self._check_import_output("lex_module", self._CALC_TOKENS)

    def test_lex_object(self):
        self._check_import_output("lex_object", self._CALC_TOKENS)

    def test_lex_closure(self):
        self._check_import_output("lex_closure", self._CALC_TOKENS)

    def test_lex_optimize(self):
        # Start from a clean slate so the table files seen below are new.
        self._remove_table_files("lextab")
        self._check_table_build("lex_optimize", "lextab", self._CALC_TOKENS)
        self._remove_table_files("lextab")

    def test_lex_optimize2(self):
        self._remove_table_files("opt2tab")
        self._check_table_build("lex_optimize2", "opt2tab", self._CALC_TOKENS)
        self._remove_table_files("opt2tab")

    def test_lex_optimize3(self):
        self._remove_lexdir()

        os.mkdir("lexdir")
        os.mkdir("lexdir/sub")
        # Create empty __init__.py files; the with-block closes each handle
        # deterministically instead of leaving it to garbage collection.
        for init_path in ("lexdir/__init__.py", "lexdir/sub/__init__.py"):
            with open(init_path, "w") as init_file:
                init_file.write("")

        self._check_table_build("lex_optimize3", "lexdir/sub/calctab",
                                self._CALC_TOKENS)
        self._remove_lexdir()

    def test_lex_opt_alias(self):
        self._remove_table_files("aliastab")
        self._check_table_build("lex_opt_alias", "aliastab", self._ALIAS_TOKENS)
        self._remove_table_files("aliastab")

    def test_lex_many_tokens(self):
        self._remove_table_files("manytab")
        # This case is only exercised with -O, not -OO.
        self._check_table_build("lex_many_tokens", "manytab", self._MANY_TOKENS,
                                flags=("-O",))
        self._remove_table_files("manytab")
548
549# Tests related to run-time behavior of lexers
class LexRunTests(unittest.TestCase):
    """Check what lexer modules print on stdout when they are imported.

    Each test imports one module through ``run_import`` and compares the
    captured ``sys.stdout`` text with the expected token dump using
    ``check_expected``.
    """

    def setUp(self):
        # Remember the streams that are installed right now (a test runner may
        # already have replaced them) and capture output in memory instead.
        self._saved_streams = (sys.stderr, sys.stdout)
        sys.stderr = StringIO.StringIO()
        sys.stdout = StringIO.StringIO()

    def tearDown(self):
        # Put back exactly what setUp found rather than sys.__stderr__ /
        # sys.__stdout__, which would discard a runner's own redirection.
        sys.stderr, sys.stdout = self._saved_streams

    def _check_import_output(self, module, expected):
        """Import *module* and compare what it printed with *expected*."""
        run_import(module)
        printed = sys.stdout.getvalue()
        # The message carries the captured text because this file sets
        # sys.tracebacklimit to 0, so a bare failure would say very little.
        self.assertTrue(check_expected(printed, expected),
                        "unexpected output from %s: %r" % (module, printed))

    def test_lex_hedit(self):
        self._check_import_output(
            "lex_hedit",
            "(H_EDIT_DESCRIPTOR,'abc',1,0)\n"
            "(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)\n"
            "(H_EDIT_DESCRIPTOR,'xy',1,20)\n")

    def test_lex_state_try(self):
        self._check_import_output(
            "lex_state_try",
            "(NUMBER,'3',1,0)\n"
            "(PLUS,'+',1,2)\n"
            "(NUMBER,'4',1,4)\n"
            "Entering comment state\n"
            "comment body LexToken(body_part,'This is a comment */',1,9)\n"
            "(PLUS,'+',1,30)\n"
            "(NUMBER,'10',1,32)\n")
578
579
580
# Run the suite only when this file is executed as a script, so that merely
# importing the module (for example from a test collector) does not start a
# test run and exit the interpreter.
if __name__ == "__main__":
    unittest.main()
582