├── README.md
└── plugins
    ├── SmartJump.py
    └── SmartJump
        ├── __init__.py
        ├── lex.py
        └── yacc.py

/README.md:
--------------------------------------------------------------------------------
1 | IDA Pro plugin to enhance the JumpAsk 'g' command
2 | 
3 | 
4 | # Installation
5 | 
6 | Copy the contents of the `plugins` folder into your `IDA_DIR/plugins` folder.
7 | Edit `IDA_DIR\cfg\idagui.cfg` so that the line that by default reads:
8 | `"JumpAsk" = 'g'`
9 | instead reads:
10 | `"JumpAsk" = 0`
11 | You can append the text:
12 | `// 'g'`
13 | to the line to give a full entry of:
14 | `"JumpAsk" = 0 // 'g'`
15 | if you do not want to have to remember what the default value was.
16 | 
17 | # Usage
18 | 
19 | SmartJump is designed to improve the `g` keyboard shortcut in IDA, especially when using IDA to debug binaries. It allows a user to perform basic arithmetic operations (`-`, `+`, `/`, `*`) on values and labels in the JumpAsk window.
20 | In addition, it allows a user to use the symbols `[` and `]` to dereference memory addresses and jump to the value stored at the address.
21 | 
22 | The supported types of values that can be used in the JumpAsk window are:
23 | `here` and `here()` - these resolve to the current result of `idc.here()`
24 | `main` and `sub_123456` - you can still jump by names/labels in the binary
25 | `12ab34` and `0x12ab34` - all numbers are interpreted as hexadecimal and may optionally be preceded by `0x`. If a global name also matches a hexadecimal number pattern then the global name takes precedence
26 | `eax`, `ebx`, ... , `rax`, ... - 32-bit and 64-bit registers for x86 and x64 are supported. Using `eax` on an x64 binary will mask off the upper bits of `rax` and return the lower 32 bits
27 | 
28 | All of these values can be used in combination; a brief but non-exhaustive list of examples is below:
29 | `[eax]` - Grabs the current value in eax, goes to that memory location and attempts to read a 32-bit pointer in IDA32 or a 64-bit pointer in IDA64. If the result is a valid address then it jumps there
30 | `[here] + rsp` - Grabs the value stored at the current address, adds it to the 64-bit stack pointer and then jumps to the resulting stack location
31 | `[[ebx]*4]+[edx]` - You are starting to get the picture; you can do any jump that resolves to an address in IDA...
32 | 
33 | You can use the symbols `(` and `)` to explicitly group operations together rather than relying on the precedence assigned to the operators.
34 | E.g.
35 | `0x1200 * ([ebx] + here)` - This will grab the contents of ebx, add the current address to it, multiply the result by 0x1200 and try to jump to the result
36 | 
37 | You do not have to match `[` and `]` symbols:
38 | `[[[eax` - This will automatically have enough `]` symbols appended to the end of the query to match the opening `[` symbols
39 | The final result would be a triple dereference of eax - `[[[eax]]]`
40 | The bracket completion currently only supports the `[` symbol and will not match `(` symbols. All automatically added brackets are appended to the end of the input expression.
41 | 
--------------------------------------------------------------------------------
/plugins/SmartJump.py:
--------------------------------------------------------------------------------
1 | '''
2 | Copyright 2023 PwCIL
3 | 
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ''' 16 | 17 | import idc, idaapi 18 | import SmartJump.lex as lex 19 | import SmartJump.yacc as yacc 20 | import re 21 | 22 | #utilities 23 | arch_size = 32 24 | if idaapi.idainfo_is_64bit(): 25 | arch_size = 64 26 | 27 | #support x86 and x64 so check which pointers we will be accessing 28 | def get_pointer(address): 29 | if arch_size == 32: 30 | return idaapi.get_dword(address) 31 | else: 32 | return idaapi.get_qword(address) 33 | 34 | tokens = ('HERE', 'HEXADDR', 'ADDR', 'PLUS', 'LBRACKET', 'RBRACKET', 'LPAREN', 'RPAREN', 'MINUS', 'TIMES', 'DIVIDE', 35 | 'EAX', 'EBX', 'ECX', 'EDX', 'EIP', 'ESI', 'EDI', 'EBP', 'ESP', 36 | 'RAX', 'RBX', 'RCX', 'RDX', 'RIP', 'RSI', 'RDI', 'RBP', 'RSP', 'R8', 'R9', 'R10', 'R11', 'R12', 'R13', 'R14', 'R15', 'NAME', 37 | ) 38 | 39 | t_PLUS = r'\+' 40 | t_MINUS = r'-' 41 | t_LBRACKET = r'\[' 42 | t_RBRACKET = r'\]' 43 | t_TIMES = r'\*' 44 | t_DIVIDE = r'/' 45 | t_LPAREN = r'\(' 46 | t_RPAREN = r'\)' 47 | 48 | def t_HERE(t): 49 | r'here(\(\))?' 50 | t.value = idc.here() 51 | return t 52 | 53 | def t_RAX(t): 54 | r'rax' 55 | t.value = idaapi.get_reg_val("RAX") 56 | return t 57 | 58 | def t_RBX(t): 59 | r'rbx' 60 | t.value = idaapi.get_reg_val("RBX") 61 | return t 62 | 63 | def t_RCX(t): 64 | r'rcx' 65 | t.value = idaapi.get_reg_val("RCX") 66 | return t 67 | 68 | def t_RDX(t): 69 | r'rdx' 70 | t.value = idaapi.get_reg_val("RDX") 71 | return t 72 | 73 | def t_RIP(t): 74 | r'rip' 75 | t.value = idaapi.get_reg_val("RIP") 76 | return t 77 | 78 | def t_RSI(t): 79 | r'rsi' 80 | t.value = idaapi.get_reg_val("RSI") 81 | return t 82 | 83 | def t_RDI(t): 84 | r'rdi' 85 | t.value = idaapi.get_reg_val("RDI") 86 | return t 87 | 88 | def t_RBP(t): 89 | r'rbp' 90 | t.value = idaapi.get_reg_val("RBP") 91 | return t 92 | 93 | def t_RSP(t): 94 | r'rsp' 95 | t.value = idaapi.get_reg_val("RSP") 96 | return t 97 | 98 | def t_R8(t): 99 | r'r8' 100 | t.value = idaapi.get_reg_val("R8") 101 | return t 102 | 103 | def t_R9(t): 104 | r'r9' 105 | t.value = idaapi.get_reg_val("R9") 106 | return t 107 | 108 | def t_R10(t): 109 | r'r10' 110 | t.value = idaapi.get_reg_val("R10") 111 | return t 112 | 113 | def t_R11(t): 114 | r'r11' 115 | t.value = idaapi.get_reg_val("R11") 116 | return t 117 | 118 | def t_R12(t): 119 | r'r12' 120 | t.value = idaapi.get_reg_val("R12") 121 | return t 122 | 123 | def t_R13(t): 124 | r'r13' 125 | t.value = idaapi.get_reg_val("R13") 126 | return t 127 | 128 | def t_R14(t): 129 | r'r14' 130 | t.value = idaapi.get_reg_val("R14") 131 | return t 132 | 133 | def t_R15(t): 134 | r'r15' 135 | t.value = idaapi.get_reg_val("R15") 136 | return t 137 | 138 | def t_EAX(t): 139 | r'eax' 140 | if arch_size == 32: 141 | t.value = idaapi.get_reg_val("EAX") 142 | else: 143 | t.value = (idaapi.get_reg_val("RAX")) & 0xffffffff 144 | return t 145 | 146 | def t_EBX(t): 147 | r'ebx' 148 | if arch_size == 32: 149 | t.value = idaapi.get_reg_val("EBX") 150 | else: 151 | t.value = (idaapi.get_reg_val("RBX")) & 0xffffffff 152 | return t 153 | 154 | def t_ECX(t): 155 | r'ecx' 156 | if arch_size == 32: 157 | t.value = idaapi.get_reg_val("ECX") 158 | else: 159 | t.value 
= (idaapi.get_reg_val("RCX")) & 0xffffffff
160 |     return t
161 | 
162 | def t_EDX(t):
163 |     r'edx'
164 |     if arch_size == 32:
165 |         t.value = idaapi.get_reg_val("EDX")
166 |     else:
167 |         t.value = (idaapi.get_reg_val("RDX")) & 0xffffffff
168 |     return t
169 | 
170 | def t_EIP(t):
171 |     r'eip'
172 |     if arch_size == 32:
173 |         t.value = idaapi.get_reg_val("EIP")
174 |     else:
175 |         t.value = (idaapi.get_reg_val("RIP")) & 0xffffffff
176 |     return t
177 | 
178 | def t_ESI(t):
179 |     r'esi'
180 |     if arch_size == 32:
181 |         t.value = idaapi.get_reg_val("ESI")
182 |     else:
183 |         t.value = (idaapi.get_reg_val("RSI")) & 0xffffffff
184 |     return t
185 | 
186 | def t_EDI(t):
187 |     r'edi'
188 |     if arch_size == 32:
189 |         t.value = idaapi.get_reg_val("EDI")
190 |     else:
191 |         t.value = (idaapi.get_reg_val("RDI")) & 0xffffffff
192 |     return t
193 | 
194 | def t_EBP(t):
195 |     r'ebp'
196 |     if arch_size == 32:
197 |         t.value = idaapi.get_reg_val("EBP")
198 |     else:
199 |         t.value = (idaapi.get_reg_val("RBP")) & 0xffffffff
200 |     return t
201 | 
202 | def t_ESP(t):
203 |     r'esp'
204 |     if arch_size == 32:
205 |         t.value = idaapi.get_reg_val("ESP")
206 |     else:
207 |         t.value = (idaapi.get_reg_val("RSP")) & 0xffffffff
208 |     return t
209 | 
210 | def t_NAME(t):
211 |     r'[a-zA-Z_\.][a-zA-Z0-9_\.]*'
212 |     temp = idc.get_name_ea_simple(t.value)
213 |     if temp == idaapi.BADADDR:
214 |         t.value = int(t.value, 16)
215 |     else:
216 |         t.value = temp
217 |     return t
218 | 
219 | def t_HEXADDR(t):
220 |     r'0x[0-9a-f]+'
221 |     t.value = int(t.value[2:], 16)
222 |     return t
223 | 
224 | def t_ADDR(t):
225 |     r'[0-9a-f]+'
226 |     t.value = int(t.value, 16)
227 |     return t
228 | 
229 | t_ignore = " \t"
230 | 
231 | def t_newline(t):
232 |     r'\n+'
233 |     t.lexer.lineno += t.value.count("\n")
234 | 
235 | def t_error(t):
236 |     debug_out(f"Illegal character {t.value[0]!r}")
237 |     t.lexer.skip(1)
238 | 
239 | glob_lex = lex.lex(reflags=re.I)  # case-insensitive matching so registers and names can be typed in any case
240 | 
241 | precedence = (
242 |     ('left','PLUS','MINUS'),
243 |     ('left','TIMES','DIVIDE'),
244 | )
245 | 
246 | def p_expression_binop(p):
247 |     '''expression : expression PLUS expression
248 |                   | expression MINUS expression
249 |                   | expression TIMES expression
250 |                   | expression DIVIDE expression'''
251 |     if p[2] == '+': p[0] = p[1] + p[3]
252 |     elif p[2] == '-': p[0] = p[1] - p[3]
253 |     elif p[2] == '*': p[0] = p[1] * p[3]
254 |     elif p[2] == '/': p[0] = p[1] // p[3]  # floor division so the result stays an integer address
255 | 
256 | def p_expression_group(p):
257 |     'expression : LPAREN expression RPAREN'
258 |     p[0] = p[2]
259 | 
260 | def p_expression_deref(p):
261 |     'expression : LBRACKET expression RBRACKET'
262 |     p[0] = get_pointer(p[2])
263 | 
264 | def p_expression_hexaddr(p):
265 |     'expression : HEXADDR'
266 |     p[0] = p[1]
267 | 
268 | def p_expression_addr(p):
269 |     'expression : ADDR'
270 |     p[0] = p[1]
271 | 
272 | def p_expression_reg(p):
273 |     '''expression : RAX
274 |                   | RBX
275 |                   | RCX
276 |                   | RDX
277 |                   | RSI
278 |                   | RDI
279 |                   | RBP
280 |                   | RSP
281 |                   | RIP
282 |                   | R8
283 |                   | R9
284 |                   | R10
285 |                   | R11
286 |                   | R12
287 |                   | R13
288 |                   | R14
289 |                   | R15
290 |                   | EAX
291 |                   | EBX
292 |                   | ECX
293 |                   | EDX
294 |                   | ESI
295 |                   | EDI
296 |                   | EBP
297 |                   | ESP
298 |                   | EIP
299 |                   | HERE'''
300 |     p[0] = p[1]
301 | 
302 | def p_expression_name(p):
303 |     'expression : NAME'
304 |     p[0] = p[1]
305 | 
306 | def p_error(p):
307 |     debug_out(f"Syntax error at {p.value!r}" if p else "Syntax error: unexpected end of expression")
308 | 
309 | glob_parser = yacc.yacc()
310 | 
311 | def debug_out(msg):
312 |     print("[SMARTJUMPER]: %s" % msg)
313 | 
314 | class SmartJump_t(idaapi.plugin_t):
315 |     flags = 0
316 |     comment = "Smart IDA jumping"
317 |     wanted_hotkey = 'g'
318 |     help = "Runs by replacing Go command when pressing G"
319 |     wanted_name = "SmartJumper"
320 |     lexer = None
321 |     parser = None
322 | 
323 |     def init(self):
324 |         global glob_lex
325 |         global glob_parser
326 |         if idaapi.ph_get_id() != idaapi.PLFM_386:
327 |             return idaapi.PLUGIN_SKIP
328 |         debug_out("Loading Parsers")
329 | 
330 |         self.lexer = glob_lex
331 |         self.parser = glob_parser
332 |         debug_out("Loaded SmartJumper")
333 |         return idaapi.PLUGIN_KEEP
334 | 
335 |     def run(self, arg=0):
336 |         jump_str = idaapi.ask_str("", 0, "Jump expression...")
337 |         if jump_str:
338 |             try:
339 |                 open_deref = jump_str.count("[")
340 |                 close_deref = jump_str.count("]")
341 |                 if close_deref < open_deref:
342 |                     jump_str += "]" * (open_deref-close_deref)
343 |                 if open_deref < close_deref:
344 |                     debug_out("mismatched dereferences")
345 |                 else:
346 |                     result = self.parser.parse(jump_str, lexer=self.lexer)
347 |                     debug_out("resolved to %08x" % result)
348 |                     idaapi.jumpto(result)
349 |             except Exception as e:
350 |                 debug_out("problem parsing: %s" % e)
351 | 
352 |     def term(self):
353 |         return
354 | 
355 | def PLUGIN_ENTRY():
356 |     return SmartJump_t()
357 | 
--------------------------------------------------------------------------------
/plugins/SmartJump/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '4.0'
2 | __all__ = ['lex','yacc']
3 | 
--------------------------------------------------------------------------------
/plugins/SmartJump/lex.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # ply: lex.py
3 | #
4 | # Copyright (C) 2001-2020
5 | # David M. Beazley (Dabeaz LLC)
6 | # All rights reserved.
7 | #
8 | # Latest version: https://github.com/dabeaz/ply
9 | #
10 | # Redistribution and use in source and binary forms, with or without
11 | # modification, are permitted provided that the following conditions are
12 | # met:
13 | #
14 | # * Redistributions of source code must retain the above copyright notice,
15 | #   this list of conditions and the following disclaimer.
16 | # * Redistributions in binary form must reproduce the above copyright notice,
17 | #   this list of conditions and the following disclaimer in the documentation
18 | #   and/or other materials provided with the distribution.
19 | # * Neither the name of David Beazley or Dabeaz LLC may be used to
20 | #   endorse or promote products derived from this software without
21 | #   specific prior written permission.
22 | #
23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 | # ----------------------------------------------------------------------------- 35 | 36 | import re 37 | import sys 38 | import types 39 | import copy 40 | import os 41 | import inspect 42 | 43 | # This tuple contains acceptable string types 44 | StringTypes = (str, bytes) 45 | 46 | # This regular expression is used to match valid token names 47 | _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') 48 | 49 | # Exception thrown when invalid token encountered and no default error 50 | # handler is defined. 51 | class LexError(Exception): 52 | def __init__(self, message, s): 53 | self.args = (message,) 54 | self.text = s 55 | 56 | # Token class. This class is used to represent the tokens produced. 57 | class LexToken(object): 58 | def __repr__(self): 59 | return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' 60 | 61 | # This object is a stand-in for a logging object created by the 62 | # logging module. 63 | 64 | class PlyLogger(object): 65 | def __init__(self, f): 66 | self.f = f 67 | 68 | def critical(self, msg, *args, **kwargs): 69 | self.f.write((msg % args) + '\n') 70 | 71 | def warning(self, msg, *args, **kwargs): 72 | self.f.write('WARNING: ' + (msg % args) + '\n') 73 | 74 | def error(self, msg, *args, **kwargs): 75 | self.f.write('ERROR: ' + (msg % args) + '\n') 76 | 77 | info = critical 78 | debug = critical 79 | 80 | # ----------------------------------------------------------------------------- 81 | # === Lexing Engine === 82 | # 83 | # The following Lexer class implements the lexer runtime. There are only 84 | # a few public methods and attributes: 85 | # 86 | # input() - Store a new string in the lexer 87 | # token() - Get the next token 88 | # clone() - Clone the lexer 89 | # 90 | # lineno - Current line number 91 | # lexpos - Current position in the input string 92 | # ----------------------------------------------------------------------------- 93 | 94 | class Lexer: 95 | def __init__(self): 96 | self.lexre = None # Master regular expression. 
This is a list of 97 | # tuples (re, findex) where re is a compiled 98 | # regular expression and findex is a list 99 | # mapping regex group numbers to rules 100 | self.lexretext = None # Current regular expression strings 101 | self.lexstatere = {} # Dictionary mapping lexer states to master regexs 102 | self.lexstateretext = {} # Dictionary mapping lexer states to regex strings 103 | self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names 104 | self.lexstate = 'INITIAL' # Current lexer state 105 | self.lexstatestack = [] # Stack of lexer states 106 | self.lexstateinfo = None # State information 107 | self.lexstateignore = {} # Dictionary of ignored characters for each state 108 | self.lexstateerrorf = {} # Dictionary of error functions for each state 109 | self.lexstateeoff = {} # Dictionary of eof functions for each state 110 | self.lexreflags = 0 # Optional re compile flags 111 | self.lexdata = None # Actual input data (as a string) 112 | self.lexpos = 0 # Current position in input text 113 | self.lexlen = 0 # Length of the input text 114 | self.lexerrorf = None # Error rule (if any) 115 | self.lexeoff = None # EOF rule (if any) 116 | self.lextokens = None # List of valid tokens 117 | self.lexignore = '' # Ignored characters 118 | self.lexliterals = '' # Literal characters that can be passed through 119 | self.lexmodule = None # Module 120 | self.lineno = 1 # Current line number 121 | 122 | def clone(self, object=None): 123 | c = copy.copy(self) 124 | 125 | # If the object parameter has been supplied, it means we are attaching the 126 | # lexer to a new object. In this case, we have to rebind all methods in 127 | # the lexstatere and lexstateerrorf tables. 128 | 129 | if object: 130 | newtab = {} 131 | for key, ritem in self.lexstatere.items(): 132 | newre = [] 133 | for cre, findex in ritem: 134 | newfindex = [] 135 | for f in findex: 136 | if not f or not f[0]: 137 | newfindex.append(f) 138 | continue 139 | newfindex.append((getattr(object, f[0].__name__), f[1])) 140 | newre.append((cre, newfindex)) 141 | newtab[key] = newre 142 | c.lexstatere = newtab 143 | c.lexstateerrorf = {} 144 | for key, ef in self.lexstateerrorf.items(): 145 | c.lexstateerrorf[key] = getattr(object, ef.__name__) 146 | c.lexmodule = object 147 | return c 148 | 149 | # ------------------------------------------------------------ 150 | # input() - Push a new string into the lexer 151 | # ------------------------------------------------------------ 152 | def input(self, s): 153 | self.lexdata = s 154 | self.lexpos = 0 155 | self.lexlen = len(s) 156 | 157 | # ------------------------------------------------------------ 158 | # begin() - Changes the lexing state 159 | # ------------------------------------------------------------ 160 | def begin(self, state): 161 | if state not in self.lexstatere: 162 | raise ValueError(f'Undefined state {state!r}') 163 | self.lexre = self.lexstatere[state] 164 | self.lexretext = self.lexstateretext[state] 165 | self.lexignore = self.lexstateignore.get(state, '') 166 | self.lexerrorf = self.lexstateerrorf.get(state, None) 167 | self.lexeoff = self.lexstateeoff.get(state, None) 168 | self.lexstate = state 169 | 170 | # ------------------------------------------------------------ 171 | # push_state() - Changes the lexing state and saves old on stack 172 | # ------------------------------------------------------------ 173 | def push_state(self, state): 174 | self.lexstatestack.append(self.lexstate) 175 | self.begin(state) 176 | 177 | # 
------------------------------------------------------------ 178 | # pop_state() - Restores the previous state 179 | # ------------------------------------------------------------ 180 | def pop_state(self): 181 | self.begin(self.lexstatestack.pop()) 182 | 183 | # ------------------------------------------------------------ 184 | # current_state() - Returns the current lexing state 185 | # ------------------------------------------------------------ 186 | def current_state(self): 187 | return self.lexstate 188 | 189 | # ------------------------------------------------------------ 190 | # skip() - Skip ahead n characters 191 | # ------------------------------------------------------------ 192 | def skip(self, n): 193 | self.lexpos += n 194 | 195 | # ------------------------------------------------------------ 196 | # token() - Return the next token from the Lexer 197 | # 198 | # Note: This function has been carefully implemented to be as fast 199 | # as possible. Don't make changes unless you really know what 200 | # you are doing 201 | # ------------------------------------------------------------ 202 | def token(self): 203 | # Make local copies of frequently referenced attributes 204 | lexpos = self.lexpos 205 | lexlen = self.lexlen 206 | lexignore = self.lexignore 207 | lexdata = self.lexdata 208 | 209 | while lexpos < lexlen: 210 | # This code provides some short-circuit code for whitespace, tabs, and other ignored characters 211 | if lexdata[lexpos] in lexignore: 212 | lexpos += 1 213 | continue 214 | 215 | # Look for a regular expression match 216 | for lexre, lexindexfunc in self.lexre: 217 | m = lexre.match(lexdata, lexpos) 218 | if not m: 219 | continue 220 | 221 | # Create a token for return 222 | tok = LexToken() 223 | tok.value = m.group() 224 | tok.lineno = self.lineno 225 | tok.lexpos = lexpos 226 | 227 | i = m.lastindex 228 | func, tok.type = lexindexfunc[i] 229 | 230 | if not func: 231 | # If no token type was set, it's an ignored token 232 | if tok.type: 233 | self.lexpos = m.end() 234 | return tok 235 | else: 236 | lexpos = m.end() 237 | break 238 | 239 | lexpos = m.end() 240 | 241 | # If token is processed by a function, call it 242 | 243 | tok.lexer = self # Set additional attributes useful in token rules 244 | self.lexmatch = m 245 | self.lexpos = lexpos 246 | newtok = func(tok) 247 | del tok.lexer 248 | del self.lexmatch 249 | 250 | # Every function must return a token, if nothing, we just move to next token 251 | if not newtok: 252 | lexpos = self.lexpos # This is here in case user has updated lexpos. 253 | lexignore = self.lexignore # This is here in case there was a state change 254 | break 255 | return newtok 256 | else: 257 | # No match, see if in literals 258 | if lexdata[lexpos] in self.lexliterals: 259 | tok = LexToken() 260 | tok.value = lexdata[lexpos] 261 | tok.lineno = self.lineno 262 | tok.type = tok.value 263 | tok.lexpos = lexpos 264 | self.lexpos = lexpos + 1 265 | return tok 266 | 267 | # No match. Call t_error() if defined. 268 | if self.lexerrorf: 269 | tok = LexToken() 270 | tok.value = self.lexdata[lexpos:] 271 | tok.lineno = self.lineno 272 | tok.type = 'error' 273 | tok.lexer = self 274 | tok.lexpos = lexpos 275 | self.lexpos = lexpos 276 | newtok = self.lexerrorf(tok) 277 | if lexpos == self.lexpos: 278 | # Error method didn't change text position at all. This is an error. 279 | raise LexError(f"Scanning error. 
Illegal character {lexdata[lexpos]!r}", 280 | lexdata[lexpos:]) 281 | lexpos = self.lexpos 282 | if not newtok: 283 | continue 284 | return newtok 285 | 286 | self.lexpos = lexpos 287 | raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", 288 | lexdata[lexpos:]) 289 | 290 | if self.lexeoff: 291 | tok = LexToken() 292 | tok.type = 'eof' 293 | tok.value = '' 294 | tok.lineno = self.lineno 295 | tok.lexpos = lexpos 296 | tok.lexer = self 297 | self.lexpos = lexpos 298 | newtok = self.lexeoff(tok) 299 | return newtok 300 | 301 | self.lexpos = lexpos + 1 302 | if self.lexdata is None: 303 | raise RuntimeError('No input string given with input()') 304 | return None 305 | 306 | # Iterator interface 307 | def __iter__(self): 308 | return self 309 | 310 | def __next__(self): 311 | t = self.token() 312 | if t is None: 313 | raise StopIteration 314 | return t 315 | 316 | # ----------------------------------------------------------------------------- 317 | # ==== Lex Builder === 318 | # 319 | # The functions and classes below are used to collect lexing information 320 | # and build a Lexer object from it. 321 | # ----------------------------------------------------------------------------- 322 | 323 | # ----------------------------------------------------------------------------- 324 | # _get_regex(func) 325 | # 326 | # Returns the regular expression assigned to a function either as a doc string 327 | # or as a .regex attribute attached by the @TOKEN decorator. 328 | # ----------------------------------------------------------------------------- 329 | def _get_regex(func): 330 | return getattr(func, 'regex', func.__doc__) 331 | 332 | # ----------------------------------------------------------------------------- 333 | # get_caller_module_dict() 334 | # 335 | # This function returns a dictionary containing all of the symbols defined within 336 | # a caller further down the call stack. This is used to get the environment 337 | # associated with the yacc() call if none was provided. 338 | # ----------------------------------------------------------------------------- 339 | def get_caller_module_dict(levels): 340 | f = sys._getframe(levels) 341 | return { **f.f_globals, **f.f_locals } 342 | 343 | # ----------------------------------------------------------------------------- 344 | # _form_master_re() 345 | # 346 | # This function takes a list of all of the regex components and attempts to 347 | # form the master regular expression. Given limitations in the Python re 348 | # module, it may be necessary to break the master regex into separate expressions. 
349 | # ----------------------------------------------------------------------------- 350 | def _form_master_re(relist, reflags, ldict, toknames): 351 | if not relist: 352 | return [], [], [] 353 | regex = '|'.join(relist) 354 | try: 355 | lexre = re.compile(regex, reflags) 356 | 357 | # Build the index to function map for the matching engine 358 | lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) 359 | lexindexnames = lexindexfunc[:] 360 | 361 | for f, i in lexre.groupindex.items(): 362 | handle = ldict.get(f, None) 363 | if type(handle) in (types.FunctionType, types.MethodType): 364 | lexindexfunc[i] = (handle, toknames[f]) 365 | lexindexnames[i] = f 366 | elif handle is not None: 367 | lexindexnames[i] = f 368 | if f.find('ignore_') > 0: 369 | lexindexfunc[i] = (None, None) 370 | else: 371 | lexindexfunc[i] = (None, toknames[f]) 372 | 373 | return [(lexre, lexindexfunc)], [regex], [lexindexnames] 374 | except Exception: 375 | m = (len(relist) // 2) + 1 376 | llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) 377 | rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) 378 | return (llist+rlist), (lre+rre), (lnames+rnames) 379 | 380 | # ----------------------------------------------------------------------------- 381 | # def _statetoken(s,names) 382 | # 383 | # Given a declaration name s of the form "t_" and a dictionary whose keys are 384 | # state names, this function returns a tuple (states,tokenname) where states 385 | # is a tuple of state names and tokenname is the name of the token. For example, 386 | # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') 387 | # ----------------------------------------------------------------------------- 388 | def _statetoken(s, names): 389 | parts = s.split('_') 390 | for i, part in enumerate(parts[1:], 1): 391 | if part not in names and part != 'ANY': 392 | break 393 | 394 | if i > 1: 395 | states = tuple(parts[1:i]) 396 | else: 397 | states = ('INITIAL',) 398 | 399 | if 'ANY' in states: 400 | states = tuple(names) 401 | 402 | tokenname = '_'.join(parts[i:]) 403 | return (states, tokenname) 404 | 405 | 406 | # ----------------------------------------------------------------------------- 407 | # LexerReflect() 408 | # 409 | # This class represents information needed to build a lexer as extracted from a 410 | # user's input file. 
411 | # ----------------------------------------------------------------------------- 412 | class LexerReflect(object): 413 | def __init__(self, ldict, log=None, reflags=0): 414 | self.ldict = ldict 415 | self.error_func = None 416 | self.tokens = [] 417 | self.reflags = reflags 418 | self.stateinfo = {'INITIAL': 'inclusive'} 419 | self.modules = set() 420 | self.error = False 421 | self.log = PlyLogger(sys.stderr) if log is None else log 422 | 423 | # Get all of the basic information 424 | def get_all(self): 425 | self.get_tokens() 426 | self.get_literals() 427 | self.get_states() 428 | self.get_rules() 429 | 430 | # Validate all of the information 431 | def validate_all(self): 432 | self.validate_tokens() 433 | self.validate_literals() 434 | self.validate_rules() 435 | return self.error 436 | 437 | # Get the tokens map 438 | def get_tokens(self): 439 | tokens = self.ldict.get('tokens', None) 440 | if not tokens: 441 | self.log.error('No token list is defined') 442 | self.error = True 443 | return 444 | 445 | if not isinstance(tokens, (list, tuple)): 446 | self.log.error('tokens must be a list or tuple') 447 | self.error = True 448 | return 449 | 450 | if not tokens: 451 | self.log.error('tokens is empty') 452 | self.error = True 453 | return 454 | 455 | self.tokens = tokens 456 | 457 | # Validate the tokens 458 | def validate_tokens(self): 459 | terminals = {} 460 | for n in self.tokens: 461 | if not _is_identifier.match(n): 462 | self.log.error(f"Bad token name {n!r}") 463 | self.error = True 464 | if n in terminals: 465 | self.log.warning(f"Token {n!r} multiply defined") 466 | terminals[n] = 1 467 | 468 | # Get the literals specifier 469 | def get_literals(self): 470 | self.literals = self.ldict.get('literals', '') 471 | if not self.literals: 472 | self.literals = '' 473 | 474 | # Validate literals 475 | def validate_literals(self): 476 | try: 477 | for c in self.literals: 478 | if not isinstance(c, StringTypes) or len(c) > 1: 479 | self.log.error(f'Invalid literal {c!r}. Must be a single character') 480 | self.error = True 481 | 482 | except TypeError: 483 | self.log.error('Invalid literals specification. literals must be a sequence of characters') 484 | self.error = True 485 | 486 | def get_states(self): 487 | self.states = self.ldict.get('states', None) 488 | # Build statemap 489 | if self.states: 490 | if not isinstance(self.states, (tuple, list)): 491 | self.log.error('states must be defined as a tuple or list') 492 | self.error = True 493 | else: 494 | for s in self.states: 495 | if not isinstance(s, tuple) or len(s) != 2: 496 | self.log.error("Invalid state specifier %r. 
Must be a tuple (statename,'exclusive|inclusive')", s) 497 | self.error = True 498 | continue 499 | name, statetype = s 500 | if not isinstance(name, StringTypes): 501 | self.log.error('State name %r must be a string', name) 502 | self.error = True 503 | continue 504 | if not (statetype == 'inclusive' or statetype == 'exclusive'): 505 | self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) 506 | self.error = True 507 | continue 508 | if name in self.stateinfo: 509 | self.log.error("State %r already defined", name) 510 | self.error = True 511 | continue 512 | self.stateinfo[name] = statetype 513 | 514 | # Get all of the symbols with a t_ prefix and sort them into various 515 | # categories (functions, strings, error functions, and ignore characters) 516 | 517 | def get_rules(self): 518 | tsymbols = [f for f in self.ldict if f[:2] == 't_'] 519 | 520 | # Now build up a list of functions and a list of strings 521 | self.toknames = {} # Mapping of symbols to token names 522 | self.funcsym = {} # Symbols defined as functions 523 | self.strsym = {} # Symbols defined as strings 524 | self.ignore = {} # Ignore strings by state 525 | self.errorf = {} # Error functions by state 526 | self.eoff = {} # EOF functions by state 527 | 528 | for s in self.stateinfo: 529 | self.funcsym[s] = [] 530 | self.strsym[s] = [] 531 | 532 | if len(tsymbols) == 0: 533 | self.log.error('No rules of the form t_rulename are defined') 534 | self.error = True 535 | return 536 | 537 | for f in tsymbols: 538 | t = self.ldict[f] 539 | states, tokname = _statetoken(f, self.stateinfo) 540 | self.toknames[f] = tokname 541 | 542 | if hasattr(t, '__call__'): 543 | if tokname == 'error': 544 | for s in states: 545 | self.errorf[s] = t 546 | elif tokname == 'eof': 547 | for s in states: 548 | self.eoff[s] = t 549 | elif tokname == 'ignore': 550 | line = t.__code__.co_firstlineno 551 | file = t.__code__.co_filename 552 | self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) 553 | self.error = True 554 | else: 555 | for s in states: 556 | self.funcsym[s].append((f, t)) 557 | elif isinstance(t, StringTypes): 558 | if tokname == 'ignore': 559 | for s in states: 560 | self.ignore[s] = t 561 | if '\\' in t: 562 | self.log.warning("%s contains a literal backslash '\\'", f) 563 | 564 | elif tokname == 'error': 565 | self.log.error("Rule %r must be defined as a function", f) 566 | self.error = True 567 | else: 568 | for s in states: 569 | self.strsym[s].append((f, t)) 570 | else: 571 | self.log.error('%s not defined as a function or string', f) 572 | self.error = True 573 | 574 | # Sort the functions by line number 575 | for f in self.funcsym.values(): 576 | f.sort(key=lambda x: x[1].__code__.co_firstlineno) 577 | 578 | # Sort the strings by regular expression length 579 | for s in self.strsym.values(): 580 | s.sort(key=lambda x: len(x[1]), reverse=True) 581 | 582 | # Validate all of the t_rules collected 583 | def validate_rules(self): 584 | for state in self.stateinfo: 585 | # Validate all rules defined by functions 586 | 587 | for fname, f in self.funcsym[state]: 588 | line = f.__code__.co_firstlineno 589 | file = f.__code__.co_filename 590 | module = inspect.getmodule(f) 591 | self.modules.add(module) 592 | 593 | tokname = self.toknames[fname] 594 | if isinstance(f, types.MethodType): 595 | reqargs = 2 596 | else: 597 | reqargs = 1 598 | nargs = f.__code__.co_argcount 599 | if nargs > reqargs: 600 | self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) 
601 | self.error = True 602 | continue 603 | 604 | if nargs < reqargs: 605 | self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) 606 | self.error = True 607 | continue 608 | 609 | if not _get_regex(f): 610 | self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) 611 | self.error = True 612 | continue 613 | 614 | try: 615 | c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) 616 | if c.match(''): 617 | self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) 618 | self.error = True 619 | except re.error as e: 620 | self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) 621 | if '#' in _get_regex(f): 622 | self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) 623 | self.error = True 624 | 625 | # Validate all rules defined by strings 626 | for name, r in self.strsym[state]: 627 | tokname = self.toknames[name] 628 | if tokname == 'error': 629 | self.log.error("Rule %r must be defined as a function", name) 630 | self.error = True 631 | continue 632 | 633 | if tokname not in self.tokens and tokname.find('ignore_') < 0: 634 | self.log.error("Rule %r defined for an unspecified token %s", name, tokname) 635 | self.error = True 636 | continue 637 | 638 | try: 639 | c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) 640 | if (c.match('')): 641 | self.log.error("Regular expression for rule %r matches empty string", name) 642 | self.error = True 643 | except re.error as e: 644 | self.log.error("Invalid regular expression for rule %r. %s", name, e) 645 | if '#' in r: 646 | self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) 647 | self.error = True 648 | 649 | if not self.funcsym[state] and not self.strsym[state]: 650 | self.log.error("No rules defined for state %r", state) 651 | self.error = True 652 | 653 | # Validate the error function 654 | efunc = self.errorf.get(state, None) 655 | if efunc: 656 | f = efunc 657 | line = f.__code__.co_firstlineno 658 | file = f.__code__.co_filename 659 | module = inspect.getmodule(f) 660 | self.modules.add(module) 661 | 662 | if isinstance(f, types.MethodType): 663 | reqargs = 2 664 | else: 665 | reqargs = 1 666 | nargs = f.__code__.co_argcount 667 | if nargs > reqargs: 668 | self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) 669 | self.error = True 670 | 671 | if nargs < reqargs: 672 | self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) 673 | self.error = True 674 | 675 | for module in self.modules: 676 | self.validate_module(module) 677 | 678 | # ----------------------------------------------------------------------------- 679 | # validate_module() 680 | # 681 | # This checks to see if there are duplicated t_rulename() functions or strings 682 | # in the parser input file. This is done using a simple regular expression 683 | # match on each line in the source code of the given module. 
684 | # ----------------------------------------------------------------------------- 685 | 686 | def validate_module(self, module): 687 | try: 688 | lines, linen = inspect.getsourcelines(module) 689 | except IOError: 690 | return 691 | 692 | fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') 693 | sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') 694 | 695 | counthash = {} 696 | linen += 1 697 | for line in lines: 698 | m = fre.match(line) 699 | if not m: 700 | m = sre.match(line) 701 | if m: 702 | name = m.group(1) 703 | prev = counthash.get(name) 704 | if not prev: 705 | counthash[name] = linen 706 | else: 707 | filename = inspect.getsourcefile(module) 708 | self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) 709 | self.error = True 710 | linen += 1 711 | 712 | # ----------------------------------------------------------------------------- 713 | # lex(module) 714 | # 715 | # Build all of the regular expression rules from definitions in the supplied module 716 | # ----------------------------------------------------------------------------- 717 | def lex(*, module=None, object=None, debug=False, 718 | reflags=int(re.VERBOSE), debuglog=None, errorlog=None): 719 | 720 | global lexer 721 | 722 | ldict = None 723 | stateinfo = {'INITIAL': 'inclusive'} 724 | lexobj = Lexer() 725 | global token, input 726 | 727 | if errorlog is None: 728 | errorlog = PlyLogger(sys.stderr) 729 | 730 | if debug: 731 | if debuglog is None: 732 | debuglog = PlyLogger(sys.stderr) 733 | 734 | # Get the module dictionary used for the lexer 735 | if object: 736 | module = object 737 | 738 | # Get the module dictionary used for the parser 739 | if module: 740 | _items = [(k, getattr(module, k)) for k in dir(module)] 741 | ldict = dict(_items) 742 | # If no __file__ attribute is available, try to obtain it from the __module__ instead 743 | if '__file__' not in ldict: 744 | ldict['__file__'] = sys.modules[ldict['__module__']].__file__ 745 | else: 746 | ldict = get_caller_module_dict(2) 747 | 748 | # Collect parser information from the dictionary 749 | linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) 750 | linfo.get_all() 751 | if linfo.validate_all(): 752 | raise SyntaxError("Can't build lexer") 753 | 754 | # Dump some basic debugging information 755 | if debug: 756 | debuglog.info('lex: tokens = %r', linfo.tokens) 757 | debuglog.info('lex: literals = %r', linfo.literals) 758 | debuglog.info('lex: states = %r', linfo.stateinfo) 759 | 760 | # Build a dictionary of valid token names 761 | lexobj.lextokens = set() 762 | for n in linfo.tokens: 763 | lexobj.lextokens.add(n) 764 | 765 | # Get literals specification 766 | if isinstance(linfo.literals, (list, tuple)): 767 | lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) 768 | else: 769 | lexobj.lexliterals = linfo.literals 770 | 771 | lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) 772 | 773 | # Get the stateinfo dictionary 774 | stateinfo = linfo.stateinfo 775 | 776 | regexs = {} 777 | # Build the master regular expressions 778 | for state in stateinfo: 779 | regex_list = [] 780 | 781 | # Add rules defined by functions first 782 | for fname, f in linfo.funcsym[state]: 783 | regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) 784 | if debug: 785 | debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) 786 | 787 | # Now add all of the simple rules 788 | for name, r in linfo.strsym[state]: 789 | regex_list.append('(?P<%s>%s)' % (name, r)) 790 | if 
debug: 791 | debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) 792 | 793 | regexs[state] = regex_list 794 | 795 | # Build the master regular expressions 796 | 797 | if debug: 798 | debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') 799 | 800 | for state in regexs: 801 | lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) 802 | lexobj.lexstatere[state] = lexre 803 | lexobj.lexstateretext[state] = re_text 804 | lexobj.lexstaterenames[state] = re_names 805 | if debug: 806 | for i, text in enumerate(re_text): 807 | debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) 808 | 809 | # For inclusive states, we need to add the regular expressions from the INITIAL state 810 | for state, stype in stateinfo.items(): 811 | if state != 'INITIAL' and stype == 'inclusive': 812 | lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) 813 | lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) 814 | lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) 815 | 816 | lexobj.lexstateinfo = stateinfo 817 | lexobj.lexre = lexobj.lexstatere['INITIAL'] 818 | lexobj.lexretext = lexobj.lexstateretext['INITIAL'] 819 | lexobj.lexreflags = reflags 820 | 821 | # Set up ignore variables 822 | lexobj.lexstateignore = linfo.ignore 823 | lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') 824 | 825 | # Set up error functions 826 | lexobj.lexstateerrorf = linfo.errorf 827 | lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) 828 | if not lexobj.lexerrorf: 829 | errorlog.warning('No t_error rule is defined') 830 | 831 | # Set up eof functions 832 | lexobj.lexstateeoff = linfo.eoff 833 | lexobj.lexeoff = linfo.eoff.get('INITIAL', None) 834 | 835 | # Check state information for ignore and error rules 836 | for s, stype in stateinfo.items(): 837 | if stype == 'exclusive': 838 | if s not in linfo.errorf: 839 | errorlog.warning("No error rule is defined for exclusive state %r", s) 840 | if s not in linfo.ignore and lexobj.lexignore: 841 | errorlog.warning("No ignore rule is defined for exclusive state %r", s) 842 | elif stype == 'inclusive': 843 | if s not in linfo.errorf: 844 | linfo.errorf[s] = linfo.errorf.get('INITIAL', None) 845 | if s not in linfo.ignore: 846 | linfo.ignore[s] = linfo.ignore.get('INITIAL', '') 847 | 848 | # Create global versions of the token() and input() functions 849 | token = lexobj.token 850 | input = lexobj.input 851 | lexer = lexobj 852 | 853 | return lexobj 854 | 855 | # ----------------------------------------------------------------------------- 856 | # runmain() 857 | # 858 | # This runs the lexer as a main program 859 | # ----------------------------------------------------------------------------- 860 | 861 | def runmain(lexer=None, data=None): 862 | if not data: 863 | try: 864 | filename = sys.argv[1] 865 | with open(filename) as f: 866 | data = f.read() 867 | except IndexError: 868 | sys.stdout.write('Reading from standard input (type EOF to end):\n') 869 | data = sys.stdin.read() 870 | 871 | if lexer: 872 | _input = lexer.input 873 | else: 874 | _input = input 875 | _input(data) 876 | if lexer: 877 | _token = lexer.token 878 | else: 879 | _token = token 880 | 881 | while True: 882 | tok = _token() 883 | if not tok: 884 | break 885 | sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') 886 | 887 | # ----------------------------------------------------------------------------- 888 | # @TOKEN(regex) 889 | # 890 | # This decorator function can be 
used to set the regex expression on a function 891 | # when its docstring might need to be set in an alternative way 892 | # ----------------------------------------------------------------------------- 893 | 894 | def TOKEN(r): 895 | def set_regex(f): 896 | if hasattr(r, '__call__'): 897 | f.regex = _get_regex(r) 898 | else: 899 | f.regex = r 900 | return f 901 | return set_regex 902 | -------------------------------------------------------------------------------- /plugins/SmartJump/yacc.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # ply: yacc.py 3 | # 4 | # Copyright (C) 2001-2020 5 | # David M. Beazley (Dabeaz LLC) 6 | # All rights reserved. 7 | # 8 | # Latest version: https://github.com/dabeaz/ply 9 | # 10 | # Redistribution and use in source and binary forms, with or without 11 | # modification, are permitted provided that the following conditions are 12 | # met: 13 | # 14 | # * Redistributions of source code must retain the above copyright notice, 15 | # this list of conditions and the following disclaimer. 16 | # * Redistributions in binary form must reproduce the above copyright notice, 17 | # this list of conditions and the following disclaimer in the documentation 18 | # and/or other materials provided with the distribution. 19 | # * Neither the name of David Beazley or Dabeaz LLC may be used to 20 | # endorse or promote products derived from this software without 21 | # specific prior written permission. 22 | # 23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 29 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 33 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 | # ----------------------------------------------------------------------------- 35 | # 36 | # This implements an LR parser that is constructed from grammar rules defined 37 | # as Python functions. The grammar is specified by supplying the BNF inside 38 | # Python documentation strings. The inspiration for this technique was borrowed 39 | # from John Aycock's Spark parsing system. PLY might be viewed as cross between 40 | # Spark and the GNU bison utility. 41 | # 42 | # The current implementation is only somewhat object-oriented. The 43 | # LR parser itself is defined in terms of an object (which allows multiple 44 | # parsers to co-exist). However, most of the variables used during table 45 | # construction are defined in terms of global variables. Users shouldn't 46 | # notice unless they are trying to define multiple parsers at the same 47 | # time using threads (in which case they should have their head examined). 48 | # 49 | # This implementation supports both SLR and LALR(1) parsing. 
LALR(1) 50 | # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 51 | # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 52 | # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced 53 | # by the more efficient DeRemer and Pennello algorithm. 54 | # 55 | # :::::::: WARNING ::::::: 56 | # 57 | # Construction of LR parsing tables is fairly complicated and expensive. 58 | # To make this module run fast, a *LOT* of work has been put into 59 | # optimization---often at the expensive of readability and what might 60 | # consider to be good Python "coding style." Modify the code at your 61 | # own risk! 62 | # ---------------------------------------------------------------------------- 63 | 64 | import re 65 | import types 66 | import sys 67 | import inspect 68 | 69 | #----------------------------------------------------------------------------- 70 | # === User configurable parameters === 71 | # 72 | # Change these to modify the default behavior of yacc (if you wish) 73 | #----------------------------------------------------------------------------- 74 | 75 | yaccdebug = False # Debugging mode. If set, yacc generates a 76 | # a 'parser.out' file in the current directory 77 | 78 | debug_file = 'parser.out' # Default name of the debugging file 79 | error_count = 3 # Number of symbols that must be shifted to leave recovery mode 80 | resultlimit = 40 # Size limit of results when running in debug mode. 81 | 82 | MAXINT = sys.maxsize 83 | 84 | # This object is a stand-in for a logging object created by the 85 | # logging module. PLY will use this by default to create things 86 | # such as the parser.out file. If a user wants more detailed 87 | # information, they can create their own logging object and pass 88 | # it into PLY. 89 | 90 | class PlyLogger(object): 91 | def __init__(self, f): 92 | self.f = f 93 | 94 | def debug(self, msg, *args, **kwargs): 95 | self.f.write((msg % args) + '\n') 96 | 97 | info = debug 98 | 99 | def warning(self, msg, *args, **kwargs): 100 | self.f.write('WARNING: ' + (msg % args) + '\n') 101 | 102 | def error(self, msg, *args, **kwargs): 103 | self.f.write('ERROR: ' + (msg % args) + '\n') 104 | 105 | critical = debug 106 | 107 | # Null logger is used when no output is generated. Does nothing. 108 | class NullLogger(object): 109 | def __getattribute__(self, name): 110 | return self 111 | 112 | def __call__(self, *args, **kwargs): 113 | return self 114 | 115 | # Exception raised for yacc-related errors 116 | class YaccError(Exception): 117 | pass 118 | 119 | # Format the result message that the parser produces when running in debug mode. 120 | def format_result(r): 121 | repr_str = repr(r) 122 | if '\n' in repr_str: 123 | repr_str = repr(repr_str) 124 | if len(repr_str) > resultlimit: 125 | repr_str = repr_str[:resultlimit] + ' ...' 126 | result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) 127 | return result 128 | 129 | # Format stack entries when the parser is running in debug mode 130 | def format_stack_entry(r): 131 | repr_str = repr(r) 132 | if '\n' in repr_str: 133 | repr_str = repr(repr_str) 134 | if len(repr_str) < 16: 135 | return repr_str 136 | else: 137 | return '<%s @ 0x%x>' % (type(r).__name__, id(r)) 138 | 139 | #----------------------------------------------------------------------------- 140 | # === LR Parsing Engine === 141 | # 142 | # The following classes are used for the LR parser itself. 
These are not 143 | # used during table construction and are independent of the actual LR 144 | # table generation algorithm 145 | #----------------------------------------------------------------------------- 146 | 147 | # This class is used to hold non-terminal grammar symbols during parsing. 148 | # It normally has the following attributes set: 149 | # .type = Grammar symbol type 150 | # .value = Symbol value 151 | # .lineno = Starting line number 152 | # .endlineno = Ending line number (optional, set automatically) 153 | # .lexpos = Starting lex position 154 | # .endlexpos = Ending lex position (optional, set automatically) 155 | 156 | class YaccSymbol: 157 | def __str__(self): 158 | return self.type 159 | 160 | def __repr__(self): 161 | return str(self) 162 | 163 | # This class is a wrapper around the objects actually passed to each 164 | # grammar rule. Index lookup and assignment actually assign the 165 | # .value attribute of the underlying YaccSymbol object. 166 | # The lineno() method returns the line number of a given 167 | # item (or 0 if not defined). The linespan() method returns 168 | # a tuple of (startline,endline) representing the range of lines 169 | # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 170 | # representing the range of positional information for a symbol. 171 | 172 | class YaccProduction: 173 | def __init__(self, s, stack=None): 174 | self.slice = s 175 | self.stack = stack 176 | self.lexer = None 177 | self.parser = None 178 | 179 | def __getitem__(self, n): 180 | if isinstance(n, slice): 181 | return [s.value for s in self.slice[n]] 182 | elif n >= 0: 183 | return self.slice[n].value 184 | else: 185 | return self.stack[n].value 186 | 187 | def __setitem__(self, n, v): 188 | self.slice[n].value = v 189 | 190 | def __getslice__(self, i, j): 191 | return [s.value for s in self.slice[i:j]] 192 | 193 | def __len__(self): 194 | return len(self.slice) 195 | 196 | def lineno(self, n): 197 | return getattr(self.slice[n], 'lineno', 0) 198 | 199 | def set_lineno(self, n, lineno): 200 | self.slice[n].lineno = lineno 201 | 202 | def linespan(self, n): 203 | startline = getattr(self.slice[n], 'lineno', 0) 204 | endline = getattr(self.slice[n], 'endlineno', startline) 205 | return startline, endline 206 | 207 | def lexpos(self, n): 208 | return getattr(self.slice[n], 'lexpos', 0) 209 | 210 | def set_lexpos(self, n, lexpos): 211 | self.slice[n].lexpos = lexpos 212 | 213 | def lexspan(self, n): 214 | startpos = getattr(self.slice[n], 'lexpos', 0) 215 | endpos = getattr(self.slice[n], 'endlexpos', startpos) 216 | return startpos, endpos 217 | 218 | def error(self): 219 | raise SyntaxError 220 | 221 | # ----------------------------------------------------------------------------- 222 | # == LRParser == 223 | # 224 | # The LR Parsing engine. 225 | # ----------------------------------------------------------------------------- 226 | 227 | class LRParser: 228 | def __init__(self, lrtab, errorf): 229 | self.productions = lrtab.lr_productions 230 | self.action = lrtab.lr_action 231 | self.goto = lrtab.lr_goto 232 | self.errorfunc = errorf 233 | self.set_defaulted_states() 234 | self.errorok = True 235 | 236 | def errok(self): 237 | self.errorok = True 238 | 239 | def restart(self): 240 | del self.statestack[:] 241 | del self.symstack[:] 242 | sym = YaccSymbol() 243 | sym.type = '$end' 244 | self.symstack.append(sym) 245 | self.statestack.append(0) 246 | 247 | # Defaulted state support. 
248 | # This method identifies parser states where there is only one possible reduction action. 249 | # For such states, the parser can make a choose to make a rule reduction without consuming 250 | # the next look-ahead token. This delayed invocation of the tokenizer can be useful in 251 | # certain kinds of advanced parsing situations where the lexer and parser interact with 252 | # each other or change states (i.e., manipulation of scope, lexer states, etc.). 253 | # 254 | # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions 255 | def set_defaulted_states(self): 256 | self.defaulted_states = {} 257 | for state, actions in self.action.items(): 258 | rules = list(actions.values()) 259 | if len(rules) == 1 and rules[0] < 0: 260 | self.defaulted_states[state] = rules[0] 261 | 262 | def disable_defaulted_states(self): 263 | self.defaulted_states = {} 264 | 265 | # parse(). 266 | # 267 | # This is the core parsing engine. To operate, it requires a lexer object. 268 | # Two options are provided. The debug flag turns on debugging so that you can 269 | # see the various rule reductions and parsing steps. tracking turns on position 270 | # tracking. In this mode, symbols will record the starting/ending line number and 271 | # character index. 272 | 273 | def parse(self, input=None, lexer=None, debug=False, tracking=False): 274 | # If debugging has been specified as a flag, turn it into a logging object 275 | if isinstance(debug, int) and debug: 276 | debug = PlyLogger(sys.stderr) 277 | 278 | lookahead = None # Current lookahead symbol 279 | lookaheadstack = [] # Stack of lookahead symbols 280 | actions = self.action # Local reference to action table (to avoid lookup on self.) 281 | goto = self.goto # Local reference to goto table (to avoid lookup on self.) 282 | prod = self.productions # Local reference to production list (to avoid lookup on self.) 283 | defaulted_states = self.defaulted_states # Local reference to defaulted states 284 | pslice = YaccProduction(None) # Production object passed to grammar rules 285 | errorcount = 0 # Used during error recovery 286 | 287 | if debug: 288 | debug.info('PLY: PARSE DEBUG START') 289 | 290 | # If no lexer was given, we will try to use the lex module 291 | if not lexer: 292 | from . import lex 293 | lexer = lex.lexer 294 | 295 | # Set up the lexer and parser objects on pslice 296 | pslice.lexer = lexer 297 | pslice.parser = self 298 | 299 | # If input was supplied, pass to lexer 300 | if input is not None: 301 | lexer.input(input) 302 | 303 | # Set the token function 304 | get_token = self.token = lexer.token 305 | 306 | # Set up the state and symbol stacks 307 | statestack = self.statestack = [] # Stack of parsing states 308 | symstack = self.symstack = [] # Stack of grammar symbols 309 | pslice.stack = symstack # Put in the production 310 | errtoken = None # Err token 311 | 312 | # The start state is assumed to be (0,$end) 313 | 314 | statestack.append(0) 315 | sym = YaccSymbol() 316 | sym.type = '$end' 317 | symstack.append(sym) 318 | state = 0 319 | while True: 320 | # Get the next symbol on the input. If a lookahead symbol 321 | # is already set, we just use that. 
Otherwise, we'll pull 322 | # the next token off of the lookaheadstack or from the lexer 323 | 324 | if debug: 325 | debug.debug('State : %s', state) 326 | 327 | if state not in defaulted_states: 328 | if not lookahead: 329 | if not lookaheadstack: 330 | lookahead = get_token() # Get the next token 331 | else: 332 | lookahead = lookaheadstack.pop() 333 | if not lookahead: 334 | lookahead = YaccSymbol() 335 | lookahead.type = '$end' 336 | 337 | # Check the action table 338 | ltype = lookahead.type 339 | t = actions[state].get(ltype) 340 | else: 341 | t = defaulted_states[state] 342 | if debug: 343 | debug.debug('Defaulted state %s: Reduce using %d', state, -t) 344 | 345 | if debug: 346 | debug.debug('Stack : %s', 347 | ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 348 | 349 | if t is not None: 350 | if t > 0: 351 | # shift a symbol on the stack 352 | statestack.append(t) 353 | state = t 354 | 355 | if debug: 356 | debug.debug('Action : Shift and goto state %s', t) 357 | 358 | symstack.append(lookahead) 359 | lookahead = None 360 | 361 | # Decrease error count on successful shift 362 | if errorcount: 363 | errorcount -= 1 364 | continue 365 | 366 | if t < 0: 367 | # reduce a symbol on the stack, emit a production 368 | p = prod[-t] 369 | pname = p.name 370 | plen = p.len 371 | 372 | # Get production function 373 | sym = YaccSymbol() 374 | sym.type = pname # Production name 375 | sym.value = None 376 | 377 | if debug: 378 | if plen: 379 | debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, 380 | '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', 381 | goto[statestack[-1-plen]][pname]) 382 | else: 383 | debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], 384 | goto[statestack[-1]][pname]) 385 | 386 | if plen: 387 | targ = symstack[-plen-1:] 388 | targ[0] = sym 389 | 390 | if tracking: 391 | t1 = targ[1] 392 | sym.lineno = t1.lineno 393 | sym.lexpos = t1.lexpos 394 | t1 = targ[-1] 395 | sym.endlineno = getattr(t1, 'endlineno', t1.lineno) 396 | sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) 397 | 398 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 399 | # The code enclosed in this section is duplicated 400 | # below as a performance optimization. Make sure 401 | # changes get made in both locations. 402 | 403 | pslice.slice = targ 404 | 405 | try: 406 | # Call the grammar rule with our special slice object 407 | del symstack[-plen:] 408 | self.state = state 409 | p.callable(pslice) 410 | del statestack[-plen:] 411 | if debug: 412 | debug.info('Result : %s', format_result(pslice[0])) 413 | symstack.append(sym) 414 | state = goto[statestack[-1]][pname] 415 | statestack.append(state) 416 | except SyntaxError: 417 | # If an error was set. Enter error recovery state 418 | lookaheadstack.append(lookahead) # Save the current lookahead token 419 | symstack.extend(targ[1:-1]) # Put the production slice back on the stack 420 | statestack.pop() # Pop back one state (before the reduce) 421 | state = statestack[-1] 422 | sym.type = 'error' 423 | sym.value = 'error' 424 | lookahead = sym 425 | errorcount = error_count 426 | self.errorok = False 427 | 428 | continue 429 | 430 | else: 431 | 432 | if tracking: 433 | sym.lineno = lexer.lineno 434 | sym.lexpos = lexer.lexpos 435 | 436 | targ = [sym] 437 | 438 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 439 | # The code enclosed in this section is duplicated 440 | # above as a performance optimization. 
Make sure 441 | # changes get made in both locations. 442 | 443 | pslice.slice = targ 444 | 445 | try: 446 | # Call the grammar rule with our special slice object 447 | self.state = state 448 | p.callable(pslice) 449 | if debug: 450 | debug.info('Result : %s', format_result(pslice[0])) 451 | symstack.append(sym) 452 | state = goto[statestack[-1]][pname] 453 | statestack.append(state) 454 | except SyntaxError: 455 | # If an error was set. Enter error recovery state 456 | lookaheadstack.append(lookahead) # Save the current lookahead token 457 | statestack.pop() # Pop back one state (before the reduce) 458 | state = statestack[-1] 459 | sym.type = 'error' 460 | sym.value = 'error' 461 | lookahead = sym 462 | errorcount = error_count 463 | self.errorok = False 464 | 465 | continue 466 | 467 | if t == 0: 468 | n = symstack[-1] 469 | result = getattr(n, 'value', None) 470 | 471 | if debug: 472 | debug.info('Done : Returning %s', format_result(result)) 473 | debug.info('PLY: PARSE DEBUG END') 474 | 475 | return result 476 | 477 | if t is None: 478 | 479 | if debug: 480 | debug.error('Error : %s', 481 | ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 482 | 483 | # We have some kind of parsing error here. To handle 484 | # this, we are going to push the current token onto 485 | # the tokenstack and replace it with an 'error' token. 486 | # If there are any synchronization rules, they may 487 | # catch it. 488 | # 489 | # In addition to pushing the error token, we call call 490 | # the user defined p_error() function if this is the 491 | # first syntax error. This function is only called if 492 | # errorcount == 0. 493 | if errorcount == 0 or self.errorok: 494 | errorcount = error_count 495 | self.errorok = False 496 | errtoken = lookahead 497 | if errtoken.type == '$end': 498 | errtoken = None # End of file! 499 | if self.errorfunc: 500 | if errtoken and not hasattr(errtoken, 'lexer'): 501 | errtoken.lexer = lexer 502 | self.state = state 503 | tok = self.errorfunc(errtoken) 504 | if self.errorok: 505 | # User must have done some kind of panic 506 | # mode recovery on their own. The 507 | # returned token is the next lookahead 508 | lookahead = tok 509 | errtoken = None 510 | continue 511 | else: 512 | if errtoken: 513 | if hasattr(errtoken, 'lineno'): 514 | lineno = lookahead.lineno 515 | else: 516 | lineno = 0 517 | if lineno: 518 | sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 519 | else: 520 | sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) 521 | else: 522 | sys.stderr.write('yacc: Parse error in input. EOF\n') 523 | return 524 | 525 | else: 526 | errorcount = error_count 527 | 528 | # case 1: the statestack only has 1 entry on it. If we're in this state, the 529 | # entire parse has been rolled back and we're completely hosed. The token is 530 | # discarded and we just keep going. 531 | 532 | if len(statestack) <= 1 and lookahead.type != '$end': 533 | lookahead = None 534 | errtoken = None 535 | state = 0 536 | # Nuke the pushback stack 537 | del lookaheadstack[:] 538 | continue 539 | 540 | # case 2: the statestack has a couple of entries on it, but we're 541 | # at the end of the file. nuke the top entry and generate an error token 542 | 543 | # Start nuking entries on the stack 544 | if lookahead.type == '$end': 545 | # Whoa. We're really hosed here. Bail out 546 | return 547 | 548 | if lookahead.type != 'error': 549 | sym = symstack[-1] 550 | if sym.type == 'error': 551 | # Hmmm. 
Error is on top of stack, we'll just nuke input 552 | # symbol and continue 553 | if tracking: 554 | sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) 555 | sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) 556 | lookahead = None 557 | continue 558 | 559 | # Create the error symbol for the first time and make it the new lookahead symbol 560 | t = YaccSymbol() 561 | t.type = 'error' 562 | 563 | if hasattr(lookahead, 'lineno'): 564 | t.lineno = t.endlineno = lookahead.lineno 565 | if hasattr(lookahead, 'lexpos'): 566 | t.lexpos = t.endlexpos = lookahead.lexpos 567 | t.value = lookahead 568 | lookaheadstack.append(lookahead) 569 | lookahead = t 570 | else: 571 | sym = symstack.pop() 572 | if tracking: 573 | lookahead.lineno = sym.lineno 574 | lookahead.lexpos = sym.lexpos 575 | statestack.pop() 576 | state = statestack[-1] 577 | 578 | continue 579 | 580 | # If we'r here, something really bad happened 581 | raise RuntimeError('yacc: internal parser error!!!\n') 582 | 583 | # ----------------------------------------------------------------------------- 584 | # === Grammar Representation === 585 | # 586 | # The following functions, classes, and variables are used to represent and 587 | # manipulate the rules that make up a grammar. 588 | # ----------------------------------------------------------------------------- 589 | 590 | # regex matching identifiers 591 | _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 592 | 593 | # ----------------------------------------------------------------------------- 594 | # class Production: 595 | # 596 | # This class stores the raw information about a single production or grammar rule. 597 | # A grammar rule refers to a specification such as this: 598 | # 599 | # expr : expr PLUS term 600 | # 601 | # Here are the basic attributes defined on all productions 602 | # 603 | # name - Name of the production. For example 'expr' 604 | # prod - A list of symbols on the right side ['expr','PLUS','term'] 605 | # prec - Production precedence level 606 | # number - Production number. 607 | # func - Function that executes on reduce 608 | # file - File where production function is defined 609 | # lineno - Line number where production function is defined 610 | # 611 | # The following attributes are defined or optional. 
612 | # 613 | # len - Length of the production (number of symbols on right hand side) 614 | # usyms - Set of unique symbols found in the production 615 | # ----------------------------------------------------------------------------- 616 | 617 | class Production(object): 618 | reduced = 0 619 | def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): 620 | self.name = name 621 | self.prod = tuple(prod) 622 | self.number = number 623 | self.func = func 624 | self.callable = None 625 | self.file = file 626 | self.line = line 627 | self.prec = precedence 628 | 629 | # Internal settings used during table construction 630 | 631 | self.len = len(self.prod) # Length of the production 632 | 633 | # Create a list of unique production symbols used in the production 634 | self.usyms = [] 635 | for s in self.prod: 636 | if s not in self.usyms: 637 | self.usyms.append(s) 638 | 639 | # List of all LR items for the production 640 | self.lr_items = [] 641 | self.lr_next = None 642 | 643 | # Create a string representation 644 | if self.prod: 645 | self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) 646 | else: 647 | self.str = '%s -> ' % self.name 648 | 649 | def __str__(self): 650 | return self.str 651 | 652 | def __repr__(self): 653 | return 'Production(' + str(self) + ')' 654 | 655 | def __len__(self): 656 | return len(self.prod) 657 | 658 | def __nonzero__(self): 659 | return 1 660 | 661 | def __getitem__(self, index): 662 | return self.prod[index] 663 | 664 | # Return the nth lr_item from the production (or None if at the end) 665 | def lr_item(self, n): 666 | if n > len(self.prod): 667 | return None 668 | p = LRItem(self, n) 669 | # Precompute the list of productions immediately following. 670 | try: 671 | p.lr_after = self.Prodnames[p.prod[n+1]] 672 | except (IndexError, KeyError): 673 | p.lr_after = [] 674 | try: 675 | p.lr_before = p.prod[n-1] 676 | except IndexError: 677 | p.lr_before = None 678 | return p 679 | 680 | # Bind the production function name to a callable 681 | def bind(self, pdict): 682 | if self.func: 683 | self.callable = pdict[self.func] 684 | 685 | # ----------------------------------------------------------------------------- 686 | # class LRItem 687 | # 688 | # This class represents a specific stage of parsing a production rule. For 689 | # example: 690 | # 691 | # expr : expr . PLUS term 692 | # 693 | # In the above, the "." represents the current location of the parse. Here 694 | # basic attributes: 695 | # 696 | # name - Name of the production. For example 'expr' 697 | # prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 698 | # number - Production number. 699 | # 700 | # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' 701 | # then lr_next refers to 'expr -> expr PLUS . term' 702 | # lr_index - LR item index (location of the ".") in the prod list. 
703 | # lookaheads - LALR lookahead symbols for this item 704 | # len - Length of the production (number of symbols on right hand side) 705 | # lr_after - List of all productions that immediately follow 706 | # lr_before - Grammar symbol immediately before 707 | # ----------------------------------------------------------------------------- 708 | 709 | class LRItem(object): 710 | def __init__(self, p, n): 711 | self.name = p.name 712 | self.prod = list(p.prod) 713 | self.number = p.number 714 | self.lr_index = n 715 | self.lookaheads = {} 716 | self.prod.insert(n, '.') 717 | self.prod = tuple(self.prod) 718 | self.len = len(self.prod) 719 | self.usyms = p.usyms 720 | 721 | def __str__(self): 722 | if self.prod: 723 | s = '%s -> %s' % (self.name, ' '.join(self.prod)) 724 | else: 725 | s = '%s -> ' % self.name 726 | return s 727 | 728 | def __repr__(self): 729 | return 'LRItem(' + str(self) + ')' 730 | 731 | # ----------------------------------------------------------------------------- 732 | # rightmost_terminal() 733 | # 734 | # Return the rightmost terminal from a list of symbols. Used in add_production() 735 | # ----------------------------------------------------------------------------- 736 | def rightmost_terminal(symbols, terminals): 737 | i = len(symbols) - 1 738 | while i >= 0: 739 | if symbols[i] in terminals: 740 | return symbols[i] 741 | i -= 1 742 | return None 743 | 744 | # ----------------------------------------------------------------------------- 745 | # === GRAMMAR CLASS === 746 | # 747 | # The following class represents the contents of the specified grammar along 748 | # with various computed properties such as first sets, follow sets, LR items, etc. 749 | # This data is used for critical parts of the table generation process later. 750 | # ----------------------------------------------------------------------------- 751 | 752 | class GrammarError(YaccError): 753 | pass 754 | 755 | class Grammar(object): 756 | def __init__(self, terminals): 757 | self.Productions = [None] # A list of all of the productions. The first 758 | # entry is always reserved for the purpose of 759 | # building an augmented grammar 760 | 761 | self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 762 | # productions of that nonterminal. 763 | 764 | self.Prodmap = {} # A dictionary that is only used to detect duplicate 765 | # productions. 766 | 767 | self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 768 | # list of the rules where they are used. 769 | 770 | for term in terminals: 771 | self.Terminals[term] = [] 772 | 773 | self.Terminals['error'] = [] 774 | 775 | self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 776 | # of rule numbers where they are used. 777 | 778 | self.First = {} # A dictionary of precomputed FIRST(x) symbols 779 | 780 | self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 781 | 782 | self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 783 | # form ('right',level) or ('nonassoc', level) or ('left',level) 784 | 785 | self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. 786 | # This is only used to provide error checking and to generate 787 | # a warning about unused precedence rules. 
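# As an illustration (toy rules, not the grammar defined elsewhere in this
# plugin): after adding
#     expr : expr PLUS term
#     expr : term
# Prodnames['expr'] holds both Production objects, Terminals['PLUS'] records the
# numbers of the rules that use PLUS, and Nonterminals['term'] records the rules
# in which 'term' appears on a right-hand side.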
788 | 789 | self.Start = None # Starting symbol for the grammar 790 | 791 | 792 | def __len__(self): 793 | return len(self.Productions) 794 | 795 | def __getitem__(self, index): 796 | return self.Productions[index] 797 | 798 | # ----------------------------------------------------------------------------- 799 | # set_precedence() 800 | # 801 | # Sets the precedence for a given terminal. assoc is the associativity such as 802 | # 'left','right', or 'nonassoc'. level is a numeric level. 803 | # 804 | # ----------------------------------------------------------------------------- 805 | 806 | def set_precedence(self, term, assoc, level): 807 | assert self.Productions == [None], 'Must call set_precedence() before add_production()' 808 | if term in self.Precedence: 809 | raise GrammarError('Precedence already specified for terminal %r' % term) 810 | if assoc not in ['left', 'right', 'nonassoc']: 811 | raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 812 | self.Precedence[term] = (assoc, level) 813 | 814 | # ----------------------------------------------------------------------------- 815 | # add_production() 816 | # 817 | # Given an action function, this function assembles a production rule and 818 | # computes its precedence level. 819 | # 820 | # The production rule is supplied as a list of symbols. For example, 821 | # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 822 | # symbols ['expr','PLUS','term']. 823 | # 824 | # Precedence is determined by the precedence of the right-most non-terminal 825 | # or the precedence of a terminal specified by %prec. 826 | # 827 | # A variety of error checks are performed to make sure production symbols 828 | # are valid and that %prec is used correctly. 829 | # ----------------------------------------------------------------------------- 830 | 831 | def add_production(self, prodname, syms, func=None, file='', line=0): 832 | 833 | if prodname in self.Terminals: 834 | raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) 835 | if prodname == 'error': 836 | raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) 837 | if not _is_identifier.match(prodname): 838 | raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) 839 | 840 | # Look for literal tokens 841 | for n, s in enumerate(syms): 842 | if s[0] in "'\"": 843 | try: 844 | c = eval(s) 845 | if (len(c) > 1): 846 | raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % 847 | (file, line, s, prodname)) 848 | if c not in self.Terminals: 849 | self.Terminals[c] = [] 850 | syms[n] = c 851 | continue 852 | except SyntaxError: 853 | pass 854 | if not _is_identifier.match(s) and s != '%prec': 855 | raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) 856 | 857 | # Determine the precedence level 858 | if '%prec' in syms: 859 | if syms[-1] == '%prec': 860 | raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) 861 | if syms[-2] != '%prec': 862 | raise GrammarError('%s:%d: Syntax error. 
%%prec can only appear at the end of a grammar rule' % 863 | (file, line)) 864 | precname = syms[-1] 865 | prodprec = self.Precedence.get(precname) 866 | if not prodprec: 867 | raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) 868 | else: 869 | self.UsedPrecedence.add(precname) 870 | del syms[-2:] # Drop %prec from the rule 871 | else: 872 | # If no %prec, precedence is determined by the rightmost terminal symbol 873 | precname = rightmost_terminal(syms, self.Terminals) 874 | prodprec = self.Precedence.get(precname, ('right', 0)) 875 | 876 | # See if the rule is already in the rulemap 877 | map = '%s -> %s' % (prodname, syms) 878 | if map in self.Prodmap: 879 | m = self.Prodmap[map] 880 | raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + 881 | 'Previous definition at %s:%d' % (m.file, m.line)) 882 | 883 | # From this point on, everything is valid. Create a new Production instance 884 | pnumber = len(self.Productions) 885 | if prodname not in self.Nonterminals: 886 | self.Nonterminals[prodname] = [] 887 | 888 | # Add the production number to Terminals and Nonterminals 889 | for t in syms: 890 | if t in self.Terminals: 891 | self.Terminals[t].append(pnumber) 892 | else: 893 | if t not in self.Nonterminals: 894 | self.Nonterminals[t] = [] 895 | self.Nonterminals[t].append(pnumber) 896 | 897 | # Create a production and add it to the list of productions 898 | p = Production(pnumber, prodname, syms, prodprec, func, file, line) 899 | self.Productions.append(p) 900 | self.Prodmap[map] = p 901 | 902 | # Add to the global productions list 903 | try: 904 | self.Prodnames[prodname].append(p) 905 | except KeyError: 906 | self.Prodnames[prodname] = [p] 907 | 908 | # ----------------------------------------------------------------------------- 909 | # set_start() 910 | # 911 | # Sets the starting symbol and creates the augmented grammar. Production 912 | # rule 0 is S' -> start where start is the start symbol. 913 | # ----------------------------------------------------------------------------- 914 | 915 | def set_start(self, start=None): 916 | if not start: 917 | start = self.Productions[1].name 918 | if start not in self.Nonterminals: 919 | raise GrammarError('start symbol %s undefined' % start) 920 | self.Productions[0] = Production(0, "S'", [start]) 921 | self.Nonterminals[start].append(0) 922 | self.Start = start 923 | 924 | # ----------------------------------------------------------------------------- 925 | # find_unreachable() 926 | # 927 | # Find all of the nonterminal symbols that can't be reached from the starting 928 | # symbol. Returns a list of nonterminals that can't be reached. 929 | # ----------------------------------------------------------------------------- 930 | 931 | def find_unreachable(self): 932 | 933 | # Mark all symbols that are reachable from a symbol s 934 | def mark_reachable_from(s): 935 | if s in reachable: 936 | return 937 | reachable.add(s) 938 | for p in self.Prodnames.get(s, []): 939 | for r in p.prod: 940 | mark_reachable_from(r) 941 | 942 | reachable = set() 943 | mark_reachable_from(self.Productions[0].prod[0]) 944 | return [s for s in self.Nonterminals if s not in reachable] 945 | 946 | # ----------------------------------------------------------------------------- 947 | # infinite_cycles() 948 | # 949 | # This function looks at the various parsing rules and tries to detect 950 | # infinite recursion cycles (grammar rules where there is no possible way 951 | # to derive a string of only terminals). 
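# For example, a nonterminal whose only rule is
#     a : a PLUS a
# can never derive a string made up solely of terminals, so 'a' is reported by
# this check; adding a terminating alternative such as 'a : NAME' resolves it.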
952 | # ----------------------------------------------------------------------------- 953 | 954 | def infinite_cycles(self): 955 | terminates = {} 956 | 957 | # Terminals: 958 | for t in self.Terminals: 959 | terminates[t] = True 960 | 961 | terminates['$end'] = True 962 | 963 | # Nonterminals: 964 | 965 | # Initialize to false: 966 | for n in self.Nonterminals: 967 | terminates[n] = False 968 | 969 | # Then propagate termination until no change: 970 | while True: 971 | some_change = False 972 | for (n, pl) in self.Prodnames.items(): 973 | # Nonterminal n terminates iff any of its productions terminates. 974 | for p in pl: 975 | # Production p terminates iff all of its rhs symbols terminate. 976 | for s in p.prod: 977 | if not terminates[s]: 978 | # The symbol s does not terminate, 979 | # so production p does not terminate. 980 | p_terminates = False 981 | break 982 | else: 983 | # didn't break from the loop, 984 | # so every symbol s terminates 985 | # so production p terminates. 986 | p_terminates = True 987 | 988 | if p_terminates: 989 | # symbol n terminates! 990 | if not terminates[n]: 991 | terminates[n] = True 992 | some_change = True 993 | # Don't need to consider any more productions for this n. 994 | break 995 | 996 | if not some_change: 997 | break 998 | 999 | infinite = [] 1000 | for (s, term) in terminates.items(): 1001 | if not term: 1002 | if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1003 | # s is used-but-not-defined, and we've already warned of that, 1004 | # so it would be overkill to say that it's also non-terminating. 1005 | pass 1006 | else: 1007 | infinite.append(s) 1008 | 1009 | return infinite 1010 | 1011 | # ----------------------------------------------------------------------------- 1012 | # undefined_symbols() 1013 | # 1014 | # Find all symbols that were used the grammar, but not defined as tokens or 1015 | # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1016 | # and prod is the production where the symbol was used. 1017 | # ----------------------------------------------------------------------------- 1018 | def undefined_symbols(self): 1019 | result = [] 1020 | for p in self.Productions: 1021 | if not p: 1022 | continue 1023 | 1024 | for s in p.prod: 1025 | if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1026 | result.append((s, p)) 1027 | return result 1028 | 1029 | # ----------------------------------------------------------------------------- 1030 | # unused_terminals() 1031 | # 1032 | # Find all terminals that were defined, but not used by the grammar. Returns 1033 | # a list of all symbols. 1034 | # ----------------------------------------------------------------------------- 1035 | def unused_terminals(self): 1036 | unused_tok = [] 1037 | for s, v in self.Terminals.items(): 1038 | if s != 'error' and not v: 1039 | unused_tok.append(s) 1040 | 1041 | return unused_tok 1042 | 1043 | # ------------------------------------------------------------------------------ 1044 | # unused_rules() 1045 | # 1046 | # Find all grammar rules that were defined, but not used (maybe not reachable) 1047 | # Returns a list of productions. 
1048 | # ------------------------------------------------------------------------------ 1049 | 1050 | def unused_rules(self): 1051 | unused_prod = [] 1052 | for s, v in self.Nonterminals.items(): 1053 | if not v: 1054 | p = self.Prodnames[s][0] 1055 | unused_prod.append(p) 1056 | return unused_prod 1057 | 1058 | # ----------------------------------------------------------------------------- 1059 | # unused_precedence() 1060 | # 1061 | # Returns a list of tuples (term,precedence) corresponding to precedence 1062 | # rules that were never used by the grammar. term is the name of the terminal 1063 | # on which precedence was applied and precedence is a string such as 'left' or 1064 | # 'right' corresponding to the type of precedence. 1065 | # ----------------------------------------------------------------------------- 1066 | 1067 | def unused_precedence(self): 1068 | unused = [] 1069 | for termname in self.Precedence: 1070 | if not (termname in self.Terminals or termname in self.UsedPrecedence): 1071 | unused.append((termname, self.Precedence[termname][0])) 1072 | 1073 | return unused 1074 | 1075 | # ------------------------------------------------------------------------- 1076 | # _first() 1077 | # 1078 | # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 1079 | # 1080 | # During execution of compute_first1, the result may be incomplete. 1081 | # Afterward (e.g., when called from compute_follow()), it will be complete. 1082 | # ------------------------------------------------------------------------- 1083 | def _first(self, beta): 1084 | 1085 | # We are computing First(x1,x2,x3,...,xn) 1086 | result = [] 1087 | for x in beta: 1088 | x_produces_empty = False 1089 | 1090 | # Add all the non- symbols of First[x] to the result. 1091 | for f in self.First[x]: 1092 | if f == '': 1093 | x_produces_empty = True 1094 | else: 1095 | if f not in result: 1096 | result.append(f) 1097 | 1098 | if x_produces_empty: 1099 | # We have to consider the next x in beta, 1100 | # i.e. stay in the loop. 1101 | pass 1102 | else: 1103 | # We don't have to consider any further symbols in beta. 1104 | break 1105 | else: 1106 | # There was no 'break' from the loop, 1107 | # so x_produces_empty was true for all x in beta, 1108 | # so beta produces empty as well. 1109 | result.append('') 1110 | 1111 | return result 1112 | 1113 | # ------------------------------------------------------------------------- 1114 | # compute_first() 1115 | # 1116 | # Compute the value of FIRST1(X) for all symbols 1117 | # ------------------------------------------------------------------------- 1118 | def compute_first(self): 1119 | if self.First: 1120 | return self.First 1121 | 1122 | # Terminals: 1123 | for t in self.Terminals: 1124 | self.First[t] = [t] 1125 | 1126 | self.First['$end'] = ['$end'] 1127 | 1128 | # Nonterminals: 1129 | 1130 | # Initialize to the empty set: 1131 | for n in self.Nonterminals: 1132 | self.First[n] = [] 1133 | 1134 | # Then propagate symbols until no change: 1135 | while True: 1136 | some_change = False 1137 | for n in self.Nonterminals: 1138 | for p in self.Prodnames[n]: 1139 | for f in self._first(p.prod): 1140 | if f not in self.First[n]: 1141 | self.First[n].append(f) 1142 | some_change = True 1143 | if not some_change: 1144 | break 1145 | 1146 | return self.First 1147 | 1148 | # --------------------------------------------------------------------- 1149 | # compute_follow() 1150 | # 1151 | # Computes all of the follow sets for every non-terminal symbol. 
The 1152 | # follow set is the set of all symbols that might follow a given 1153 | # non-terminal. See the Dragon book, 2nd Ed. p. 189. 1154 | # --------------------------------------------------------------------- 1155 | def compute_follow(self, start=None): 1156 | # If already computed, return the result 1157 | if self.Follow: 1158 | return self.Follow 1159 | 1160 | # If first sets not computed yet, do that first. 1161 | if not self.First: 1162 | self.compute_first() 1163 | 1164 | # Add '$end' to the follow list of the start symbol 1165 | for k in self.Nonterminals: 1166 | self.Follow[k] = [] 1167 | 1168 | if not start: 1169 | start = self.Productions[1].name 1170 | 1171 | self.Follow[start] = ['$end'] 1172 | 1173 | while True: 1174 | didadd = False 1175 | for p in self.Productions[1:]: 1176 | # Here is the production set 1177 | for i, B in enumerate(p.prod): 1178 | if B in self.Nonterminals: 1179 | # Okay. We got a non-terminal in a production 1180 | fst = self._first(p.prod[i+1:]) 1181 | hasempty = False 1182 | for f in fst: 1183 | if f != '' and f not in self.Follow[B]: 1184 | self.Follow[B].append(f) 1185 | didadd = True 1186 | if f == '': 1187 | hasempty = True 1188 | if hasempty or i == (len(p.prod)-1): 1189 | # Add elements of follow(a) to follow(b) 1190 | for f in self.Follow[p.name]: 1191 | if f not in self.Follow[B]: 1192 | self.Follow[B].append(f) 1193 | didadd = True 1194 | if not didadd: 1195 | break 1196 | return self.Follow 1197 | 1198 | 1199 | # ----------------------------------------------------------------------------- 1200 | # build_lritems() 1201 | # 1202 | # This function walks the list of productions and builds a complete set of the 1203 | # LR items. The LR items are stored in two ways: First, they are uniquely 1204 | # numbered and placed in the list _lritems. Second, a linked list of LR items 1205 | # is built for each production. For example: 1206 | # 1207 | # E -> E PLUS E 1208 | # 1209 | # Creates the list 1210 | # 1211 | # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 1212 | # ----------------------------------------------------------------------------- 1213 | 1214 | def build_lritems(self): 1215 | for p in self.Productions: 1216 | lastlri = p 1217 | i = 0 1218 | lr_items = [] 1219 | while True: 1220 | if i > len(p): 1221 | lri = None 1222 | else: 1223 | lri = LRItem(p, i) 1224 | # Precompute the list of productions immediately following 1225 | try: 1226 | lri.lr_after = self.Prodnames[lri.prod[i+1]] 1227 | except (IndexError, KeyError): 1228 | lri.lr_after = [] 1229 | try: 1230 | lri.lr_before = lri.prod[i-1] 1231 | except IndexError: 1232 | lri.lr_before = None 1233 | 1234 | lastlri.lr_next = lri 1235 | if not lri: 1236 | break 1237 | lr_items.append(lri) 1238 | lastlri = lri 1239 | i += 1 1240 | p.lr_items = lr_items 1241 | 1242 | # ----------------------------------------------------------------------------- 1243 | # === LR Generator === 1244 | # 1245 | # The following classes and functions are used to generate LR parsing tables on 1246 | # a grammar. 
1247 | # ----------------------------------------------------------------------------- 1248 | 1249 | # ----------------------------------------------------------------------------- 1250 | # digraph() 1251 | # traverse() 1252 | # 1253 | # The following two functions are used to compute set valued functions 1254 | # of the form: 1255 | # 1256 | # F(x) = F'(x) U U{F(y) | x R y} 1257 | # 1258 | # This is used to compute the values of Read() sets as well as FOLLOW sets 1259 | # in LALR(1) generation. 1260 | # 1261 | # Inputs: X - An input set 1262 | # R - A relation 1263 | # FP - Set-valued function 1264 | # ------------------------------------------------------------------------------ 1265 | 1266 | def digraph(X, R, FP): 1267 | N = {} 1268 | for x in X: 1269 | N[x] = 0 1270 | stack = [] 1271 | F = {} 1272 | for x in X: 1273 | if N[x] == 0: 1274 | traverse(x, N, stack, F, X, R, FP) 1275 | return F 1276 | 1277 | def traverse(x, N, stack, F, X, R, FP): 1278 | stack.append(x) 1279 | d = len(stack) 1280 | N[x] = d 1281 | F[x] = FP(x) # F(X) <- F'(x) 1282 | 1283 | rel = R(x) # Get y's related to x 1284 | for y in rel: 1285 | if N[y] == 0: 1286 | traverse(y, N, stack, F, X, R, FP) 1287 | N[x] = min(N[x], N[y]) 1288 | for a in F.get(y, []): 1289 | if a not in F[x]: 1290 | F[x].append(a) 1291 | if N[x] == d: 1292 | N[stack[-1]] = MAXINT 1293 | F[stack[-1]] = F[x] 1294 | element = stack.pop() 1295 | while element != x: 1296 | N[stack[-1]] = MAXINT 1297 | F[stack[-1]] = F[x] 1298 | element = stack.pop() 1299 | 1300 | class LALRError(YaccError): 1301 | pass 1302 | 1303 | 1304 | # ----------------------------------------------------------------------------- 1305 | # == LRTable == 1306 | # 1307 | # This class implements the LR table generation algorithm. There are no 1308 | # public methods. 1309 | # ----------------------------------------------------------------------------- 1310 | 1311 | class LRTable: 1312 | def __init__(self, grammar, log=None): 1313 | self.grammar = grammar 1314 | 1315 | # Set up the logger 1316 | if not log: 1317 | log = NullLogger() 1318 | self.log = log 1319 | 1320 | # Internal attributes 1321 | self.lr_action = {} # Action table 1322 | self.lr_goto = {} # Goto table 1323 | self.lr_productions = grammar.Productions # Copy of grammar Production array 1324 | self.lr_goto_cache = {} # Cache of computed gotos 1325 | self.lr0_cidhash = {} # Cache of closures 1326 | 1327 | self._add_count = 0 # Internal counter used to detect cycles 1328 | 1329 | # Diagnostic information filled in by the table generator 1330 | self.sr_conflict = 0 1331 | self.rr_conflict = 0 1332 | self.conflicts = [] # List of conflicts 1333 | 1334 | self.sr_conflicts = [] 1335 | self.rr_conflicts = [] 1336 | 1337 | # Build the tables 1338 | self.grammar.build_lritems() 1339 | self.grammar.compute_first() 1340 | self.grammar.compute_follow() 1341 | self.lr_parse_table() 1342 | 1343 | # Bind all production function names to callable objects in pdict 1344 | def bind_callables(self, pdict): 1345 | for p in self.lr_productions: 1346 | p.bind(pdict) 1347 | 1348 | # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 
1349 | 1350 | def lr0_closure(self, I): 1351 | self._add_count += 1 1352 | 1353 | # Add everything in I to J 1354 | J = I[:] 1355 | didadd = True 1356 | while didadd: 1357 | didadd = False 1358 | for j in J: 1359 | for x in j.lr_after: 1360 | if getattr(x, 'lr0_added', 0) == self._add_count: 1361 | continue 1362 | # Add B --> .G to J 1363 | J.append(x.lr_next) 1364 | x.lr0_added = self._add_count 1365 | didadd = True 1366 | 1367 | return J 1368 | 1369 | # Compute the LR(0) goto function goto(I,X) where I is a set 1370 | # of LR(0) items and X is a grammar symbol. This function is written 1371 | # in a way that guarantees uniqueness of the generated goto sets 1372 | # (i.e. the same goto set will never be returned as two different Python 1373 | # objects). With uniqueness, we can later do fast set comparisons using 1374 | # id(obj) instead of element-wise comparison. 1375 | 1376 | def lr0_goto(self, I, x): 1377 | # First we look for a previously cached entry 1378 | g = self.lr_goto_cache.get((id(I), x)) 1379 | if g: 1380 | return g 1381 | 1382 | # Now we generate the goto set in a way that guarantees uniqueness 1383 | # of the result 1384 | 1385 | s = self.lr_goto_cache.get(x) 1386 | if not s: 1387 | s = {} 1388 | self.lr_goto_cache[x] = s 1389 | 1390 | gs = [] 1391 | for p in I: 1392 | n = p.lr_next 1393 | if n and n.lr_before == x: 1394 | s1 = s.get(id(n)) 1395 | if not s1: 1396 | s1 = {} 1397 | s[id(n)] = s1 1398 | gs.append(n) 1399 | s = s1 1400 | g = s.get('$end') 1401 | if not g: 1402 | if gs: 1403 | g = self.lr0_closure(gs) 1404 | s['$end'] = g 1405 | else: 1406 | s['$end'] = gs 1407 | self.lr_goto_cache[(id(I), x)] = g 1408 | return g 1409 | 1410 | # Compute the LR(0) sets of item function 1411 | def lr0_items(self): 1412 | C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] 1413 | i = 0 1414 | for I in C: 1415 | self.lr0_cidhash[id(I)] = i 1416 | i += 1 1417 | 1418 | # Loop over the items in C and each grammar symbols 1419 | i = 0 1420 | while i < len(C): 1421 | I = C[i] 1422 | i += 1 1423 | 1424 | # Collect all of the symbols that could possibly be in the goto(I,X) sets 1425 | asyms = {} 1426 | for ii in I: 1427 | for s in ii.usyms: 1428 | asyms[s] = None 1429 | 1430 | for x in asyms: 1431 | g = self.lr0_goto(I, x) 1432 | if not g or id(g) in self.lr0_cidhash: 1433 | continue 1434 | self.lr0_cidhash[id(g)] = len(C) 1435 | C.append(g) 1436 | 1437 | return C 1438 | 1439 | # ----------------------------------------------------------------------------- 1440 | # ==== LALR(1) Parsing ==== 1441 | # 1442 | # LALR(1) parsing is almost exactly the same as SLR except that instead of 1443 | # relying upon Follow() sets when performing reductions, a more selective 1444 | # lookahead set that incorporates the state of the LR(0) machine is utilized. 1445 | # Thus, we mainly just have to focus on calculating the lookahead sets. 1446 | # 1447 | # The method used here is due to DeRemer and Pennelo (1982). 1448 | # 1449 | # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 1450 | # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 1451 | # Vol. 4, No. 4, Oct. 1982, pp. 615-649 1452 | # 1453 | # Further details can also be found in: 1454 | # 1455 | # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 1456 | # McGraw-Hill Book Company, (1985). 
1457 | # 1458 | # ----------------------------------------------------------------------------- 1459 | 1460 | # ----------------------------------------------------------------------------- 1461 | # compute_nullable_nonterminals() 1462 | # 1463 | # Creates a dictionary containing all of the non-terminals that might produce 1464 | # an empty production. 1465 | # ----------------------------------------------------------------------------- 1466 | 1467 | def compute_nullable_nonterminals(self): 1468 | nullable = set() 1469 | num_nullable = 0 1470 | while True: 1471 | for p in self.grammar.Productions[1:]: 1472 | if p.len == 0: 1473 | nullable.add(p.name) 1474 | continue 1475 | for t in p.prod: 1476 | if t not in nullable: 1477 | break 1478 | else: 1479 | nullable.add(p.name) 1480 | if len(nullable) == num_nullable: 1481 | break 1482 | num_nullable = len(nullable) 1483 | return nullable 1484 | 1485 | # ----------------------------------------------------------------------------- 1486 | # find_nonterminal_trans(C) 1487 | # 1488 | # Given a set of LR(0) items, this functions finds all of the non-terminal 1489 | # transitions. These are transitions in which a dot appears immediately before 1490 | # a non-terminal. Returns a list of tuples of the form (state,N) where state 1491 | # is the state number and N is the nonterminal symbol. 1492 | # 1493 | # The input C is the set of LR(0) items. 1494 | # ----------------------------------------------------------------------------- 1495 | 1496 | def find_nonterminal_transitions(self, C): 1497 | trans = [] 1498 | for stateno, state in enumerate(C): 1499 | for p in state: 1500 | if p.lr_index < p.len - 1: 1501 | t = (stateno, p.prod[p.lr_index+1]) 1502 | if t[1] in self.grammar.Nonterminals: 1503 | if t not in trans: 1504 | trans.append(t) 1505 | return trans 1506 | 1507 | # ----------------------------------------------------------------------------- 1508 | # dr_relation() 1509 | # 1510 | # Computes the DR(p,A) relationships for non-terminal transitions. The input 1511 | # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 1512 | # 1513 | # Returns a list of terminals. 1514 | # ----------------------------------------------------------------------------- 1515 | 1516 | def dr_relation(self, C, trans, nullable): 1517 | state, N = trans 1518 | terms = [] 1519 | 1520 | g = self.lr0_goto(C[state], N) 1521 | for p in g: 1522 | if p.lr_index < p.len - 1: 1523 | a = p.prod[p.lr_index+1] 1524 | if a in self.grammar.Terminals: 1525 | if a not in terms: 1526 | terms.append(a) 1527 | 1528 | # This extra bit is to handle the start state 1529 | if state == 0 and N == self.grammar.Productions[0].prod[0]: 1530 | terms.append('$end') 1531 | 1532 | return terms 1533 | 1534 | # ----------------------------------------------------------------------------- 1535 | # reads_relation() 1536 | # 1537 | # Computes the READS() relation (p,A) READS (t,C). 
1538 | # ----------------------------------------------------------------------------- 1539 | 1540 | def reads_relation(self, C, trans, empty): 1541 | # Look for empty transitions 1542 | rel = [] 1543 | state, N = trans 1544 | 1545 | g = self.lr0_goto(C[state], N) 1546 | j = self.lr0_cidhash.get(id(g), -1) 1547 | for p in g: 1548 | if p.lr_index < p.len - 1: 1549 | a = p.prod[p.lr_index + 1] 1550 | if a in empty: 1551 | rel.append((j, a)) 1552 | 1553 | return rel 1554 | 1555 | # ----------------------------------------------------------------------------- 1556 | # compute_lookback_includes() 1557 | # 1558 | # Determines the lookback and includes relations 1559 | # 1560 | # LOOKBACK: 1561 | # 1562 | # This relation is determined by running the LR(0) state machine forward. 1563 | # For example, starting with a production "N : . A B C", we run it forward 1564 | # to obtain "N : A B C ." We then build a relationship between this final 1565 | # state and the starting state. These relationships are stored in a dictionary 1566 | # lookdict. 1567 | # 1568 | # INCLUDES: 1569 | # 1570 | # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 1571 | # 1572 | # This relation is used to determine non-terminal transitions that occur 1573 | # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 1574 | # if the following holds: 1575 | # 1576 | # B -> LAT, where T -> epsilon and p' -L-> p 1577 | # 1578 | # L is essentially a prefix (which may be empty), T is a suffix that must be 1579 | # able to derive an empty string. State p' must lead to state p with the string L. 1580 | # 1581 | # ----------------------------------------------------------------------------- 1582 | 1583 | def compute_lookback_includes(self, C, trans, nullable): 1584 | lookdict = {} # Dictionary of lookback relations 1585 | includedict = {} # Dictionary of include relations 1586 | 1587 | # Make a dictionary of non-terminal transitions 1588 | dtrans = {} 1589 | for t in trans: 1590 | dtrans[t] = 1 1591 | 1592 | # Loop over all transitions and compute lookbacks and includes 1593 | for state, N in trans: 1594 | lookb = [] 1595 | includes = [] 1596 | for p in C[state]: 1597 | if p.name != N: 1598 | continue 1599 | 1600 | # Okay, we have a name match. We now follow the production all the way 1601 | # through the state machine until we get the . on the right hand side 1602 | 1603 | lr_index = p.lr_index 1604 | j = state 1605 | while lr_index < p.len - 1: 1606 | lr_index = lr_index + 1 1607 | t = p.prod[lr_index] 1608 | 1609 | # Check to see if this symbol and state are a non-terminal transition 1610 | if (j, t) in dtrans: 1611 | # Yes. Okay, there is some chance that this is an includes relation 1612 | # the only way to know for certain is whether the rest of the 1613 | # production derives empty 1614 | 1615 | li = lr_index + 1 1616 | while li < p.len: 1617 | if p.prod[li] in self.grammar.Terminals: 1618 | break # No forget it 1619 | if p.prod[li] not in nullable: 1620 | break 1621 | li = li + 1 1622 | else: 1623 | # Appears to be a relation between (j,t) and (state,N) 1624 | includes.append((j, t)) 1625 | 1626 | g = self.lr0_goto(C[j], t) # Go to next set 1627 | j = self.lr0_cidhash.get(id(g), -1) # Go to next state 1628 | 1629 | # When we get here, j is the final state, now we have to locate the production 1630 | for r in C[j]: 1631 | if r.name != p.name: 1632 | continue 1633 | if r.len != p.len: 1634 | continue 1635 | i = 0 1636 | # This look is comparing a production ". A B C" with "A B C ." 
1637 | while i < r.lr_index: 1638 | if r.prod[i] != p.prod[i+1]: 1639 | break 1640 | i = i + 1 1641 | else: 1642 | lookb.append((j, r)) 1643 | for i in includes: 1644 | if i not in includedict: 1645 | includedict[i] = [] 1646 | includedict[i].append((state, N)) 1647 | lookdict[(state, N)] = lookb 1648 | 1649 | return lookdict, includedict 1650 | 1651 | # ----------------------------------------------------------------------------- 1652 | # compute_read_sets() 1653 | # 1654 | # Given a set of LR(0) items, this function computes the read sets. 1655 | # 1656 | # Inputs: C = Set of LR(0) items 1657 | # ntrans = Set of nonterminal transitions 1658 | # nullable = Set of empty transitions 1659 | # 1660 | # Returns a set containing the read sets 1661 | # ----------------------------------------------------------------------------- 1662 | 1663 | def compute_read_sets(self, C, ntrans, nullable): 1664 | FP = lambda x: self.dr_relation(C, x, nullable) 1665 | R = lambda x: self.reads_relation(C, x, nullable) 1666 | F = digraph(ntrans, R, FP) 1667 | return F 1668 | 1669 | # ----------------------------------------------------------------------------- 1670 | # compute_follow_sets() 1671 | # 1672 | # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 1673 | # and an include set, this function computes the follow sets 1674 | # 1675 | # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 1676 | # 1677 | # Inputs: 1678 | # ntrans = Set of nonterminal transitions 1679 | # readsets = Readset (previously computed) 1680 | # inclsets = Include sets (previously computed) 1681 | # 1682 | # Returns a set containing the follow sets 1683 | # ----------------------------------------------------------------------------- 1684 | 1685 | def compute_follow_sets(self, ntrans, readsets, inclsets): 1686 | FP = lambda x: readsets[x] 1687 | R = lambda x: inclsets.get(x, []) 1688 | F = digraph(ntrans, R, FP) 1689 | return F 1690 | 1691 | # ----------------------------------------------------------------------------- 1692 | # add_lookaheads() 1693 | # 1694 | # Attaches the lookahead symbols to grammar rules. 
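# For example (state number and tokens illustrative), after this step a
# production p may end up with p.lookaheads[12] == ['PLUS', '$end'], meaning p
# is reduced in state 12 only when the next token is PLUS or end of input;
# lr_parse_table() below consumes exactly these per-state sets when emitting
# reduce actions.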
1695 | # 1696 | # Inputs: lookbacks - Set of lookback relations 1697 | # followset - Computed follow set 1698 | # 1699 | # This function directly attaches the lookaheads to productions contained 1700 | # in the lookbacks set 1701 | # ----------------------------------------------------------------------------- 1702 | 1703 | def add_lookaheads(self, lookbacks, followset): 1704 | for trans, lb in lookbacks.items(): 1705 | # Loop over productions in lookback 1706 | for state, p in lb: 1707 | if state not in p.lookaheads: 1708 | p.lookaheads[state] = [] 1709 | f = followset.get(trans, []) 1710 | for a in f: 1711 | if a not in p.lookaheads[state]: 1712 | p.lookaheads[state].append(a) 1713 | 1714 | # ----------------------------------------------------------------------------- 1715 | # add_lalr_lookaheads() 1716 | # 1717 | # This function does all of the work of adding lookahead information for use 1718 | # with LALR parsing 1719 | # ----------------------------------------------------------------------------- 1720 | 1721 | def add_lalr_lookaheads(self, C): 1722 | # Determine all of the nullable nonterminals 1723 | nullable = self.compute_nullable_nonterminals() 1724 | 1725 | # Find all non-terminal transitions 1726 | trans = self.find_nonterminal_transitions(C) 1727 | 1728 | # Compute read sets 1729 | readsets = self.compute_read_sets(C, trans, nullable) 1730 | 1731 | # Compute lookback/includes relations 1732 | lookd, included = self.compute_lookback_includes(C, trans, nullable) 1733 | 1734 | # Compute LALR FOLLOW sets 1735 | followsets = self.compute_follow_sets(trans, readsets, included) 1736 | 1737 | # Add all of the lookaheads 1738 | self.add_lookaheads(lookd, followsets) 1739 | 1740 | # ----------------------------------------------------------------------------- 1741 | # lr_parse_table() 1742 | # 1743 | # This function constructs the parse tables for SLR or LALR 1744 | # ----------------------------------------------------------------------------- 1745 | def lr_parse_table(self): 1746 | Productions = self.grammar.Productions 1747 | Precedence = self.grammar.Precedence 1748 | goto = self.lr_goto # Goto array 1749 | action = self.lr_action # Action array 1750 | log = self.log # Logger for output 1751 | 1752 | actionp = {} # Action production array (temporary) 1753 | 1754 | # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 1755 | # This determines the number of states 1756 | 1757 | C = self.lr0_items() 1758 | self.add_lalr_lookaheads(C) 1759 | 1760 | # Build the parser table, state by state 1761 | st = 0 1762 | for I in C: 1763 | # Loop over each production in I 1764 | actlist = [] # List of actions 1765 | st_action = {} 1766 | st_actionp = {} 1767 | st_goto = {} 1768 | log.info('') 1769 | log.info('state %d', st) 1770 | log.info('') 1771 | for p in I: 1772 | log.info(' (%d) %s', p.number, p) 1773 | log.info('') 1774 | 1775 | for p in I: 1776 | if p.len == p.lr_index + 1: 1777 | if p.name == "S'": 1778 | # Start symbol. Accept! 1779 | st_action['$end'] = 0 1780 | st_actionp['$end'] = p 1781 | else: 1782 | # We are at the end of a production. Reduce! 1783 | laheads = p.lookaheads[st] 1784 | for a in laheads: 1785 | actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) 1786 | r = st_action.get(a) 1787 | if r is not None: 1788 | # Whoa. Have a shift/reduce or reduce/reduce conflict 1789 | if r > 0: 1790 | # Need to decide on shift or reduce here 1791 | # By default we favor shifting. Need to add 1792 | # some precedence rules here. 
1793 | 1794 | # Shift precedence comes from the token 1795 | sprec, slevel = Precedence.get(a, ('right', 0)) 1796 | 1797 | # Reduce precedence comes from rule being reduced (p) 1798 | rprec, rlevel = Productions[p.number].prec 1799 | 1800 | if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 1801 | # We really need to reduce here. 1802 | st_action[a] = -p.number 1803 | st_actionp[a] = p 1804 | if not slevel and not rlevel: 1805 | log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 1806 | self.sr_conflicts.append((st, a, 'reduce')) 1807 | Productions[p.number].reduced += 1 1808 | elif (slevel == rlevel) and (rprec == 'nonassoc'): 1809 | st_action[a] = None 1810 | else: 1811 | # Hmmm. Guess we'll keep the shift 1812 | if not rlevel: 1813 | log.info(' ! shift/reduce conflict for %s resolved as shift', a) 1814 | self.sr_conflicts.append((st, a, 'shift')) 1815 | elif r < 0: 1816 | # Reduce/reduce conflict. In this case, we favor the rule 1817 | # that was defined first in the grammar file 1818 | oldp = Productions[-r] 1819 | pp = Productions[p.number] 1820 | if oldp.line > pp.line: 1821 | st_action[a] = -p.number 1822 | st_actionp[a] = p 1823 | chosenp, rejectp = pp, oldp 1824 | Productions[p.number].reduced += 1 1825 | Productions[oldp.number].reduced -= 1 1826 | else: 1827 | chosenp, rejectp = oldp, pp 1828 | self.rr_conflicts.append((st, chosenp, rejectp)) 1829 | log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', 1830 | a, st_actionp[a].number, st_actionp[a]) 1831 | else: 1832 | raise LALRError('Unknown conflict in state %d' % st) 1833 | else: 1834 | st_action[a] = -p.number 1835 | st_actionp[a] = p 1836 | Productions[p.number].reduced += 1 1837 | else: 1838 | i = p.lr_index 1839 | a = p.prod[i+1] # Get symbol right after the "." 1840 | if a in self.grammar.Terminals: 1841 | g = self.lr0_goto(I, a) 1842 | j = self.lr0_cidhash.get(id(g), -1) 1843 | if j >= 0: 1844 | # We are in a shift state 1845 | actlist.append((a, p, 'shift and go to state %d' % j)) 1846 | r = st_action.get(a) 1847 | if r is not None: 1848 | # Whoa have a shift/reduce or shift/shift conflict 1849 | if r > 0: 1850 | if r != j: 1851 | raise LALRError('Shift/shift conflict in state %d' % st) 1852 | elif r < 0: 1853 | # Do a precedence check. 1854 | # - if precedence of reduce rule is higher, we reduce. 1855 | # - if precedence of reduce is same and left assoc, we reduce. 1856 | # - otherwise we shift 1857 | 1858 | # Shift precedence comes from the token 1859 | sprec, slevel = Precedence.get(a, ('right', 0)) 1860 | 1861 | # Reduce precedence comes from the rule that could have been reduced 1862 | rprec, rlevel = Productions[st_actionp[a].number].prec 1863 | 1864 | if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 1865 | # We decide to shift here... highest precedence to shift 1866 | Productions[st_actionp[a].number].reduced -= 1 1867 | st_action[a] = j 1868 | st_actionp[a] = p 1869 | if not rlevel: 1870 | log.info(' ! shift/reduce conflict for %s resolved as shift', a) 1871 | self.sr_conflicts.append((st, a, 'shift')) 1872 | elif (slevel == rlevel) and (rprec == 'nonassoc'): 1873 | st_action[a] = None 1874 | else: 1875 | # Hmmm. Guess we'll keep the reduce 1876 | if not slevel and not rlevel: 1877 | log.info(' ! 
shift/reduce conflict for %s resolved as reduce', a) 1878 | self.sr_conflicts.append((st, a, 'reduce')) 1879 | 1880 | else: 1881 | raise LALRError('Unknown conflict in state %d' % st) 1882 | else: 1883 | st_action[a] = j 1884 | st_actionp[a] = p 1885 | 1886 | # Print the actions associated with each terminal 1887 | _actprint = {} 1888 | for a, p, m in actlist: 1889 | if a in st_action: 1890 | if p is st_actionp[a]: 1891 | log.info(' %-15s %s', a, m) 1892 | _actprint[(a, m)] = 1 1893 | log.info('') 1894 | # Print the actions that were not used. (debugging) 1895 | not_used = 0 1896 | for a, p, m in actlist: 1897 | if a in st_action: 1898 | if p is not st_actionp[a]: 1899 | if not (a, m) in _actprint: 1900 | log.debug(' ! %-15s [ %s ]', a, m) 1901 | not_used = 1 1902 | _actprint[(a, m)] = 1 1903 | if not_used: 1904 | log.debug('') 1905 | 1906 | # Construct the goto table for this state 1907 | 1908 | nkeys = {} 1909 | for ii in I: 1910 | for s in ii.usyms: 1911 | if s in self.grammar.Nonterminals: 1912 | nkeys[s] = None 1913 | for n in nkeys: 1914 | g = self.lr0_goto(I, n) 1915 | j = self.lr0_cidhash.get(id(g), -1) 1916 | if j >= 0: 1917 | st_goto[n] = j 1918 | log.info(' %-30s shift and go to state %d', n, j) 1919 | 1920 | action[st] = st_action 1921 | actionp[st] = st_actionp 1922 | goto[st] = st_goto 1923 | st += 1 1924 | 1925 | # ----------------------------------------------------------------------------- 1926 | # === INTROSPECTION === 1927 | # 1928 | # The following functions and classes are used to implement the PLY 1929 | # introspection features followed by the yacc() function itself. 1930 | # ----------------------------------------------------------------------------- 1931 | 1932 | # ----------------------------------------------------------------------------- 1933 | # get_caller_module_dict() 1934 | # 1935 | # This function returns a dictionary containing all of the symbols defined within 1936 | # a caller further down the call stack. This is used to get the environment 1937 | # associated with the yacc() call if none was provided. 1938 | # ----------------------------------------------------------------------------- 1939 | 1940 | def get_caller_module_dict(levels): 1941 | f = sys._getframe(levels) 1942 | ldict = f.f_globals.copy() 1943 | if f.f_globals != f.f_locals: 1944 | ldict.update(f.f_locals) 1945 | return ldict 1946 | 1947 | # ----------------------------------------------------------------------------- 1948 | # parse_grammar() 1949 | # 1950 | # This takes a raw grammar rule string and parses it into production data 1951 | # ----------------------------------------------------------------------------- 1952 | def parse_grammar(doc, file, line): 1953 | grammar = [] 1954 | # Split the doc string into lines 1955 | pstrings = doc.splitlines() 1956 | lastp = None 1957 | dline = line 1958 | for ps in pstrings: 1959 | dline += 1 1960 | p = ps.split() 1961 | if not p: 1962 | continue 1963 | try: 1964 | if p[0] == '|': 1965 | # This is a continuation of a previous rule 1966 | if not lastp: 1967 | raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 1968 | prodname = lastp 1969 | syms = p[1:] 1970 | else: 1971 | prodname = p[0] 1972 | lastp = prodname 1973 | syms = p[2:] 1974 | assign = p[1] 1975 | if assign != ':' and assign != '::=': 1976 | raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) 1977 | 1978 | grammar.append((file, dline, prodname, syms)) 1979 | except SyntaxError: 1980 | raise 1981 | except Exception: 1982 | raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 1983 | 1984 | return grammar 1985 | 1986 | # ----------------------------------------------------------------------------- 1987 | # ParserReflect() 1988 | # 1989 | # This class represents information extracted for building a parser including 1990 | # start symbol, error function, tokens, precedence list, action functions, 1991 | # etc. 1992 | # ----------------------------------------------------------------------------- 1993 | class ParserReflect(object): 1994 | def __init__(self, pdict, log=None): 1995 | self.pdict = pdict 1996 | self.start = None 1997 | self.error_func = None 1998 | self.tokens = None 1999 | self.modules = set() 2000 | self.grammar = [] 2001 | self.error = False 2002 | 2003 | if log is None: 2004 | self.log = PlyLogger(sys.stderr) 2005 | else: 2006 | self.log = log 2007 | 2008 | # Get all of the basic information 2009 | def get_all(self): 2010 | self.get_start() 2011 | self.get_error_func() 2012 | self.get_tokens() 2013 | self.get_precedence() 2014 | self.get_pfunctions() 2015 | 2016 | # Validate all of the information 2017 | def validate_all(self): 2018 | self.validate_start() 2019 | self.validate_error_func() 2020 | self.validate_tokens() 2021 | self.validate_precedence() 2022 | self.validate_pfunctions() 2023 | self.validate_modules() 2024 | return self.error 2025 | 2026 | # Compute a signature over the grammar 2027 | def signature(self): 2028 | parts = [] 2029 | try: 2030 | if self.start: 2031 | parts.append(self.start) 2032 | if self.prec: 2033 | parts.append(''.join([''.join(p) for p in self.prec])) 2034 | if self.tokens: 2035 | parts.append(' '.join(self.tokens)) 2036 | for f in self.pfuncs: 2037 | if f[3]: 2038 | parts.append(f[3]) 2039 | except (TypeError, ValueError): 2040 | pass 2041 | return ''.join(parts) 2042 | 2043 | # ----------------------------------------------------------------------------- 2044 | # validate_modules() 2045 | # 2046 | # This method checks to see if there are duplicated p_rulename() functions 2047 | # in the parser module file. Without this function, it is really easy for 2048 | # users to make mistakes by cutting and pasting code fragments (and it's a real 2049 | # bugger to try and figure out why the resulting parser doesn't work). Therefore, 2050 | # we just do a little regular expression pattern matching of def statements 2051 | # to try and detect duplicates. 2052 | # ----------------------------------------------------------------------------- 2053 | 2054 | def validate_modules(self): 2055 | # Match def p_funcname( 2056 | fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 2057 | 2058 | for module in self.modules: 2059 | try: 2060 | lines, linen = inspect.getsourcelines(module) 2061 | except IOError: 2062 | continue 2063 | 2064 | counthash = {} 2065 | for linen, line in enumerate(lines): 2066 | linen += 1 2067 | m = fre.match(line) 2068 | if m: 2069 | name = m.group(1) 2070 | prev = counthash.get(name) 2071 | if not prev: 2072 | counthash[name] = linen 2073 | else: 2074 | filename = inspect.getsourcefile(module) 2075 | self.log.warning('%s:%d: Function %s redefined. 
Previously defined on line %d', 2076 | filename, linen, name, prev) 2077 | 2078 | # Get the start symbol 2079 | def get_start(self): 2080 | self.start = self.pdict.get('start') 2081 | 2082 | # Validate the start symbol 2083 | def validate_start(self): 2084 | if self.start is not None: 2085 | if not isinstance(self.start, str): 2086 | self.log.error("'start' must be a string") 2087 | 2088 | # Look for error handler 2089 | def get_error_func(self): 2090 | self.error_func = self.pdict.get('p_error') 2091 | 2092 | # Validate the error function 2093 | def validate_error_func(self): 2094 | if self.error_func: 2095 | if isinstance(self.error_func, types.FunctionType): 2096 | ismethod = 0 2097 | elif isinstance(self.error_func, types.MethodType): 2098 | ismethod = 1 2099 | else: 2100 | self.log.error("'p_error' defined, but is not a function or method") 2101 | self.error = True 2102 | return 2103 | 2104 | eline = self.error_func.__code__.co_firstlineno 2105 | efile = self.error_func.__code__.co_filename 2106 | module = inspect.getmodule(self.error_func) 2107 | self.modules.add(module) 2108 | 2109 | argcount = self.error_func.__code__.co_argcount - ismethod 2110 | if argcount != 1: 2111 | self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) 2112 | self.error = True 2113 | 2114 | # Get the tokens map 2115 | def get_tokens(self): 2116 | tokens = self.pdict.get('tokens') 2117 | if not tokens: 2118 | self.log.error('No token list is defined') 2119 | self.error = True 2120 | return 2121 | 2122 | if not isinstance(tokens, (list, tuple)): 2123 | self.log.error('tokens must be a list or tuple') 2124 | self.error = True 2125 | return 2126 | 2127 | if not tokens: 2128 | self.log.error('tokens is empty') 2129 | self.error = True 2130 | return 2131 | 2132 | self.tokens = sorted(tokens) 2133 | 2134 | # Validate the tokens 2135 | def validate_tokens(self): 2136 | # Validate the tokens. 2137 | if 'error' in self.tokens: 2138 | self.log.error("Illegal token name 'error'. Is a reserved word") 2139 | self.error = True 2140 | return 2141 | 2142 | terminals = set() 2143 | for n in self.tokens: 2144 | if n in terminals: 2145 | self.log.warning('Token %r multiply defined', n) 2146 | terminals.add(n) 2147 | 2148 | # Get the precedence map (if any) 2149 | def get_precedence(self): 2150 | self.prec = self.pdict.get('precedence') 2151 | 2152 | # Validate and parse the precedence map 2153 | def validate_precedence(self): 2154 | preclist = [] 2155 | if self.prec: 2156 | if not isinstance(self.prec, (list, tuple)): 2157 | self.log.error('precedence must be a list or tuple') 2158 | self.error = True 2159 | return 2160 | for level, p in enumerate(self.prec): 2161 | if not isinstance(p, (list, tuple)): 2162 | self.log.error('Bad precedence table') 2163 | self.error = True 2164 | return 2165 | 2166 | if len(p) < 2: 2167 | self.log.error('Malformed precedence entry %s. 
Must be (assoc, term, ..., term)', p) 2168 | self.error = True 2169 | return 2170 | assoc = p[0] 2171 | if not isinstance(assoc, str): 2172 | self.log.error('precedence associativity must be a string') 2173 | self.error = True 2174 | return 2175 | for term in p[1:]: 2176 | if not isinstance(term, str): 2177 | self.log.error('precedence items must be strings') 2178 | self.error = True 2179 | return 2180 | preclist.append((term, assoc, level+1)) 2181 | self.preclist = preclist 2182 | 2183 | # Get all p_functions from the grammar 2184 | def get_pfunctions(self): 2185 | p_functions = [] 2186 | for name, item in self.pdict.items(): 2187 | if not name.startswith('p_') or name == 'p_error': 2188 | continue 2189 | if isinstance(item, (types.FunctionType, types.MethodType)): 2190 | line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) 2191 | module = inspect.getmodule(item) 2192 | p_functions.append((line, module, name, item.__doc__)) 2193 | 2194 | # Sort all of the actions by line number; make sure to stringify 2195 | # modules to make them sortable, since `line` may not uniquely sort all 2196 | # p functions 2197 | p_functions.sort(key=lambda p_function: ( 2198 | p_function[0], 2199 | str(p_function[1]), 2200 | p_function[2], 2201 | p_function[3])) 2202 | self.pfuncs = p_functions 2203 | 2204 | # Validate all of the p_functions 2205 | def validate_pfunctions(self): 2206 | grammar = [] 2207 | # Check for non-empty symbols 2208 | if len(self.pfuncs) == 0: 2209 | self.log.error('no rules of the form p_rulename are defined') 2210 | self.error = True 2211 | return 2212 | 2213 | for line, module, name, doc in self.pfuncs: 2214 | file = inspect.getsourcefile(module) 2215 | func = self.pdict[name] 2216 | if isinstance(func, types.MethodType): 2217 | reqargs = 2 2218 | else: 2219 | reqargs = 1 2220 | if func.__code__.co_argcount > reqargs: 2221 | self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) 2222 | self.error = True 2223 | elif func.__code__.co_argcount < reqargs: 2224 | self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) 2225 | self.error = True 2226 | elif not func.__doc__: 2227 | self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', 2228 | file, line, func.__name__) 2229 | else: 2230 | try: 2231 | parsed_g = parse_grammar(doc, file, line) 2232 | for g in parsed_g: 2233 | grammar.append((name, g)) 2234 | except SyntaxError as e: 2235 | self.log.error(str(e)) 2236 | self.error = True 2237 | 2238 | # Looks like a valid grammar rule 2239 | # Mark the file in which defined. 2240 | self.modules.add(module) 2241 | 2242 | # Secondary validation step that looks for p_ definitions that are not functions 2243 | # or functions that look like they might be grammar rules. 
2244 | 2245 | for n, v in self.pdict.items(): 2246 | if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): 2247 | continue 2248 | if n.startswith('t_'): 2249 | continue 2250 | if n.startswith('p_') and n != 'p_error': 2251 | self.log.warning('%r not defined as a function', n) 2252 | if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or 2253 | (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): 2254 | if v.__doc__: 2255 | try: 2256 | doc = v.__doc__.split(' ') 2257 | if doc[1] == ':': 2258 | self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', 2259 | v.__code__.co_filename, v.__code__.co_firstlineno, n) 2260 | except IndexError: 2261 | pass 2262 | 2263 | self.grammar = grammar 2264 | 2265 | # ----------------------------------------------------------------------------- 2266 | # yacc(module) 2267 | # 2268 | # Build a parser 2269 | # ----------------------------------------------------------------------------- 2270 | 2271 | def yacc(*, debug=yaccdebug, module=None, start=None, 2272 | check_recursion=True, optimize=False, debugfile=debug_file, 2273 | debuglog=None, errorlog=None): 2274 | 2275 | # Reference to the parsing method of the last built parser 2276 | global parse 2277 | 2278 | if errorlog is None: 2279 | errorlog = PlyLogger(sys.stderr) 2280 | 2281 | # Get the module dictionary used for the parser 2282 | if module: 2283 | _items = [(k, getattr(module, k)) for k in dir(module)] 2284 | pdict = dict(_items) 2285 | # If no __file__ or __package__ attributes are available, try to obtain them 2286 | # from the __module__ instead 2287 | if '__file__' not in pdict: 2288 | pdict['__file__'] = sys.modules[pdict['__module__']].__file__ 2289 | if '__package__' not in pdict and '__module__' in pdict: 2290 | if hasattr(sys.modules[pdict['__module__']], '__package__'): 2291 | pdict['__package__'] = sys.modules[pdict['__module__']].__package__ 2292 | else: 2293 | pdict = get_caller_module_dict(2) 2294 | 2295 | # Set start symbol if it's specified directly using an argument 2296 | if start is not None: 2297 | pdict['start'] = start 2298 | 2299 | # Collect parser information from the dictionary 2300 | pinfo = ParserReflect(pdict, log=errorlog) 2301 | pinfo.get_all() 2302 | 2303 | if pinfo.error: 2304 | raise YaccError('Unable to build parser') 2305 | 2306 | if debuglog is None: 2307 | if debug: 2308 | try: 2309 | debuglog = PlyLogger(open(debugfile, 'w')) 2310 | except IOError as e: 2311 | errorlog.warning("Couldn't open %r. 
%s" % (debugfile, e)) 2312 | debuglog = NullLogger() 2313 | else: 2314 | debuglog = NullLogger() 2315 | 2316 | debuglog.info('Created by PLY (http://www.dabeaz.com/ply)') 2317 | 2318 | errors = False 2319 | 2320 | # Validate the parser information 2321 | if pinfo.validate_all(): 2322 | raise YaccError('Unable to build parser') 2323 | 2324 | if not pinfo.error_func: 2325 | errorlog.warning('no p_error() function is defined') 2326 | 2327 | # Create a grammar object 2328 | grammar = Grammar(pinfo.tokens) 2329 | 2330 | # Set precedence level for terminals 2331 | for term, assoc, level in pinfo.preclist: 2332 | try: 2333 | grammar.set_precedence(term, assoc, level) 2334 | except GrammarError as e: 2335 | errorlog.warning('%s', e) 2336 | 2337 | # Add productions to the grammar 2338 | for funcname, gram in pinfo.grammar: 2339 | file, line, prodname, syms = gram 2340 | try: 2341 | grammar.add_production(prodname, syms, funcname, file, line) 2342 | except GrammarError as e: 2343 | errorlog.error('%s', e) 2344 | errors = True 2345 | 2346 | # Set the grammar start symbols 2347 | try: 2348 | if start is None: 2349 | grammar.set_start(pinfo.start) 2350 | else: 2351 | grammar.set_start(start) 2352 | except GrammarError as e: 2353 | errorlog.error(str(e)) 2354 | errors = True 2355 | 2356 | if errors: 2357 | raise YaccError('Unable to build parser') 2358 | 2359 | # Verify the grammar structure 2360 | undefined_symbols = grammar.undefined_symbols() 2361 | for sym, prod in undefined_symbols: 2362 | errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) 2363 | errors = True 2364 | 2365 | unused_terminals = grammar.unused_terminals() 2366 | if unused_terminals: 2367 | debuglog.info('') 2368 | debuglog.info('Unused terminals:') 2369 | debuglog.info('') 2370 | for term in unused_terminals: 2371 | errorlog.warning('Token %r defined, but not used', term) 2372 | debuglog.info(' %s', term) 2373 | 2374 | # Print out all productions to the debug log 2375 | if debug: 2376 | debuglog.info('') 2377 | debuglog.info('Grammar') 2378 | debuglog.info('') 2379 | for n, p in enumerate(grammar.Productions): 2380 | debuglog.info('Rule %-5d %s', n, p) 2381 | 2382 | # Find unused non-terminals 2383 | unused_rules = grammar.unused_rules() 2384 | for prod in unused_rules: 2385 | errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) 2386 | 2387 | if len(unused_terminals) == 1: 2388 | errorlog.warning('There is 1 unused token') 2389 | if len(unused_terminals) > 1: 2390 | errorlog.warning('There are %d unused tokens', len(unused_terminals)) 2391 | 2392 | if len(unused_rules) == 1: 2393 | errorlog.warning('There is 1 unused rule') 2394 | if len(unused_rules) > 1: 2395 | errorlog.warning('There are %d unused rules', len(unused_rules)) 2396 | 2397 | if debug: 2398 | debuglog.info('') 2399 | debuglog.info('Terminals, with rules where they appear') 2400 | debuglog.info('') 2401 | terms = list(grammar.Terminals) 2402 | terms.sort() 2403 | for term in terms: 2404 | debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) 2405 | 2406 | debuglog.info('') 2407 | debuglog.info('Nonterminals, with rules where they appear') 2408 | debuglog.info('') 2409 | nonterms = list(grammar.Nonterminals) 2410 | nonterms.sort() 2411 | for nonterm in nonterms: 2412 | debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) 2413 | debuglog.info('') 2414 | 2415 | if check_recursion: 2416 | unreachable = 
grammar.find_unreachable() 2417 | for u in unreachable: 2418 | errorlog.warning('Symbol %r is unreachable', u) 2419 | 2420 | infinite = grammar.infinite_cycles() 2421 | for inf in infinite: 2422 | errorlog.error('Infinite recursion detected for symbol %r', inf) 2423 | errors = True 2424 | 2425 | unused_prec = grammar.unused_precedence() 2426 | for term, assoc in unused_prec: 2427 | errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) 2428 | errors = True 2429 | 2430 | if errors: 2431 | raise YaccError('Unable to build parser') 2432 | 2433 | # Run the LRTable on the grammar 2434 | lr = LRTable(grammar, debuglog) 2435 | 2436 | if debug: 2437 | num_sr = len(lr.sr_conflicts) 2438 | 2439 | # Report shift/reduce and reduce/reduce conflicts 2440 | if num_sr == 1: 2441 | errorlog.warning('1 shift/reduce conflict') 2442 | elif num_sr > 1: 2443 | errorlog.warning('%d shift/reduce conflicts', num_sr) 2444 | 2445 | num_rr = len(lr.rr_conflicts) 2446 | if num_rr == 1: 2447 | errorlog.warning('1 reduce/reduce conflict') 2448 | elif num_rr > 1: 2449 | errorlog.warning('%d reduce/reduce conflicts', num_rr) 2450 | 2451 | # Write out conflicts to the output file 2452 | if debug and (lr.sr_conflicts or lr.rr_conflicts): 2453 | debuglog.warning('') 2454 | debuglog.warning('Conflicts:') 2455 | debuglog.warning('') 2456 | 2457 | for state, tok, resolution in lr.sr_conflicts: 2458 | debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) 2459 | 2460 | already_reported = set() 2461 | for state, rule, rejected in lr.rr_conflicts: 2462 | if (state, id(rule), id(rejected)) in already_reported: 2463 | continue 2464 | debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 2465 | debuglog.warning('rejected rule (%s) in state %d', rejected, state) 2466 | errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) 2467 | errorlog.warning('rejected rule (%s) in state %d', rejected, state) 2468 | already_reported.add((state, id(rule), id(rejected))) 2469 | 2470 | warned_never = [] 2471 | for state, rule, rejected in lr.rr_conflicts: 2472 | if not rejected.reduced and (rejected not in warned_never): 2473 | debuglog.warning('Rule (%s) is never reduced', rejected) 2474 | errorlog.warning('Rule (%s) is never reduced', rejected) 2475 | warned_never.append(rejected) 2476 | 2477 | # Build the parser 2478 | lr.bind_callables(pinfo.pdict) 2479 | parser = LRParser(lr, pinfo.error_func) 2480 | 2481 | parse = parser.parse 2482 | return parser 2483 | --------------------------------------------------------------------------------
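For orientation, the sketch below shows the end-to-end PLY workflow that the vendored lex.py and yacc.py implement: token rules and `p_*` docstring grammars are harvested from the caller's module by ParserReflect, parse_grammar() turns each docstring into productions, and yacc() builds the LALR tables, resolving shift/reduce conflicts with the precedence table exactly as in the conflict-resolution code above. This is a minimal toy calculator grammar for illustration only, not the plugin's actual expression grammar; every rule and token name below is hypothetical.

    # Minimal PLY sketch (illustrative only). Uses the vendored modules, but
    # stock ply.lex / ply.yacc would work identically.
    import SmartJump.lex as lex
    import SmartJump.yacc as yacc

    tokens = ('NUMBER', 'PLUS', 'TIMES', 'LPAREN', 'RPAREN')

    # Simple string token rules (compiled into the lexer's master regex).
    t_PLUS   = r'\+'
    t_TIMES  = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)   # decimal for this toy example
        return t

    def t_error(t):
        t.lexer.skip(1)          # skip characters the lexer cannot match

    # Precedence table consumed by ParserReflect.validate_precedence():
    # one (assoc, term, ...) entry per level, lowest precedence first.
    precedence = (
        ('left', 'PLUS'),
        ('left', 'TIMES'),
    )

    # Each p_* docstring is split into productions by parse_grammar().
    def p_expr_binop(p):
        '''expr : expr PLUS expr
                | expr TIMES expr'''
        if p[2] == '+':
            p[0] = p[1] + p[3]
        else:
            p[0] = p[1] * p[3]

    def p_expr_group(p):
        'expr : LPAREN expr RPAREN'
        p[0] = p[2]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        pass                     # silence syntax errors in this toy example

    lexer = lex.lex()
    parser = yacc.yacc(debug=False)   # runs ParserReflect + builds LALR tables
    print(parser.parse('10 + 2 * (3 + 4)', lexer=lexer))   # prints 24

If any validation step fails (missing token list, malformed precedence entry, undefined grammar symbols), yacc() raises YaccError('Unable to build parser'); passing debug=True additionally writes the grammar, parser states, and any shift/reduce or reduce/reduce conflicts to the debug log file (the debugfile argument, typically parser.out).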