├── .no-sublime-package ├── .gitignore ├── messages.json ├── old.zip ├── mathevaluator.pyc ├── Default (Linux).sublime-keymap ├── Default (OSX).sublime-keymap ├── Default (Windows).sublime-keymap ├── messages └── install.txt ├── LICENSE ├── README.md └── mathevaluator.py /.no-sublime-package: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ./old.zip 2 | ./*.pyc 3 | -------------------------------------------------------------------------------- /messages.json: -------------------------------------------------------------------------------- 1 | { 2 | "install": "messages/install.txt" 3 | } 4 | -------------------------------------------------------------------------------- /old.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lengstrom/MathEvaluator/HEAD/old.zip -------------------------------------------------------------------------------- /mathevaluator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lengstrom/MathEvaluator/HEAD/mathevaluator.pyc -------------------------------------------------------------------------------- /Default (Linux).sublime-keymap: -------------------------------------------------------------------------------- 1 | [{ 2 | "keys": ["ctrl+shift+m"], 3 | "command": "mathevaluator" 4 | }] -------------------------------------------------------------------------------- /Default (OSX).sublime-keymap: -------------------------------------------------------------------------------- 1 | [{ 2 | "keys": ["super+shift+m"], 3 | "command": "mathevaluator" 4 | }] -------------------------------------------------------------------------------- /Default (Windows).sublime-keymap: 
-------------------------------------------------------------------------------- 1 | [{ 2 | "keys": ["ctrl+shift+m"], 3 | "command": "mathevaluator" 4 | }] -------------------------------------------------------------------------------- /messages/install.txt: -------------------------------------------------------------------------------- 1 | Thanks for installing Selection Evaluator! 2 | 3 | Check out the code at https://github.com/meadowstream/MathEvaluator 4 | 5 | Usage: select a text area containing a mathematical expression, and use either control + shift + m or command + shift + m to evaluate the expression! 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 meadowstream 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #### Note: This is deprecated software, I no longer maintain or use it, and there are probably many issues with it. 2 | 3 | 4 | Sublime Text Math Evaluator 5 | =========== 6 | 7 | Evaluate mathematical expressions in Sublime Text selection regions inline. 8 | 9 | Use the command `control + shift + m` (or `super + shift + m`) to evaluate all mathematical expressions in selection areas. 10 | 11 | Symbols and functions supported: 12 | 13 | | Expression| Example | Output | Expression | Example | Output | 14 | |:---------:|:-------------:|:---------:|:----------:|:-------------:|:---------:| 15 | |+ | 3 + 4 | 7 | abs | abs(4) | 4 | 16 | |- | 3 - 4 | -1 | trunc | trunc(4) | 4 | 17 | |* | 3 * 4 | 12 | round | round(4) | 4 | 18 | |/ | 3 / 4 | 0.75 | sgn | sgn(4) | 1 | 19 | |^ | 3 ^ 4 | 81 | E | 5E4 | 50000 | 20 | |sin | sin(4) | -0.7568 | e | e | 2.7182 | 21 | |cos | cos(4) | -0.6536 | pi | pi | 3.1415 | 22 | |tan | tan(4) | 1.1578 | 23 | In Action 24 | ----------- 25 | ![In action](http://i.imgur.com/8cXEAHB.gif "Evaluator in Action") 26 | -------------------------------------------------------------------------------- /mathevaluator.py: -------------------------------------------------------------------------------- 1 | import sublime, sublime_plugin 2 | import math 3 | import operator 4 | 5 | exprStack = [] 6 | 7 | def pushFirst( strg, loc, toks ): 8 | exprStack.append( toks[0] ) 9 | def pushUMinus( strg, loc, toks ): 10 | if toks and toks[0]=='-': 11 | exprStack.append( 'unary -' ) 12 | #~ exprStack.append( '-1' ) 13 | #~ exprStack.append( '*' ) 14 | 15 | bnf = None 16 | def BNF(): 17 | """ 18 | expop :: '^' 19 | multop :: '*' | '/' 20 | addop :: '+' | '-' 21 | integer :: ['+' | '-'] '0'..'9'+ 22 | atom :: PI | E | real | fn '(' expr ')' | '(' expr ')' 23 | factor :: atom [ expop factor ]* 
24 | term :: factor [ multop factor ]* 25 | expr :: term [ addop term ]* 26 | """ 27 | global bnf 28 | if not bnf: 29 | point = Literal( "." ) 30 | e = CaselessLiteral( "E" ) 31 | fnumber = Combine( Word( "+-"+nums, nums ) + 32 | Optional( point + Optional( Word( nums ) ) ) + 33 | Optional( e + Word( "+-"+nums, nums ) ) ) 34 | ident = Word(alphas, alphas+nums+"_$") 35 | 36 | plus = Literal( "+" ) 37 | minus = Literal( "-" ) 38 | mult = Literal( "*" ) 39 | div = Literal( "/" ) 40 | lpar = Literal( "(" ).suppress() 41 | rpar = Literal( ")" ).suppress() 42 | addop = plus | minus 43 | multop = mult | div 44 | expop = Literal( "^" ) 45 | pi = CaselessLiteral( "PI" ) 46 | 47 | expr = Forward() 48 | atom = (Optional("-") + ( pi | e | fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus) 49 | 50 | # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-righ 51 | # that is, 2^3^2 = 2^(3^2), not (2^3)^2. 
# map operator symbols to corresponding arithmetic operations
epsilon = 1e-12


def _sgn( a ):
    """Return 1, -1 or 0 for the sign of ``a``.

    Values whose magnitude does not exceed ``epsilon`` are reported as 0.
    """
    # The previous lambda used the builtin cmp(), which was removed in
    # Python 3; every sgn(...) expression raised NameError there.
    # "not abs(a) > epsilon" (rather than "abs(a) <= epsilon") keeps the old
    # result of 0 for NaN input.
    if not abs( a ) > epsilon:
        return 0
    return (a > 0) - (a < 0)


opn = { "+" : operator.add,
        "-" : operator.sub,
        "*" : operator.mul,
        "/" : operator.truediv,
        "^" : operator.pow }
fn  = { "sin" : math.sin,
        "cos" : math.cos,
        "tan" : math.tan,
        "sqrt" : math.sqrt,
        'ln': math.log,
        "abs" : abs,
        "trunc" : lambda a: int(a),
        "round" : round,
        "sgn" : _sgn }


def evaluateStack( s ):
    """Evaluate a postfix token stack and return the numeric result.

    ``s`` is a list of string tokens in postfix order (operands first,
    operator or function name last).  The list is consumed: tokens are
    popped from its end as they are evaluated.

    Token handling, in order:
      - ``'unary -'`` negates the value of the sub-expression below it
      - a key of ``opn`` pops two sub-expressions (right operand first)
        and applies the binary operator
      - ``"PI"`` / ``"E"`` yield ``math.pi`` / ``math.e``
      - a key of ``fn`` applies that function to one sub-expression
      - any other token starting with a letter evaluates to 0
      - everything else is converted with ``float()``

    Raises ``IndexError`` if the stack runs out of tokens and ``ValueError``
    if a token is not a valid number; errors raised by the arithmetic
    itself (e.g. ``ZeroDivisionError``) propagate unchanged.
    """
    op = s.pop()
    if op == 'unary -':
        return -evaluateStack( s )
    # Look the token up in the operator table instead of testing it as a
    # substring of "+-*/^", which also accepted "" and runs such as "+-".
    if op in opn:
        op2 = evaluateStack( s )
        op1 = evaluateStack( s )
        return opn[op]( op1, op2 )
    elif op == "PI":
        return math.pi # 3.1415926535
    elif op == "E":
        return math.e  # 2.718281828
    elif op in fn:
        return fn[op]( evaluateStack( s ) )
    elif op[0].isalpha():
        # unknown identifier: evaluates to 0 and does not consume its argument
        return 0
    else:
        return float( op )
| if evaluated != 'None' and evaluated != 'False': 127 | if str(evaluated)[-2:] == ".0": 128 | evaluated = str(evaluated)[:-2] 129 | dotpos = evaluated.find('.') 130 | if dotpos != -1: 131 | evaluated = evaluated[:dotpos + 5] 132 | self.view.replace(edit, region, evaluated) 133 | 134 | # module pyparsing.py 135 | # 136 | # Copyright (c) 2003-2013 Paul T. McGuire 137 | # 138 | # Permission is hereby granted, free of charge, to any person obtaining 139 | # a copy of this software and associated documentation files (the 140 | # "Software"), to deal in the Software without restriction, including 141 | # without limitation the rights to use, copy, modify, merge, publish, 142 | # distribute, sublicense, and/or sell copies of the Software, and to 143 | # permit persons to whom the Software is furnished to do so, subject to 144 | # the following conditions: 145 | # 146 | # The above copyright notice and this permission notice shall be 147 | # included in all copies or substantial portions of the Software. 148 | # 149 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 150 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 151 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 152 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 153 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 154 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 155 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
156 | # 157 | 158 | __version__ = "2.0.1" 159 | __versionTime__ = "16 July 2013 22:22" 160 | __author__ = "Paul McGuire " 161 | 162 | import string 163 | from weakref import ref as wkref 164 | import copy 165 | import sys 166 | import warnings 167 | import re 168 | import sre_constants 169 | import collections 170 | #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 171 | 172 | __all__ = [ 173 | 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 174 | 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 175 | 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 176 | 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 177 | 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 178 | 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 179 | 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 180 | 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 181 | 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 182 | 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 183 | 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 184 | 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 185 | 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 186 | 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 187 | 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 188 | 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 189 | 'indentedBlock', 
'originalTextFor', 'ungroup', 'infixNotation', 190 | ] 191 | 192 | PY_3 = sys.version.startswith('3') 193 | if PY_3: 194 | _MAX_INT = sys.maxsize 195 | basestring = str 196 | unichr = chr 197 | _ustr = str 198 | 199 | # build list of single arg builtins, that can be used as parse actions 200 | singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 201 | 202 | else: 203 | _MAX_INT = sys.maxint 204 | range = xrange 205 | 206 | def _ustr(obj): 207 | """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 208 | str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 209 | then < returns the unicode object | encodes it with the default encoding | ... >. 210 | """ 211 | if isinstance(obj,unicode): 212 | return obj 213 | 214 | try: 215 | # If this works, then _ustr(obj) has the same behaviour as str(obj), so 216 | # it won't break any existing code. 217 | return str(obj) 218 | 219 | except UnicodeEncodeError: 220 | # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 221 | # state that "The return value must be a string object". However, does a 222 | # unicode object (being a subclass of basestring) count as a "string 223 | # object"? 224 | # If so, then return a unicode object: 225 | return unicode(obj) 226 | # Else encode it... but how? There are many choices... :) 227 | # Replace unprintables with escape codes? 228 | #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 229 | # Replace unprintables with question marks? 230 | #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 231 | # ... 
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

    Instance attributes set by the constructor:
     - loc - character offset in the input at which the error occurred
     - msg - the error message
     - pstr - the input string being parsed ("" when only a message was given)
     - parserElement - the element passed as C{elem}

    Computed attributes (see C{__getattr__}): lineno, col / column, line.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the first argument is the message itself
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

           Any other name raises C{AttributeError}.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           Returns the line with C{markerString} inserted at the error column
           and surrounding whitespace stripped; an empty C{markerString}
           returns the stripped line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            # str.join takes a single iterable; passing the three pieces as
            # separate arguments raised TypeError.
            line_str = "".join((line_str[:line_column],
                                markerString, line_str[line_column:]))
        return line_str.strip()

    def __dir__(self):
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
modified input string, and/or 337 | #~ - with a modified start location 338 | #~ Set the values of the ReparseException in the constructor, and raise the 339 | #~ exception in a parse action to cause pyparsing to use the new string/location. 340 | #~ Setting the values as None causes no change to be made. 341 | #~ """ 342 | #~ def __init_( self, newstring, restartLoc ): 343 | #~ self.newParseText = newstring 344 | #~ self.reparseLoc = restartLoc 345 | 346 | class RecursiveGrammarException(Exception): 347 | """exception thrown by C{validate()} if the grammar could be improperly recursive""" 348 | def __init__( self, parseElementList ): 349 | self.parseElementTrace = parseElementList 350 | 351 | def __str__( self ): 352 | return "RecursiveGrammarException: %s" % self.parseElementTrace 353 | 354 | class _ParseResultsWithOffset(object): 355 | def __init__(self,p1,p2): 356 | self.tup = (p1,p2) 357 | def __getitem__(self,i): 358 | return self.tup[i] 359 | def __repr__(self): 360 | return repr(self.tup) 361 | def setOffset(self,i): 362 | self.tup = (self.tup[0],i) 363 | 364 | class ParseResults(object): 365 | """Structured parse results, to provide multiple means of access to the parsed data: 366 | - as a list (C{len(results)}) 367 | - by list index (C{results[0], results[1]}, etc.) 
368 | - by attribute (C{results.}) 369 | """ 370 | #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) 371 | def __new__(cls, toklist, name=None, asList=True, modal=True ): 372 | if isinstance(toklist, cls): 373 | return toklist 374 | retobj = object.__new__(cls) 375 | retobj.__doinit = True 376 | return retobj 377 | 378 | # Performance tuning: we construct a *lot* of these, so keep this 379 | # constructor as small and fast as possible 380 | def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): 381 | if self.__doinit: 382 | self.__doinit = False 383 | self.__name = None 384 | self.__parent = None 385 | self.__accumNames = {} 386 | if isinstance(toklist, list): 387 | self.__toklist = toklist[:] 388 | else: 389 | self.__toklist = [toklist] 390 | self.__tokdict = dict() 391 | 392 | if name is not None and name: 393 | if not modal: 394 | self.__accumNames[name] = 0 395 | if isinstance(name,int): 396 | name = _ustr(name) # will always return a str, but use _ustr for consistency 397 | self.__name = name 398 | if not toklist in (None,'',[]): 399 | if isinstance(toklist,basestring): 400 | toklist = [ toklist ] 401 | if asList: 402 | if isinstance(toklist,ParseResults): 403 | self[name] = _ParseResultsWithOffset(toklist.copy(),0) 404 | else: 405 | self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 406 | self[name].__name = name 407 | else: 408 | try: 409 | self[name] = toklist[0] 410 | except (KeyError,TypeError,IndexError): 411 | self[name] = toklist 412 | 413 | def __getitem__( self, i ): 414 | if isinstance( i, (int,slice) ): 415 | return self.__toklist[i] 416 | else: 417 | if i not in self.__accumNames: 418 | return self.__tokdict[i][-1][0] 419 | else: 420 | return ParseResults([ v[0] for v in self.__tokdict[i] ]) 421 | 422 | def __setitem__( self, k, v, isinstance=isinstance ): 423 | if isinstance(v,_ParseResultsWithOffset): 424 | self.__tokdict[k] = 
self.__tokdict.get(k,list()) + [v] 425 | sub = v[0] 426 | elif isinstance(k,int): 427 | self.__toklist[k] = v 428 | sub = v 429 | else: 430 | self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 431 | sub = v 432 | if isinstance(sub,ParseResults): 433 | sub.__parent = wkref(self) 434 | 435 | def __delitem__( self, i ): 436 | if isinstance(i,(int,slice)): 437 | mylen = len( self.__toklist ) 438 | del self.__toklist[i] 439 | 440 | # convert int to slice 441 | if isinstance(i, int): 442 | if i < 0: 443 | i += mylen 444 | i = slice(i, i+1) 445 | # get removed indices 446 | removed = list(range(*i.indices(mylen))) 447 | removed.reverse() 448 | # fixup indices in token dictionary 449 | for name in self.__tokdict: 450 | occurrences = self.__tokdict[name] 451 | for j in removed: 452 | for k, (value, position) in enumerate(occurrences): 453 | occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 454 | else: 455 | del self.__tokdict[i] 456 | 457 | def __contains__( self, k ): 458 | return k in self.__tokdict 459 | 460 | def __len__( self ): return len( self.__toklist ) 461 | def __bool__(self): return len( self.__toklist ) > 0 462 | __nonzero__ = __bool__ 463 | def __iter__( self ): return iter( self.__toklist ) 464 | def __reversed__( self ): return iter( self.__toklist[::-1] ) 465 | def keys( self ): 466 | """Returns all named result keys.""" 467 | return self.__tokdict.keys() 468 | 469 | def pop( self, index=-1 ): 470 | """Removes and returns item at specified index (default=last). 
def get(self, key, defaultValue=None):
    """Looks up a named result, falling back to a default.

    Returns C{self[key]} when C{key in self} holds; otherwise returns
    C{defaultValue}, which is C{None} unless the caller supplies one.
    """
    if key not in self:
        return defaultValue
    return self[key]
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    Parameters:
     - doctag - tag name for this level; when None, this object's own
       results name is used, and "ITEM" if it has none
     - namedItemsOnly - when true, tokens without a results name are left
       out (and an unnamed level with no doctag yields "")
     - indent - prefix written before this level's tags
     - formatted - when false, newlines and indentation are omitted
    """
    nl = "\n"
    out = []
    # map token position -> results name
    namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist)
    nextLevelIndent = indent + " "

    # collapse out indents if formatting is not desired
    if not formatted:
        indent = ""
        nextLevelIndent = ""
        nl = ""

    selfTag = None
    if doctag is not None:
        selfTag = doctag
    else:
        if self.__name:
            selfTag = self.__name

    if not selfTag:
        if namedItemsOnly:
            return ""
        else:
            selfTag = "ITEM"

    out += [ nl, indent, "<", selfTag, ">" ]

    worklist = self.__toklist
    for i,res in enumerate(worklist):
        if isinstance(res,ParseResults):
            # nested results render themselves; namedItems.get(i) is None
            # when this position has no results name
            out += [ res.asXML(namedItems.get(i),
                                namedItemsOnly and doctag is None,
                                nextLevelIndent,
                                formatted)]
        else:
            # individual token, see if there is a name for it
            resTag = namedItems.get(i)
            if not resTag:
                if namedItemsOnly:
                    continue
                else:
                    resTag = "ITEM"
            xmlBodyText = _xml_escape(_ustr(res))
            # closing tag restored: it had been reduced to an empty string
            out += [ nl, nextLevelIndent, "<", resTag, ">",
                                            xmlBodyText,
                                            "</", resTag, ">" ]

    # closing tag restored for this level as well
    out += [ nl, indent, "</", selfTag, ">" ]
    return "".join(out)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # A location sitting on a newline character is reported as column 1.
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind returns -1 when there is no earlier newline, which makes the
    # first character of the string column 1.
    return loc - strg.rfind("\n", 0, loc)

def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    return strg.count("\n",0,loc) + 1
# this version is Python 2.x-3.x cross-compatible
def _trim_arity(func, maxargs=2):
    """Decorator to trim function calls to match the arity of the target.

    The returned wrapper first calls C{func} with every argument it was given.
    Each time that call raises C{TypeError}, one more leading argument is
    dropped and the call is retried, until a call succeeds or more than
    C{maxargs} arguments have been dropped.  Once a call has succeeded the
    number of dropped arguments is frozen and any later C{TypeError} is
    re-raised unchanged.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)

    # A mutable cell stands in for 'nonlocal', which Python 2 lacks.
    state = {"skip": 0, "settled": False}

    def wrapper(*args):
        while True:
            try:
                result = func(*args[state["skip"]:])
            except TypeError:
                if state["settled"] or state["skip"] > maxargs:
                    raise
                state["skip"] += 1
            else:
                state["settled"] = True
                return result

    return wrapper
817 | """ 818 | ParserElement.literalStringClass = cls 819 | inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) 820 | 821 | def __init__( self, savelist=False ): 822 | self.parseAction = list() 823 | self.failAction = None 824 | #~ self.name = "" # don't define self.name, let subclasses try/except upcall 825 | self.strRepr = None 826 | self.resultsName = None 827 | self.saveAsList = savelist 828 | self.skipWhitespace = True 829 | self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 830 | self.copyDefaultWhiteChars = True 831 | self.mayReturnEmpty = False # used when checking for left-recursion 832 | self.keepTabs = False 833 | self.ignoreExprs = list() 834 | self.debug = False 835 | self.streamlined = False 836 | self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 837 | self.errmsg = "" 838 | self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 839 | self.debugActions = ( None, None, None ) #custom debug actions 840 | self.re = None 841 | self.callPreparse = True # used to avoid redundant calls to preParse 842 | self.callDuringTry = False 843 | 844 | def copy( self ): 845 | """Make a copy of this C{ParserElement}. 
def setResultsName( self, name, listAllMatches=False ):
    """Attach a results name to a *copy* of this element and return the copy.

    The original C{ParserElement} is left untouched, so a basic element
    (such as an integer) can be defined once and referenced in several
    places under different names.

    A trailing C{'*'} on C{name} is removed and has the same effect as
    passing C{listAllMatches=True}.

    The abbreviated form C{expr("name")} is equivalent to
    C{expr.setResultsName("name")} - see L{__call__}.
    """
    clone = self.copy()
    starred = name.endswith("*")
    clone.resultsName = name[:-1] if starred else name
    clone.modalResults = not (starred or listAllMatches)
    return clone
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with C{fns}.

    Each parse action is a callable taking 0-3 arguments, called as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    A parse action that modifies the tokens can return them, and the returned
    value will replace the original tokens.  Otherwise it does not need to
    return anything.

    The optional keyword argument C{callDuringTry} is stored on the element;
    it defaults to C{False} when not supplied.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{parseString} for more
    information on parsing strings containing C{<TAB>}s, and suggested methods
    to maintain a consistent view of the parsed string, the parse location,
    and line and column positions within the parsed string.
    """
    # every action is wrapped so it can be called uniformly as fn(s,loc,toks)
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
def preParse( self, instring, loc ):
    """Advance C{loc} past anything that should be skipped before matching.

    Ignorable expressions (if any are registered) are skipped first, then -
    unless whitespace skipping is disabled - any run of characters found in
    C{self.whiteChars}.  Returns the new location.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    blanks = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in blanks:
        loc += 1
    return loc
self.callPreparse: 979 | preloc = self.preParse( instring, loc ) 980 | else: 981 | preloc = loc 982 | tokensStart = preloc 983 | try: 984 | try: 985 | loc,tokens = self.parseImpl( instring, preloc, doActions ) 986 | except IndexError: 987 | raise ParseException( instring, len(instring), self.errmsg, self ) 988 | except ParseBaseException as err: 989 | #~ print ("Exception raised:", err) 990 | if self.debugActions[2]: 991 | self.debugActions[2]( instring, tokensStart, self, err ) 992 | if self.failAction: 993 | self.failAction( instring, tokensStart, self, err ) 994 | raise 995 | else: 996 | if callPreParse and self.callPreparse: 997 | preloc = self.preParse( instring, loc ) 998 | else: 999 | preloc = loc 1000 | tokensStart = preloc 1001 | if self.mayIndexError or loc >= len(instring): 1002 | try: 1003 | loc,tokens = self.parseImpl( instring, preloc, doActions ) 1004 | except IndexError: 1005 | raise ParseException( instring, len(instring), self.errmsg, self ) 1006 | else: 1007 | loc,tokens = self.parseImpl( instring, preloc, doActions ) 1008 | 1009 | tokens = self.postParse( instring, loc, tokens ) 1010 | 1011 | retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 1012 | if self.parseAction and (doActions or self.callDuringTry): 1013 | if debugging: 1014 | try: 1015 | for fn in self.parseAction: 1016 | tokens = fn( instring, tokensStart, retTokens ) 1017 | if tokens is not None: 1018 | retTokens = ParseResults( tokens, 1019 | self.resultsName, 1020 | asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 1021 | modal=self.modalResults ) 1022 | except ParseBaseException as err: 1023 | #~ print "Exception raised in user parse action:", err 1024 | if (self.debugActions[2] ): 1025 | self.debugActions[2]( instring, tokensStart, self, err ) 1026 | raise 1027 | else: 1028 | for fn in self.parseAction: 1029 | tokens = fn( instring, tokensStart, retTokens ) 1030 | if tokens is not None: 1031 | retTokens = 
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with parse actions disabled and return only
    the location where the match ended.

    A C{ParseFatalException} raised by the attempt is replaced with an
    ordinary C{ParseException} at C{loc} carrying this element's C{errmsg}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{parseWithTabs})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Returns the tokens produced by the top-level match.  Any
    C{ParseBaseException} is propagated to the caller.
    """
    # the memoizing cache is keyed on the input string, so start each parse clean
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse( instring, loc )
            se = Empty() + StringEnd()
            se._parse( instring, loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            # bare raise keeps the full internal traceback
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
    else:
        return tokens
def transformString( self, instring ):
    """Extension to C{L{scanString}} that rewrites the matched text.

    Define a grammar and attach a parse action to it that modifies the
    returned token list.  C{transformString()} then scans C{instring} for
    matches, substitutes each matched span with the tokens produced for it,
    copies the unmatched text through unchanged, and returns the resulting
    string.

    Note that this sets C{self.keepTabs} to True on the element.
    """
    pieces = []
    cursor = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # text between the previous match and this one passes through as-is
            pieces.append( instring[cursor:start] )
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks, list):
                    pieces.extend( toks )
                else:
                    pieces.append( toks )
            cursor = end
        pieces.append( instring[cursor:] )
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(kept)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}

    A string operand is first wrapped in C{ParserElement.literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a return value of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} when C{other} is neither an int nor a tuple of
    ints/None, and C{ValueError} for negative counts, a maximum below the
    minimum, or a total count of zero.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples so (n,) reads as (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # optElements becomes the number of optional repeats beyond the minimum
            optElements -= minElements
        else:
            # apply the format here; passing the values as extra exception
            # arguments would leave the '%s' placeholders unfilled
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repeat is only tried after the previous one matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}

    A string operand is first wrapped in C{ParserElement.literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a return value of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped in C{ParserElement.literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a return value of C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        # delegate to the (now wrapped) left operand's own & implementation
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
def setWhitespaceChars( self, chars ):
    """Use C{chars} as this element's whitespace characters in place of the
    defaults.

    Whitespace skipping is switched on, and the element is flagged so that
    it no longer takes its whitespace characters from the class default.
    Returns C{self}.
    """
    self.whiteChars, self.copyDefaultWhiteChars = chars, False
    self.skipWhitespace = True
    return self
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    C{parseAll} is passed through to C{parseString}, whose result is
    returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object: treat it as a path.  The with-block
        # guarantees the handle is closed even if read() raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen:
            self.firstMatchChar = matchString[0]
        else:
            # an empty literal can never be matched character-by-character,
            # so the instance is turned into an Empty token instead
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at C{loc}; return the end location and the
        matched string, or raise C{ParseException}."""
        matched = (instring[loc] == self.firstMatchChar and
                   (self.matchLen==1 or instring.startswith(self.match,loc)))
        if not matched:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.match
1613 | Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} 1614 | Accepts two optional constructor arguments in addition to the keyword string: 1615 | C{identChars} is a string of characters that would be valid identifier characters, 1616 | defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive 1617 | matching, default is C{False}. 1618 | """ 1619 | DEFAULT_KEYWORD_CHARS = alphanums+"_$" 1620 | 1621 | def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ): 1622 | super(Keyword,self).__init__() 1623 | self.match = matchString 1624 | self.matchLen = len(matchString) 1625 | try: 1626 | self.firstMatchChar = matchString[0] 1627 | except IndexError: 1628 | warnings.warn("null string passed to Keyword; use Empty() instead", 1629 | SyntaxWarning, stacklevel=2) 1630 | self.name = '"%s"' % self.match 1631 | self.errmsg = "Expected " + self.name 1632 | self.mayReturnEmpty = False 1633 | self.mayIndexError = False 1634 | self.caseless = caseless 1635 | if caseless: 1636 | self.caselessmatch = matchString.upper() 1637 | identChars = identChars.upper() 1638 | self.identChars = set(identChars) 1639 | 1640 | def parseImpl( self, instring, loc, doActions=True ): 1641 | if self.caseless: 1642 | if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 1643 | (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 1644 | (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 1645 | return loc+self.matchLen, self.match 1646 | else: 1647 | if (instring[loc] == self.firstMatchChar and 1648 | (self.matchLen==1 or instring.startswith(self.match,loc)) and 1649 | (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 1650 | (loc == 0 or instring[loc-1] not in self.identChars) ): 1651 | return loc+self.matchLen, self.match 1652 | raise ParseException(instring, loc, self.errmsg, 
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice with the upper-cased literal."""
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.match:
            return end, self.returnString
        raise ParseException(instring, loc, self.errmsg, self)


class CaselessKeyword(Keyword):
    """Case-insensitive C{Keyword}; constructs the base class with C{caseless=True}."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match ignoring case; only the character *after* the keyword is
        checked against C{identChars} here."""
        end = loc + self.matchLen
        if ((instring[loc:end].upper() == self.caselessmatch) and
                (loc >= len(instring) - self.matchLen or
                 instring[end].upper() not in self.identChars)):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)


class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))

    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Keep the regex source text, not the repr of the compiled object
            # (str(pattern) yields e.g. "re.compile('...')").
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the compiled regex at C{loc}.

        Returns C{(end, results)} where C{results} is a C{ParseResults} built
        from the whole match, with every named group also stored under its
        group name.  Raises C{ParseException} when the regex does not match.
        """
        result = self.re.match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        named = result.groupdict()
        ret = ParseResults(result.group())
        for key in named:
            ret[key] = named[key]
        return loc, ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            # Fall through to the cached representation below.
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Single-line strings additionally exclude CR/LF from the body.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            lineBreaks = r'\n\r'
        if escChar is not None:
            escInClass = _escapeRegexRangeChars(escChar)
        else:
            escInClass = ''
        # Opening quote, then a group of alternatives for the body; the first
        # alternative is any character that is not the first end-quote
        # character, a line break (single-line mode) or the escape character.
        self.pattern = r'%s(?:[^%s%s%s]' % (
            re.escape(self.quoteChar),
            _escapeRegexRangeChars(self.endQuoteChar[0]),
            lineBreaks,
            escInClass)
        if len(self.endQuoteChar) > 1:
            # Allow proper prefixes of a multi-character end quote inside the
            # body as long as the following character breaks the end quote.
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                      for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            charset = ''.join(set(self.quoteChar[0] + self.endQuoteChar[0])).replace('^', r'\^').replace('-', r'\-')
            self.escCharReplacePattern = re.escape(self.escChar) + ("([%s])" % charset)
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at C{loc}.

        Returns C{(end, text)}; when C{unquoteResults} is set the delimiters
        are stripped and escape sequences are resolved.  Raises
        C{ParseException} when the input does not match.
        """
        # Cheap first-character test before running the regex.
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a plain literal
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            # Fall through to the cached representation below.
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # An exact length overrides both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of characters that are not in C{notChars}.

        Returns C{(end, text)}.  Raises C{ParseException} when the first
        character is disallowed or the run is shorter than C{minLen}.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min(start + self.maxLen, len(instring))
        while loc < maxlen and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # Fall through to the cached representation below.
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # Printable label for each supported whitespace character; the labels are
    # concatenated to form the element's name (and thus its error message).
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # Characters being matched must not be skipped as leading whitespace.
        self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # An exact length overrides both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of characters from C{matchWhite}.

        Returns C{(end, text)}.  Raises C{ParseException} when the first
        character is not in C{matchWhite} or the run is shorter than C{minLen}.
        """
        if not (instring[loc] in self.matchWhite):
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = start + self.maxLen
        maxloc = min(maxloc, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
class _PositionToken(Token):
    """Base class for the position-matching tokens; named after its concrete class."""

    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = self.__class__.__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False


class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables and whitespace until the target column is reached."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between C{loc} and the target column.

        Raises C{ParseException} when C{loc} is already past that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + self.col - current
        return stop, instring[loc:stop]
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        super(LineStart, self).__init__()
        # A newline is significant for this token, so it is not skipped.
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Advance C{loc} by one when the inherited pre-parse stops on a newline."""
        skipped = super(LineStart, self).preParse(instring, loc)
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at the start of the input, at the
        position pre-parsing reaches from offset 0, or right after a newline."""
        atLineStart = (loc == 0 or
                       loc == self.preParse(instring, 0) or
                       instring[loc - 1] == "\n")
        if atLineStart:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)


class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """

    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Fail when the previous character is a word character or the current
        one is not; offset 0 always succeeds."""
        if loc == 0:
            return loc, []
        wordChars = self.wordChars
        if instring[loc - 1] in wordChars or instring[loc] not in wordChars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """

    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if not exprs:
            self.mayReturnEmpty = True
        else:
            # Empty-capable as soon as any alternative is.
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        When none matches, re-raise the failure that got furthest into the
        input, or a generic C{ParseException} if there were no alternatives.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                # Running off the end counts as a failure at end of input.
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestErr

    def __ior__(self, other):
        """In-place C{|=}: append another alternative (strings are wrapped first)."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative, with self added to the trail."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """

    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        # Empty-capable only when every member is.
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match all member expressions in whatever order they appear.

        A first pass uses C{tryParse} to discover a matching order; a second
        pass re-parses in that order to collect results.  Returns
        C{(end, ParseResults)}; raises C{ParseException} when a required
        expression never matched.
        """
        if self.initExprGroups:
            # Classify the members once, on first use.
            wrappedOptionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            emptyCapable = [e for e in self.exprs
                            if e.mayReturnEmpty and e not in wrappedOptionals]
            self.optionals = wrappedOptionals + emptyCapable
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # Keep sweeping over the candidates until a whole sweep matches nothing.
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for expr in candidates:
                try:
                    scanLoc = expr.tryParse(instring, scanLoc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(expr)
                    if expr in pendingRequired:
                        pendingRequired.remove(expr)
                    elif expr in pendingOptional:
                        pendingOptional.remove(expr)
            if failures == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join(_ustr(e) for e in pendingRequired)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs
                       if isinstance(e, Optional) and e.expr in pendingOptional]

        collected = []
        for expr in matchOrder:
            loc, results = expr._parse(instring, loc, doActions)
            collected.append(results)

        finalResults = ParseResults([])
        for res in collected:
            # Names seen before accumulate values rather than being overwritten.
            merged = {}
            for key in res.keys():
                if key in finalResults.keys():
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(res[key])
                    merged[key] = combined
            finalResults += ParseResults(res)
            for key, value in merged.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every member, with self added to the trail."""
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(trail)
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""

    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # Mirror the wrapped expression's parsing characteristics.
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise C{ParseException} when there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a private copy of the
        wrapped expression.  Returns C{self}."""
        self.skipWhitespace = False
        # Guard before copying: self.expr may be None, in which case there is
        # nothing to copy or propagate to.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register an ignorable expression and pass it on to the wrapped
        expression.  A C{Suppress} that is already registered is not added
        again.  Returns C{self}."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise C{RecursiveGrammarException} if self is already on the trail;
        otherwise continue the check into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then check this element for recursion."""
        # The default list is only ever copied, never mutated.
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # Fall through to the cached representation below.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""

    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression at C{loc}; on success return C{loc}
        unchanged with no tokens.  A failure propagates to the caller."""
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens only when the wrapped expression fails at C{loc}."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr


class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Collect tokens from as many consecutive matches as possible.

        Returns C{(end, tokens)}; with no match at all, C{loc} is returned
        unchanged together with an empty list.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                preloc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, preloc, doActions)
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # The first failure ends the repetition.
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Name the results and force list-style storage on the returned element."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
callPreParse=False ) 2750 | hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 2751 | while 1: 2752 | if hasIgnoreExprs: 2753 | preloc = self._skipIgnorables( instring, loc ) 2754 | else: 2755 | preloc = loc 2756 | loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 2757 | if tmptokens or tmptokens.keys(): 2758 | tokens += tmptokens 2759 | except (ParseException,IndexError): 2760 | pass 2761 | 2762 | return loc, tokens 2763 | 2764 | def __str__( self ): 2765 | if hasattr(self,"name"): 2766 | return self.name 2767 | 2768 | if self.strRepr is None: 2769 | self.strRepr = "[" + _ustr(self.expr) + "]..." 2770 | 2771 | return self.strRepr 2772 | 2773 | def setResultsName( self, name, listAllMatches=False ): 2774 | ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) 2775 | ret.saveAsList = True 2776 | return ret 2777 | 2778 | 2779 | class OneOrMore(ParseElementEnhance): 2780 | """Repetition of one or more of the given expression.""" 2781 | def parseImpl( self, instring, loc, doActions=True ): 2782 | # must be at least one 2783 | loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 2784 | try: 2785 | hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 2786 | while 1: 2787 | if hasIgnoreExprs: 2788 | preloc = self._skipIgnorables( instring, loc ) 2789 | else: 2790 | preloc = loc 2791 | loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 2792 | if tmptokens or tmptokens.keys(): 2793 | tokens += tmptokens 2794 | except (ParseException,IndexError): 2795 | pass 2796 | 2797 | return loc, tokens 2798 | 2799 | def __str__( self ): 2800 | if hasattr(self,"name"): 2801 | return self.name 2802 | 2803 | if self.strRepr is None: 2804 | self.strRepr = "{" + _ustr(self.expr) + "}..." 
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""
    def __str__(self):
        return ""

    def __bool__(self):
        return False

    # Python 2 looks up __nonzero__ for truth testing, Python 3 uses __bool__.
    __nonzero__ = __bool__
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<=' operator.

       Note: the older '<<' operator is deprecated.  When it is used, take care not to
       overlook precedence of operators.  Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  With '<<' you must
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem, because
       the whole right-hand side of an augmented assignment is evaluated first.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __ilshift__( self, other ):
        """Implement C{fwd <<= expr}: install C{other} as the contained expression.

        A plain string is converted with C{ParserElement.literalStringClass}.
        Returns self, as required for an in-place operator.
        """
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.strRepr = None    # any cached string form is stale now
        # take over the parsing attributes of the contained expression
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __lshift__(self, other):
        """Deprecated spelling C{fwd << expr}; warns, delegates to '<<=' and returns None."""
        warnings.warn("Operator '<<' is deprecated, use '<<=' instead",
                       DeprecationWarning,stacklevel=2)
        self.__ilshift__(other)
        return None

    def leaveWhitespace( self ):
        # only this element is changed; nothing is propagated to self.expr
        self.skipWhitespace = False
        return self

    def streamline( self ):
        # the flag is set before descending so a recursive grammar cannot loop here
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # validateTrace is only read and copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch class so that a nested reference back to this
        # object prints as "..." instead of recursing without end.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            # nothing assigned yet: return a new Forward that wraps this one
            ret = Forward()
            ret <<= self
            return ret
3021 | """ 3022 | def __init__( self, expr, joinString="", adjacent=True ): 3023 | super(Combine,self).__init__( expr ) 3024 | # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself 3025 | if adjacent: 3026 | self.leaveWhitespace() 3027 | self.adjacent = adjacent 3028 | self.skipWhitespace = True 3029 | self.joinString = joinString 3030 | self.callPreparse = True 3031 | 3032 | def ignore( self, other ): 3033 | if self.adjacent: 3034 | ParserElement.ignore(self, other) 3035 | else: 3036 | super( Combine, self).ignore( other ) 3037 | return self 3038 | 3039 | def postParse( self, instring, loc, tokenlist ): 3040 | retToks = tokenlist.copy() 3041 | del retToks[:] 3042 | retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) 3043 | 3044 | if self.resultsName and len(retToks.keys())>0: 3045 | return [ retToks ] 3046 | else: 3047 | return retToks 3048 | 3049 | class Group(TokenConverter): 3050 | """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" 3051 | def __init__( self, expr ): 3052 | super(Group,self).__init__( expr ) 3053 | self.saveAsList = True 3054 | 3055 | def postParse( self, instring, loc, tokenlist ): 3056 | return [ tokenlist ] 3057 | 3058 | class Dict(TokenConverter): 3059 | """Converter to return a repetitive expression as a list, but also as a dictionary. 3060 | Each element can also be referenced using the first token in the expression as its key. 3061 | Useful for tabular report scraping when the first column can be used as a item key. 
3062 | """ 3063 | def __init__( self, exprs ): 3064 | super(Dict,self).__init__( exprs ) 3065 | self.saveAsList = True 3066 | 3067 | def postParse( self, instring, loc, tokenlist ): 3068 | for i,tok in enumerate(tokenlist): 3069 | if len(tok) == 0: 3070 | continue 3071 | ikey = tok[0] 3072 | if isinstance(ikey,int): 3073 | ikey = _ustr(tok[0]).strip() 3074 | if len(tok)==1: 3075 | tokenlist[ikey] = _ParseResultsWithOffset("",i) 3076 | elif len(tok)==2 and not isinstance(tok[1],ParseResults): 3077 | tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) 3078 | else: 3079 | dictvalue = tok.copy() #ParseResults(i) 3080 | del dictvalue[0] 3081 | if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): 3082 | tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) 3083 | else: 3084 | tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) 3085 | 3086 | if self.resultsName: 3087 | return [ tokenlist ] 3088 | else: 3089 | return tokenlist 3090 | 3091 | 3092 | class Suppress(TokenConverter): 3093 | """Converter for ignoring the results of a parsed expression.""" 3094 | def postParse( self, instring, loc, tokenlist ): 3095 | return [] 3096 | 3097 | def suppress( self ): 3098 | return self 3099 | 3100 | 3101 | class OnlyOnce(object): 3102 | """Wrapper for parse actions, to ensure they are only called once.""" 3103 | def __init__(self, methodCall): 3104 | self.callable = _trim_arity(methodCall) 3105 | self.called = False 3106 | def __call__(self,s,l,t): 3107 | if not self.called: 3108 | results = self.callable(s,l,t) 3109 | self.called = True 3110 | return results 3111 | raise ParseException(s,l,"") 3112 | def reset(self): 3113 | self.called = False 3114 | 3115 | def traceParseAction(f): 3116 | """Decorator for debugging parse actions.""" 3117 | f = _trim_arity(f) 3118 | def z(*paArgs): 3119 | thisFunc = f.func_name 3120 | s,l,t = paArgs[-3:] 3121 | if len(paArgs)>3: 3122 | thisFunc = paArgs[0].__class__.__name__ + '.' 
+ thisFunc 3123 | sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) 3124 | try: 3125 | ret = f(*paArgs) 3126 | except Exception as exc: 3127 | sys.stderr.write( "<<=leaving %s (exception: %s)\n" % (thisFunc,exc) ) 3128 | raise 3129 | sys.stderr.write( "<<=leaving %s (ret: %s)\n" % (thisFunc,ret) ) 3130 | return ret 3131 | try: 3132 | z.__name__ = f.__name__ 3133 | except AttributeError: 3134 | pass 3135 | return z 3136 | 3137 | # 3138 | # global helpers 3139 | # 3140 | def delimitedList( expr, delim=",", combine=False ): 3141 | """Helper to define a delimited list of expressions - the delimiter defaults to ','. 3142 | By default, the list elements and delimiters can have intervening whitespace, and 3143 | comments, but this can be overridden by passing C{combine=True} in the constructor. 3144 | If C{combine} is set to C{True}, the matching tokens are returned as a single token 3145 | string, with the delimiters included; otherwise, the matching tokens are returned 3146 | as a list of tokens, with the delimiters suppressed. 3147 | """ 3148 | dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..." 3149 | if combine: 3150 | return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName) 3151 | else: 3152 | return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName) 3153 | 3154 | def countedArray( expr, intExpr=None ): 3155 | """Helper to define a counted list of expressions. 3156 | This helper defines a pattern of the form:: 3157 | integer expr expr expr... 3158 | where the leading integer tells how many expr expressions follow. 3159 | The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. 
3160 | """ 3161 | arrayExpr = Forward() 3162 | def countFieldParseAction(s,l,t): 3163 | n = t[0] 3164 | arrayExpr <<= (n and Group(And([expr]*n)) or Group(empty)) 3165 | return [] 3166 | if intExpr is None: 3167 | intExpr = Word(nums).setParseAction(lambda t:int(t[0])) 3168 | else: 3169 | intExpr = intExpr.copy() 3170 | intExpr.setName("arrayLen") 3171 | intExpr.addParseAction(countFieldParseAction, callDuringTry=True) 3172 | return ( intExpr + arrayExpr ) 3173 | 3174 | def _flatten(L): 3175 | ret = [] 3176 | for i in L: 3177 | if isinstance(i,list): 3178 | ret.extend(_flatten(i)) 3179 | else: 3180 | ret.append(i) 3181 | return ret 3182 | 3183 | def matchPreviousLiteral(expr): 3184 | """Helper to define an expression that is indirectly defined from 3185 | the tokens matched in a previous expression, that is, it looks 3186 | for a 'repeat' of a previous expression. For example:: 3187 | first = Word(nums) 3188 | second = matchPreviousLiteral(first) 3189 | matchExpr = first + ":" + second 3190 | will match C{"1:1"}, but not C{"1:2"}. Because this matches a 3191 | previous literal, will also match the leading C{"1:1"} in C{"1:10"}. 3192 | If this is not desired, use C{matchPreviousExpr}. 3193 | Do *not* use with packrat parsing enabled. 3194 | """ 3195 | rep = Forward() 3196 | def copyTokenToRepeater(s,l,t): 3197 | if t: 3198 | if len(t) == 1: 3199 | rep <<= t[0] 3200 | else: 3201 | # flatten t tokens 3202 | tflat = _flatten(t.asList()) 3203 | rep <<= And( [ Literal(tt) for tt in tflat ] ) 3204 | else: 3205 | rep <<= Empty() 3206 | expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 3207 | return rep 3208 | 3209 | def matchPreviousExpr(expr): 3210 | """Helper to define an expression that is indirectly defined from 3211 | the tokens matched in a previous expression, that is, it looks 3212 | for a 'repeat' of a previous expression. 
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list, tuple or other
          iterable of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       If C{strs} is not a string and cannot be iterated, a C{SyntaxWarning} is
       issued and a C{MatchFirst} with no alternatives is returned.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    else:
        try:
            symbols = list(strs)
        except TypeError:
            # Previously this path left `symbols` unassigned and the function
            # failed below with UnboundLocalError right after the warning.
            warnings.warn("Invalid argument to oneOf, expected string or list",
                    SyntaxWarning, stacklevel=2)
            symbols = []

    # Drop duplicates and move any symbol that has an earlier symbol as its
    # prefix in front of that prefix, so the longer alternative is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no change was made for position i, move on
            i += 1

    # With no symbols there is nothing to build a pattern from ("[]" is not a
    # valid regular expression), so go straight to the MatchFirst fallback.
    if symbols and not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: one character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
value ): 3303 | """Helper to easily and clearly define a dictionary by specifying the respective patterns 3304 | for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens 3305 | in the proper order. The key pattern can include delimiting markers or punctuation, 3306 | as long as they are suppressed, thereby leaving the significant key text. The value 3307 | pattern can include named results, so that the C{Dict} results can include named token 3308 | fields. 3309 | """ 3310 | return Dict( ZeroOrMore( Group ( key + value ) ) ) 3311 | 3312 | def originalTextFor(expr, asString=True): 3313 | """Helper to return the original, untokenized text for a given expression. Useful to 3314 | restore the parsed fields of an HTML start tag into the raw tag text itself, or to 3315 | revert separate tokens with intervening whitespace back to the original matching 3316 | input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not 3317 | require the inspect module to chase up the call stack. By default, returns a 3318 | string containing the original parsed text. 3319 | 3320 | If the optional C{asString} argument is passed as C{False}, then the return value is a 3321 | C{L{ParseResults}} containing any results names that were originally matched, and a 3322 | single token containing the original matched text from the input string. 
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for regexp-style bracket expressions such as "[a-z0-9_]"
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# The hex digits are everything after the x/X marker.  str.lstrip(r'\0x') is not
# used because it strips a *set* of characters: it would also remove leading
# zero digits (turning "\x00" into an empty string) and never removes an
# upper-case "X".
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][t[0].lower().index('x')+1:],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed or expanded.
    """
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # Best effort: a malformed range yields "".  Exception (rather than a
        # bare except) lets KeyboardInterrupt and SystemExit propagate.
        return ""
3403 | To use, add this parse action to quoted string using:: 3404 | quotedString.setParseAction( removeQuotes ) 3405 | """ 3406 | return t[0][1:-1] 3407 | 3408 | def upcaseTokens(s,l,t): 3409 | """Helper parse action to convert tokens to upper case.""" 3410 | return [ tt.upper() for tt in map(_ustr,t) ] 3411 | 3412 | def downcaseTokens(s,l,t): 3413 | """Helper parse action to convert tokens to lower case.""" 3414 | return [ tt.lower() for tt in map(_ustr,t) ] 3415 | 3416 | def keepOriginalText(s,startLoc,t): 3417 | """DEPRECATED - use new helper method C{L{originalTextFor}}. 3418 | Helper parse action to preserve original parsed text, 3419 | overriding any nested parse actions.""" 3420 | try: 3421 | endloc = getTokensEndLoc() 3422 | except ParseException: 3423 | raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") 3424 | del t[:] 3425 | t += ParseResults(s[startLoc:endloc]) 3426 | return t 3427 | 3428 | def getTokensEndLoc(): 3429 | """Method to be called from within a parse action to determine the end 3430 | location of the parsed tokens.""" 3431 | import inspect 3432 | fstack = inspect.stack() 3433 | try: 3434 | # search up the stack (through intervening argument normalizers) for correct calling routine 3435 | for f in fstack[2:]: 3436 | if f[3] == "_parseNoCache": 3437 | endloc = f[0].f_locals["loc"] 3438 | return endloc 3439 | else: 3440 | raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") 3441 | finally: 3442 | del fstack 3443 | 3444 | def _makeTags(tagStr, xml): 3445 | """Internal helper to construct opening and closing tag expressions, given a tag name""" 3446 | if isinstance(tagStr,basestring): 3447 | resname = tagStr 3448 | tagStr = Keyword(tagStr, caseless=not xml) 3449 | else: 3450 | resname = tagStr.name 3451 | 3452 | tagAttrName = Word(alphas,alphanums+"_-:") 3453 | if (xml): 3454 | tagAttrValue = dblQuotedString.copy().setParseAction( 
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.

       Positional name-value tuples take precedence: when any are given, the
       keyword arguments are not consulted.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required attribute
       is missing from the tokens or has a different value.
    """
    # snapshot the requirements as a list of (name, value) pairs
    required = [(name,value) for name,value in (args if args else attrDict.items())]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                # a specific value was requested, so it has to match
                if tokens[attrName] != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))
    return pa
withAttribute.ANY_VALUE = object()
3523 | 3524 | Parameters: 3525 | - baseExpr - expression representing the most basic element for the nested 3526 | - opList - list of tuples, one for each operator precedence level in the 3527 | expression grammar; each tuple is of the form 3528 | (opExpr, numTerms, rightLeftAssoc, parseAction), where: 3529 | - opExpr is the pyparsing expression for the operator; 3530 | may also be a string, which will be converted to a Literal; 3531 | if numTerms is 3, opExpr is a tuple of two expressions, for the 3532 | two operators separating the 3 terms 3533 | - numTerms is the number of terms for this operator (must 3534 | be 1, 2, or 3) 3535 | - rightLeftAssoc is the indicator whether the operator is 3536 | right or left associative, using the pyparsing-defined 3537 | constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}. 3538 | - parseAction is the parse action to be associated with 3539 | expressions matching this operator expression (the 3540 | parse action tuple member may be omitted) 3541 | - lpar - expression for matching left-parentheses (default=Suppress('(')) 3542 | - rpar - expression for matching right-parentheses (default=Suppress(')')) 3543 | """ 3544 | ret = Forward() 3545 | lastExpr = baseExpr | ( lpar + ret + rpar ) 3546 | for i,operDef in enumerate(opList): 3547 | opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 3548 | if arity == 3: 3549 | if opExpr is None or len(opExpr) != 2: 3550 | raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 3551 | opExpr1, opExpr2 = opExpr 3552 | thisExpr = Forward()#.setName("expr%d" % i) 3553 | if rightLeftAssoc == opAssoc.LEFT: 3554 | if arity == 1: 3555 | matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 3556 | elif arity == 2: 3557 | if opExpr is not None: 3558 | matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 3559 | else: 3560 | matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + 
OneOrMore(lastExpr) ) 3561 | elif arity == 3: 3562 | matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 3563 | Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) 3564 | else: 3565 | raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 3566 | elif rightLeftAssoc == opAssoc.RIGHT: 3567 | if arity == 1: 3568 | # try to avoid LR with this extra test 3569 | if not isinstance(opExpr, Optional): 3570 | opExpr = Optional(opExpr) 3571 | matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 3572 | elif arity == 2: 3573 | if opExpr is not None: 3574 | matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) 3575 | else: 3576 | matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) 3577 | elif arity == 3: 3578 | matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 3579 | Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) 3580 | else: 3581 | raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 3582 | else: 3583 | raise ValueError("operator must indicate right or left associativity") 3584 | if pa: 3585 | matchExpr.setParseAction( pa ) 3586 | thisExpr <<= ( matchExpr | lastExpr ) 3587 | lastExpr = thisExpr 3588 | ret <<= lastExpr 3589 | return ret 3590 | operatorPrecedence = infixNotation 3591 | 3592 | dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") 3593 | sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") 3594 | quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") 3595 | unicodeString = Combine(_L('u') + quotedString.copy()) 3596 | 3597 | def nestedExpr(opener="(", 
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.  In that case both delimiters must be
       strings, otherwise ValueError is raised.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises ValueError if opener and closer compare equal.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # A default content expression can only be derived from string delimiters.
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer)==1
        if singleCharDelims:
            # One-character delimiters can be excluded directly by CharsNotIn.
            if ignoreExpr is None:
                # Here the parse action is attached to the CharsNotIn element
                # only, not to the combined expression.
                content = empty.copy()+CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(opener+closer+whiteChars,exact=1)))
                content.setParseAction(stripToken)
        else:
            # Longer delimiters are excluded with negative lookaheads while
            # consuming one non-whitespace character at a time.
            if ignoreExpr is None:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1)))
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1)))
            content.setParseAction(stripToken)
    ret = Forward()
    # The expression refers to itself (ret) so nested lists become nested groups.
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Returns the Group expression matching the whole block.  Note that
       blockStatementExpr is modified in place: an ignore expression
       (C{_bslash + LineEnd()}, presumably a backslash line continuation) is
       added to it.
    """
    def checkPeerIndent(s,l,t):
        # Nothing to compare once the end of the input has been reached.
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # A sub-block must start to the right of the current level; its
        # column becomes the new current level.
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        # Both the current level and the enclosing one are consulted, so the
        # stack needs at least two entries; a shorter stack is reported as a
        # parse failure instead of escaping as an IndexError.
        if not(len(indentStack) >= 2 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # The three markers consume no input; they only run the column checks.
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
# Character sets built from hex character codes (assumes srange reads the
# \0x.. notation as a hex code - TODO confirm against srange).
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# Maps each entity name accepted by commonHTMLEntity to its replacement character.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# Parse action: returns the replacement character for a known entity name,
# or None when the name is not in the map.
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# Matches "<!--", then the shortest run of any characters (newlines included),
# then "-->".  An empty pattern here would match the empty string everywhere.
htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")