Then:

" 11 | url = "" 12 | download_url="" 13 | description="programming language in Igbo" 14 | long_description = """ 15 | .. contents:: 16 | :depth: 2 17 | 18 | Introduction 19 | -------------- 20 | 21 | "Igbo is an indigenous language popularly spoken in Nigeria, Ibolang is a full 22 | extension of the Igbo language in Python. 23 | With Ibolang, you can write and run python like programs in Igbo 24 | 25 | Ibolang acts like python 3 and plays like python 3, it maintains all the python syntax 26 | and methods. 27 | user could use it to learn programming in their native language. 28 | 29 | Check examples here. 30 | 31 | * https://github.com/orc-1/ibolang/examples 32 | 33 | Install 34 | ---------- 35 | 36 | If you'd like to play Ibolang with full features, you should install Ibolang. 37 | 38 | You could use pip or easy_install command to install Ibolang:: 39 | $ pip install Ibolang 40 | 41 | or 42 | 43 | $ easy_install -U Ibolang 44 | 45 | to use easy_install command, you should install distribute module for python 3 first: 46 | 47 | http://pypi.python.org/pypi/distribute/ 48 | 49 | And check your system path params if it contains python3.x/bin path. 50 | 51 | ex: edit .bashrc to include "/Library/Frameworks/Python.framework/Versions/3.x/bin" in your PATH parameter. 52 | 53 | For sytem running multiple version of python, you are better of using a virtual enviroment 54 | with Ibolang:: 55 | $ conda create -n Ibolang python==3.XX 56 | 57 | or using Virtualenv 58 | 59 | $ virtualenv ibolang python==3.XX 60 | 61 | Lastly you can clone the repo using this url: navigate to the folder path and run python setup.py 62 | Copy the source files into your script folder, you should highly consider using 63 | a virtual enviroment if you are using this option and the previous options are better 64 | off 65 | 66 | 67 | 68 | Change Log 69 | ------------- 70 | 71 | You could view the ChangeLog to see what's new in these version. 
# Packaging script for the ``ibolang`` distribution.
#
# All descriptive metadata (version, author, URLs, descriptions) is read from
# the sibling ``release`` module so that it is defined in exactly one place.
try:
    from setuptools import setup, find_packages
except ImportError:
    # setuptools is missing: bootstrap it through ez_setup, then retry.
    from ez_setup import use_setuptools
    use_setuptools()
    from setuptools import setup, find_packages

from pkg_resources import DistributionNotFound

import sys
import os
import glob
import release
#execfile('release.py')

# setup params
required_modules = ["setuptools"]
#if mac, install readline
#if(sys.platform=="darwin"):
#    required_modules.append("readline >= 2.6.4")

# nose is used for test
extra_modules = {}

setup(
    name="ibolang",
    version=release.version,
    author=release.author,
    author_email=release.email,
    download_url=release.download_url,
    # The bare name ``license`` resolves to the interactive builtin injected
    # by ``site`` (or raises NameError under ``python -S``), not to a licence
    # string.  Read it from ``release`` like every other field and fall back
    # to the licence named by the classifier below.
    license=getattr(release, "license", "MIT"),
    keywords="traditional, simplified, Igbo, Afrocode, language, tokenize",
    description=release.description,
    long_description=release.long_description,
    url=release.url,
    zip_safe=False,
    install_requires=required_modules,
    extras_require=extra_modules,
    include_package_data=True,
    packages=find_packages(exclude=["ez_setup", 'examples', 'apidocs', "tests"]),
    entry_points="""
    [console_scripts]
    ibolang = ibolang:commandline

    """,
    classifiers=[
        # Trove classifiers must match the official list exactly; the
        # development-status entries carry a numeric prefix.
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Education',
        'Intended Audience :: Developers',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: MIT License',
        # NOTE(review): this does not look like a registered trove
        # classifier -- verify against the official list before uploading.
        'Natural Language :: Igbo (Traditional)',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Software Development :: Libraries :: Python Modules',
        'Topic :: Software Development :: Code Generators',
    ],
    #test_suite = 'nose.collector',
)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import sys
import os
import runpy
import code
from core import transpile


def commandline():
    """IgboLang, the programming language in Igbo

    usages:
        ibolang                 enter REPL
        ibolang [file.ibl]      execute IgboLang script
    """
    # More than one argument is a usage error: show the help text and fail.
    if len(sys.argv) > 2:
        print(commandline.__doc__)
        sys.exit(1)

    elif len(sys.argv) == 2:
        file_path = sys.argv[1]

        if not os.path.exists(file_path):
            print("ibl: file '%s' does not exist" % file_path)
            sys.exit(1)

        # Make imports inside the script resolve relative to the script's own
        # directory, as the regular ``python script.py`` invocation does.
        sys.path[0] = os.path.dirname(os.path.join(os.getcwd(), file_path))

        # Read the source as UTF-8 explicitly: the platform default encoding
        # may not be able to decode Igbo text containing non-ASCII characters.
        with open(file_path, encoding="utf-8") as ibolang:
            python = transpile(src=ibolang)

        code_object = compile(python, file_path, "exec")
        # NOTE(review): ``runpy._run_module_code`` is a private helper whose
        # signature is not guaranteed across Python versions -- confirm.
        runpy._run_module_code(code_object, mod_name="__main__")

    else:
        sys.ps1 = "ibl>> "
        banner = "IgboLang, the programming language in Igbo (Interactive Interpreter)"
        # NOTE(review): ``code.interact`` calls ``readfunc(prompt)`` and
        # expects a line of input back; presumably ``transpile`` handles that
        # call shape -- verify against ``core.transpile``.
        code.interact(banner=banner, readfunc=transpile)


if __name__ == "__main__":
    commandline()
10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to 13 | deal in the Software without restriction, including without limitation the 14 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 15 | sell copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in 19 | all copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 | THE SOFTWARE. 
28 | """ 29 | 30 | 31 | 32 | from igbolang import IgboPlugin 33 | 34 | # Igbo keywords 35 | class igbo_keyword(IgboPlugin): 36 | """ 37 | python igbo keyword 38 | """ 39 | title = "kiiwords wuru n'ime" 40 | description = "kiiwords wuru n'ime Python" 41 | keyword = { 42 | # logic 43 | "ma":"and", 44 | "obu":"or", 45 | "ezi": "True", 46 | "asi":"False", 47 | "odighi":"None", 48 | 49 | # def 50 | "kowa":"def", 51 | "klas":"class", 52 | "onwe":"self", 53 | "uwa":"global", 54 | 55 | # import 56 | "site":"from", 57 | "ibubata":"import", 58 | "dika":"as", 59 | 60 | # flow 61 | "ibia":"return", 62 | "gafere":"pass", 63 | "bulie":"raise", 64 | "gaaba":"continue", 65 | 66 | # control 67 | "oburu":"if", 68 | "ozobu":"elif", 69 | "ozo":"else", 70 | 71 | # for loop 72 | "maka":"for", 73 | "nime":"in", 74 | "obughi nime":"not in", 75 | 76 | # while loop 77 | "mgbe":"while", 78 | "ikwusi":"break", 79 | 80 | # try 81 | "trai":"try", 82 | "ewezuga":"except", 83 | "nikpaazu":"finally", #n'ikpaazu 84 | "ikwuputa":"assert", 85 | 86 | # build in methods 87 | "exek":"exec", 88 | "lamuda":"lambda", 89 | "deputa":"print", 90 | "na":"with", 91 | "meputa":"yield", ################ 92 | } 93 | 94 | 95 | class igbo_buildin_method(IgboPlugin): 96 | """ 97 | python igbo methods 98 | """ 99 | title = "odiniime oru" #internal python functions 100 | description = "odiniime oru Python" 101 | keyword = { 102 | "ntinye":"input", 103 | 104 | # build-in types 105 | "stl":"str", 106 | "bool":"bool", 107 | "ndeputa": "list", 108 | "dicti":"dict", 109 | "tupul":"tuple", 110 | "seti":"set", 111 | "frozenseti":"frozenset", 112 | "chl":"chr", 113 | "ord":"ord", 114 | "failu":"file", 115 | 116 | # number methods 117 | "int":"int", 118 | "float":"float", 119 | "complexi":"complex", 120 | "hex":"hex", 121 | "abs":"abs", 122 | "cmp":"cmp", 123 | 124 | # string methods 125 | "malitena":"startswith", 126 | "mechiena":"endswith", 127 | "sonye":"join", 128 | "Kewaa":"split", 129 | "dochie":"replace", 130 | 
"enkoodu":"encoding", ############## 131 | "dekoodu":"decoding", ############## 132 | 133 | # list methods 134 | "tinye":"append", 135 | "igbati":"extend", 136 | "itinye":"insert", 137 | "pop":"pop", ######################### 138 | "nkeozo":"next", 139 | "wepu":"remove", 140 | "revarsi":"reverse", 141 | "guo":"count", 142 | "ndeksi":"index", 143 | "sot":"sort", 144 | 145 | # dict methods 146 | "kii":"keys", 147 | "uru":"values", 148 | "ihe":"items", 149 | "melite":"update", ################# 150 | "kopi":"copy", 151 | 152 | # set methods 153 | "anyado":"clear", ################ 154 | "igbako":"add", 155 | "tufuo":"discard", 156 | "mjikota":"union", 157 | "nrutu":"intersection", 158 | "odiiche":"difference", #ọdịiche 159 | "symmetric_difference":"symmetric_difference", 160 | 161 | # file methods 162 | "meghe":"open", 163 | "guoba":"read", #gụọ comflict 164 | "dee":"write", 165 | "guoline":"readline", ############ 166 | "guolines":"readlines", ############ 167 | "mechie":"close", 168 | 169 | # OO 170 | "callable":"callable", 171 | "dir":"dir", 172 | "inweattr":"getattr", 173 | "iheattr":"hasattr", 174 | "setiattr":"setattr", 175 | "aku":"property", 176 | 177 | # build in functions 178 | "lenz":"len", 179 | "maz":"max", 180 | "min":"min", 181 | 182 | # build in methods 183 | "enumeratia":"enumerate", 184 | "eval":"eval", 185 | "iyo":"filter", 186 | "maapu":"map", 187 | "renji":"range", 188 | "xrenji":"xrange", 189 | "mgbako":"sum", 190 | "udi":"type", 191 | "ihe":"object", 192 | "ziip":"zip", 193 | "inyeaka":"help", 194 | "obodo":"locals", 195 | "uwas":"globals", 196 | "usoroklass":"classmethod", ####Unclear_translation 197 | } 198 | 199 | 200 | class igbo_exception(IgboPlugin): 201 | """ 202 | python igbo exceptions 203 | Built-in exception keyword 204 | """ 205 | title = "wezuga" 206 | description = "wezuga kiiword diniime Python" 207 | keyword = { 208 | "Naani":"Exception", 209 | "Mmejo":"Error", 210 | # error 211 | "MmejoAlithmetic":"ArithmeticError", 212 | 
"MmejoAssertion":"AssertionError", 213 | "MmejoAttribute":"AttributeError", 214 | "NdumoduDeprecation":"DeprecationWarning", 215 | "MmejoEO":"EOFError", 216 | "MmejoEnvironment":"EnvironmentError", 217 | "MmejoFloatingPoint":"FloatingPointError", 218 | "MmejoIO":"IOError", 219 | "MmejoIbubata":"ImportError", 220 | "MmejoIndentation":"IndentationError", 221 | "MmejoIndex":"IndexError", 222 | "MmejoKii":"KeyError", 223 | "KiiboardIntellupt":"KeyboardInterrupt", 224 | "MmejoLookup":"LookupError", 225 | "MmejoMemory":"MemoryError", 226 | "MmejoAha":"NameError", 227 | "AdighiImplemented":"NotImplemented", 228 | "MmejoAdighiImplemented":"NotImplementedError", 229 | "MmejoOS":"OSError", 230 | "MmejoOverflow":"OverflowError", 231 | "NdumoduOverflow":"OverflowWarning", 232 | "MmejoReference":"ReferenceError", 233 | "MmejoRuntime":"RuntimeError", 234 | "NdumoduRuntime":"RuntimeWarning", 235 | "MmejoStandard":"StandardError", 236 | "KwusiIteration":"StopIteration", 237 | "MmejoSyntax":"SyntaxError", 238 | "NdumoduSyntax":"SyntaxWarning", 239 | "MmejoSystem":"SystemError", 240 | "SystemEgzit":"SystemExit", 241 | "MmejoType":"TypeError", 242 | "MmejoTab":"TabError", 243 | "MmejoUnboundLocal":"UnboundLocalError", 244 | "MmejoUnicode":"UnicodeError", 245 | "NdumoduUser":"UserWarning", 246 | "MmejoValue":"ValueError", 247 | "Ndumodu":"Warning", 248 | "MmejoWindows":"WindowsError", 249 | "MmejoZeroDivision":"ZeroDivisionError", 250 | "MmejoUnicodeDecode":"UnicodeDecodeError", 251 | } 252 | 253 | 254 | class igbolang(IgboPlugin): 255 | """ 256 | ibolang igbo keyword plugin 257 | """ 258 | title = "ibpy" 259 | description = "kiiword diniime iby" 260 | keyword = { 261 | "ibpy":"ibpy", 262 | "Isiusoro":'if __name__=="__main__"', 263 | # must do 'from ibpy import ib_exec'/' first 264 | "ibpyexec":"ib_exec", 265 | 266 | # logic 267 | "==":"==", 268 | "!==":"!=", 269 | "obughi": "not", 270 | "bu":"is", 271 | "obughi bu":"is not", 272 | 273 | 274 | # private 275 | "doc":"doc", 276 | 
"init":"init", 277 | "del":"del", 278 | "repr":"repr", 279 | "inwa":"test", 280 | } 281 | 282 | #enter simplified Igbo dict here 283 | class igbo_sys(IgboPlugin): 284 | """ 285 | ibpy sys module simplified Igbo plugin 286 | """ 287 | title = "sistem" 288 | description = "modulu sistem" 289 | keyword = {"sys":"sys", 290 | "mbipute":"version", 291 | "argv":"argv", 292 | "egzit":"exit", 293 | "getfilesystemencoding":"getfilesystemencoding", 294 | "modulu":"modules", 295 | "platform":"platform", 296 | "stderr":"stderr", 297 | "stdin":"stdin", 298 | "stdout":"stdout", 299 | 300 | # sys path with list methods 301 | "uzo":"path", 302 | } 303 | 304 | class igbo_traceback(IgboPlugin): 305 | """ 306 | ibpy traceback simplified Igbo plugin 307 | """ 308 | title = "sistem" 309 | description = "modulu sistem" 310 | keyword = {"ikowasi obughi bu":"is not defined", 311 | "aha":"name", 312 | "akara":"line", 313 | "Failu":"File", 314 | "abaghiuru":"invalid", 315 | "syntax":"syntax", 316 | } 317 | 318 | # [ibpy.igbodict] 319 | keyword = igbo_keyword() 320 | method = igbo_buildin_method() 321 | exception = igbo_exception() 322 | ibpy= igbolang() 323 | sys = igbo_sys() 324 | trace = igbo_traceback() 325 | 326 | #tools = [igbokeyword, igbomethod, igboexception, igbozhpy, igbosys] 327 | #trace = [igbokeyword, igbomethod, igboexception, igbotrace, igbosys] 328 | trans = dict(keyword.keyword, **method.keyword) 329 | trans = dict(trans, **exception.keyword) 330 | trans = dict(trans, **ibpy.keyword) 331 | trans = dict(trans, **sys.keyword) 332 | trans = dict(trans, **trace.keyword) 333 | -------------------------------------------------------------------------------- /igbolang.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | """Igbo keyword dictionaries 5 | 6 | This is the MIT license: 7 | http://www.opensource.org/licenses/mit-license.php 8 | 9 | Copyright (c) 2019~ Roland|Chima and contributors. 
10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to 13 | deal in the Software without restriction, including without limitation the 14 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 15 | sell copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in 19 | all copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 | THE SOFTWARE. 
# Universal keywords repository
#: always run annotator before access worddict
worddict = {}
#: Traditional Igbo keywords repository
igbodict = {}


class IgboPlugin(object):
    """
    basic plugin class
    """
    pass


def revert_dict(lang_dict):
    """Return a new dictionary mapping each value of *lang_dict* to its key.

    When several keys share the same value, the key that appears first in
    *lang_dict* is the one kept.  The input dictionary is not modified.

    >>> revert_dict({'a':'1', 'b':'2'})
    {'1': 'a', '2': 'b'}
    """
    rev_dict = {}
    # ``dict.keys()`` is a view on Python 3 and has no ``reverse()`` method,
    # so the old "reverse the key list, then overwrite" approach raised
    # AttributeError.  Walking forward and recording only the first key seen
    # for each value gives the same winner and the documented output order.
    for key, value in lang_dict.items():
        rev_dict.setdefault(value, key)
    return rev_dict
15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | #from __future__ import generators 25 | 26 | __doc__ = \ 27 | """ 28 | pyparsing module - Classes and methods to define and execute parsing grammars 29 | 30 | The pyparsing module is an alternative approach to creating and executing simple grammars, 31 | vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you 32 | don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 33 | provides a library of classes that you use to construct the grammar directly in Python. 34 | 35 | Here is a program to parse "Hello, World!" (or any greeting of the form ", !"):: 36 | 37 | from pyparsing import Word, alphas 38 | 39 | # define grammar of a greeting 40 | greet = Word( alphas ) + "," + Word( alphas ) + "!" 41 | 42 | hello = "Hello, World!" 43 | print hello, "->", greet.parseString( hello ) 44 | 45 | The program outputs the following:: 46 | 47 | Hello, World! -> ['Hello', ',', 'World', '!'] 48 | 49 | The Python representation of the grammar is quite readable, owing to the self-explanatory 50 | class names, and the use of '+', '|' and '^' operators. 51 | 52 | The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 53 | object with named attributes. 
54 | 55 | The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 56 | - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 57 | - quoted strings 58 | - embedded comments 59 | """ 60 | 61 | __version__ = "1.5.2" 62 | __versionTime__ = "17 February 2009 19:45" 63 | __author__ = "Paul McGuire " 64 | 65 | import string 66 | from weakref import ref as wkref 67 | import copy 68 | import sys 69 | import warnings 70 | import re 71 | import sre_constants 72 | #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 73 | 74 | __all__ = [ 75 | 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 76 | 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 77 | 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 78 | 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 79 | 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 80 | 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 81 | 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 82 | 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 83 | 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 84 | 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 85 | 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 86 | 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 87 | 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 88 | 'punc8bit', 'pythonStyleComment', 
'quotedString', 'removeQuotes', 'replaceHTMLEntity', 89 | 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 90 | 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 91 | 'indentedBlock', 'originalTextFor', 92 | ] 93 | 94 | 95 | """ 96 | Detect if we are running version 3.X and make appropriate changes 97 | Robert A. Clark 98 | """ 99 | if sys.version_info[0] > 2: 100 | _PY3K = True 101 | _MAX_INT = sys.maxsize 102 | basestring = str 103 | else: 104 | _PY3K = False 105 | _MAX_INT = sys.maxint 106 | 107 | if not _PY3K: 108 | def _ustr(obj): 109 | """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 110 | str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 111 | then < returns the unicode object | encodes it with the default encoding | ... >. 112 | """ 113 | if isinstance(obj,unicode): 114 | return obj 115 | 116 | try: 117 | # If this works, then _ustr(obj) has the same behaviour as str(obj), so 118 | # it won't break any existing code. 119 | return str(obj) 120 | 121 | except UnicodeEncodeError: 122 | # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 123 | # state that "The return value must be a string object". However, does a 124 | # unicode object (being a subclass of basestring) count as a "string 125 | # object"? 126 | # If so, then return a unicode object: 127 | return unicode(obj) 128 | # Else encode it... but how? There are many choices... :) 129 | # Replace unprintables with escape codes? 130 | #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 131 | # Replace unprintables with question marks? 132 | #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 133 | # ... 
134 | else: 135 | _ustr = str 136 | unichr = chr 137 | 138 | if not _PY3K: 139 | def _str2dict(strg): 140 | return dict( [(c,0) for c in strg] ) 141 | else: 142 | _str2dict = set 143 | 144 | def _xml_escape(data): 145 | """Escape &, <, >, ", ', etc. in a string of data.""" 146 | 147 | # ampersand must be replaced first 148 | from_symbols = '&><"\'' 149 | to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()] 150 | for from_,to_ in zip(from_symbols, to_symbols): 151 | data = data.replace(from_, to_) 152 | return data 153 | 154 | class _Constants(object): 155 | pass 156 | 157 | if not _PY3K: 158 | alphas = string.lowercase + string.uppercase 159 | else: 160 | alphas = string.ascii_lowercase + string.ascii_uppercase 161 | nums = string.digits 162 | hexnums = nums + "ABCDEFabcdef" 163 | alphanums = alphas + nums 164 | _bslash = chr(92) 165 | printables = "".join( [ c for c in string.printable if c not in string.whitespace ] ) 166 | 167 | class ParseBaseException(Exception): 168 | """base exception class for all parsing runtime exceptions""" 169 | # Performance tuning: we construct a *lot* of these, so keep this 170 | # constructor as small and fast as possible 171 | def __init__( self, pstr, loc=0, msg=None, elem=None ): 172 | self.loc = loc 173 | if msg is None: 174 | self.msg = pstr 175 | self.pstr = "" 176 | else: 177 | self.msg = msg 178 | self.pstr = pstr 179 | self.parserElement = elem 180 | 181 | def __getattr__( self, aname ): 182 | """supported attributes by name are: 183 | - lineno - returns the line number of the exception text 184 | - col - returns the column number of the exception text 185 | - line - returns the line containing the exception text 186 | """ 187 | if( aname == "lineno" ): 188 | return lineno( self.loc, self.pstr ) 189 | elif( aname in ("col", "column") ): 190 | return col( self.loc, self.pstr ) 191 | elif( aname == "line" ): 192 | return line( self.loc, self.pstr ) 193 | else: 194 | raise AttributeError(aname) 195 | 196 | def 
__str__( self ): 197 | return "%s (at char %d), (line:%d, col:%d)" % \ 198 | ( self.msg, self.loc, self.lineno, self.column ) 199 | def __repr__( self ): 200 | return _ustr(self) 201 | def markInputline( self, markerString = ">!<" ): 202 | """Extracts the exception line from the input string, and marks 203 | the location of the exception with a special symbol. 204 | """ 205 | line_str = self.line 206 | line_column = self.column - 1 207 | if markerString: 208 | line_str = "".join( [line_str[:line_column], 209 | markerString, line_str[line_column:]]) 210 | return line_str.strip() 211 | def __dir__(self): 212 | return "loc msg pstr parserElement lineno col line " \ 213 | "markInputLine __str__ __repr__".split() 214 | 215 | class ParseException(ParseBaseException): 216 | """exception thrown when parse expressions don't match class; 217 | supported attributes by name are: 218 | - lineno - returns the line number of the exception text 219 | - col - returns the column number of the exception text 220 | - line - returns the line containing the exception text 221 | """ 222 | pass 223 | 224 | class ParseFatalException(ParseBaseException): 225 | """user-throwable exception thrown when inconsistent parse content 226 | is found; stops all parsing immediately""" 227 | pass 228 | 229 | class ParseSyntaxException(ParseFatalException): 230 | """just like ParseFatalException, but thrown internally when an 231 | ErrorStop indicates that parsing is to stop immediately because 232 | an unbacktrackable syntax error has been found""" 233 | def __init__(self, pe): 234 | super(ParseSyntaxException, self).__init__( 235 | pe.pstr, pe.loc, pe.msg, pe.parserElement) 236 | 237 | #~ class ReparseException(ParseBaseException): 238 | #~ """Experimental class - parse actions can raise this exception to cause 239 | #~ pyparsing to reparse the input string: 240 | #~ - with a modified input string, and/or 241 | #~ - with a modified start location 242 | #~ Set the values of the ReparseException in the 
constructor, and raise the 243 | #~ exception in a parse action to cause pyparsing to use the new string/location. 244 | #~ Setting the values as None causes no change to be made. 245 | #~ """ 246 | #~ def __init_( self, newstring, restartLoc ): 247 | #~ self.newParseText = newstring 248 | #~ self.reparseLoc = restartLoc 249 | 250 | class RecursiveGrammarException(Exception): 251 | """exception thrown by validate() if the grammar could be improperly recursive""" 252 | def __init__( self, parseElementList ): 253 | self.parseElementTrace = parseElementList 254 | 255 | def __str__( self ): 256 | return "RecursiveGrammarException: %s" % self.parseElementTrace 257 | 258 | class _ParseResultsWithOffset(object): 259 | def __init__(self,p1,p2): 260 | self.tup = (p1,p2) 261 | def __getitem__(self,i): 262 | return self.tup[i] 263 | def __repr__(self): 264 | return repr(self.tup) 265 | def setOffset(self,i): 266 | self.tup = (self.tup[0],i) 267 | 268 | class ParseResults(object): 269 | """Structured parse results, to provide multiple means of access to the parsed data: 270 | - as a list (len(results)) 271 | - by list index (results[0], results[1], etc.) 272 | - by attribute (results.) 
273 | """ 274 | __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" ) 275 | def __new__(cls, toklist, name=None, asList=True, modal=True ): 276 | if isinstance(toklist, cls): 277 | return toklist 278 | retobj = object.__new__(cls) 279 | retobj.__doinit = True 280 | return retobj 281 | 282 | # Performance tuning: we construct a *lot* of these, so keep this 283 | # constructor as small and fast as possible 284 | def __init__( self, toklist, name=None, asList=True, modal=True ): 285 | if self.__doinit: 286 | self.__doinit = False 287 | self.__name = None 288 | self.__parent = None 289 | self.__accumNames = {} 290 | if isinstance(toklist, list): 291 | self.__toklist = toklist[:] 292 | else: 293 | self.__toklist = [toklist] 294 | self.__tokdict = dict() 295 | 296 | if name: 297 | if not modal: 298 | self.__accumNames[name] = 0 299 | if isinstance(name,int): 300 | name = _ustr(name) # will always return a str, but use _ustr for consistency 301 | self.__name = name 302 | if not toklist in (None,'',[]): 303 | if isinstance(toklist,basestring): 304 | toklist = [ toklist ] 305 | if asList: 306 | if isinstance(toklist,ParseResults): 307 | self[name] = _ParseResultsWithOffset(toklist.copy(),0) 308 | else: 309 | self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 310 | self[name].__name = name 311 | else: 312 | try: 313 | self[name] = toklist[0] 314 | except (KeyError,TypeError,IndexError): 315 | self[name] = toklist 316 | 317 | def __getitem__( self, i ): 318 | if isinstance( i, (int,slice) ): 319 | return self.__toklist[i] 320 | else: 321 | if i not in self.__accumNames: 322 | return self.__tokdict[i][-1][0] 323 | else: 324 | return ParseResults([ v[0] for v in self.__tokdict[i] ]) 325 | 326 | def __setitem__( self, k, v ): 327 | if isinstance(v,_ParseResultsWithOffset): 328 | self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 329 | sub = v[0] 330 | elif isinstance(k,int): 331 | self.__toklist[k] = v 332 | 
sub = v 333 | else: 334 | self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 335 | sub = v 336 | if isinstance(sub,ParseResults): 337 | sub.__parent = wkref(self) 338 | 339 | def __delitem__( self, i ): 340 | if isinstance(i,(int,slice)): 341 | mylen = len( self.__toklist ) 342 | del self.__toklist[i] 343 | 344 | # convert int to slice 345 | if isinstance(i, int): 346 | if i < 0: 347 | i += mylen 348 | i = slice(i, i+1) 349 | # get removed indices 350 | removed = list(range(*i.indices(mylen))) 351 | removed.reverse() 352 | # fixup indices in token dictionary 353 | for name in self.__tokdict: 354 | occurrences = self.__tokdict[name] 355 | for j in removed: 356 | for k, (value, position) in enumerate(occurrences): 357 | occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 358 | else: 359 | del self.__tokdict[i] 360 | 361 | def __contains__( self, k ): 362 | return k in self.__tokdict 363 | 364 | def __len__( self ): return len( self.__toklist ) 365 | def __bool__(self): return len( self.__toklist ) > 0 366 | __nonzero__ = __bool__ 367 | def __iter__( self ): return iter( self.__toklist ) 368 | def __reversed__( self ): return iter( reversed(self.__toklist) ) 369 | def keys( self ): 370 | """Returns all named result keys.""" 371 | return self.__tokdict.keys() 372 | 373 | def pop( self, index=-1 ): 374 | """Removes and returns item at specified index (default=last). 
375 | Will work with either numeric indices or dict-key indicies.""" 376 | ret = self[index] 377 | del self[index] 378 | return ret 379 | 380 | def get(self, key, defaultValue=None): 381 | """Returns named result matching the given key, or if there is no 382 | such name, then returns the given defaultValue or None if no 383 | defaultValue is specified.""" 384 | if key in self: 385 | return self[key] 386 | else: 387 | return defaultValue 388 | 389 | def insert( self, index, insStr ): 390 | self.__toklist.insert(index, insStr) 391 | # fixup indices in token dictionary 392 | for name in self.__tokdict: 393 | occurrences = self.__tokdict[name] 394 | for k, (value, position) in enumerate(occurrences): 395 | occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 396 | 397 | def items( self ): 398 | """Returns all named result keys and values as a list of tuples.""" 399 | return [(k,self[k]) for k in self.__tokdict] 400 | 401 | def values( self ): 402 | """Returns all named result values.""" 403 | return [ v[-1][0] for v in self.__tokdict.values() ] 404 | 405 | def __getattr__( self, name ): 406 | if name not in self.__slots__: 407 | if name in self.__tokdict: 408 | if name not in self.__accumNames: 409 | return self.__tokdict[name][-1][0] 410 | else: 411 | return ParseResults([ v[0] for v in self.__tokdict[name] ]) 412 | else: 413 | return "" 414 | return None 415 | 416 | def __add__( self, other ): 417 | ret = self.copy() 418 | ret += other 419 | return ret 420 | 421 | def __iadd__( self, other ): 422 | if other.__tokdict: 423 | offset = len(self.__toklist) 424 | addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 425 | otheritems = other.__tokdict.items() 426 | otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 427 | for (k,vlist) in otheritems for v in vlist] 428 | for k,v in otherdictitems: 429 | self[k] = v 430 | if isinstance(v[0],ParseResults): 431 | v[0].__parent = wkref(self) 432 | 433 | self.__toklist += 
other.__toklist 434 | self.__accumNames.update( other.__accumNames ) 435 | del other 436 | return self 437 | 438 | def __repr__( self ): 439 | return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 440 | 441 | def __str__( self ): 442 | out = "[" 443 | sep = "" 444 | for i in self.__toklist: 445 | if isinstance(i, ParseResults): 446 | out += sep + _ustr(i) 447 | else: 448 | out += sep + repr(i) 449 | sep = ", " 450 | out += "]" 451 | return out 452 | 453 | def _asStringList( self, sep='' ): 454 | out = [] 455 | for item in self.__toklist: 456 | if out and sep: 457 | out.append(sep) 458 | if isinstance( item, ParseResults ): 459 | out += item._asStringList() 460 | else: 461 | out.append( _ustr(item) ) 462 | return out 463 | 464 | def asList( self ): 465 | """Returns the parse results as a nested list of matching tokens, all converted to strings.""" 466 | out = [] 467 | for res in self.__toklist: 468 | if isinstance(res,ParseResults): 469 | out.append( res.asList() ) 470 | else: 471 | out.append( res ) 472 | return out 473 | 474 | def asDict( self ): 475 | """Returns the named parse results as dictionary.""" 476 | return dict( self.items() ) 477 | 478 | def copy( self ): 479 | """Returns a new copy of a ParseResults object.""" 480 | ret = ParseResults( self.__toklist ) 481 | ret.__tokdict = self.__tokdict.copy() 482 | ret.__parent = self.__parent 483 | ret.__accumNames.update( self.__accumNames ) 484 | ret.__name = self.__name 485 | return ret 486 | 487 | def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 488 | """Returns the parse results as XML. 
Tags are created for tokens and lists that have defined results names.""" 489 | nl = "\n" 490 | out = [] 491 | namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() 492 | for v in vlist ] ) 493 | nextLevelIndent = indent + " " 494 | 495 | # collapse out indents if formatting is not desired 496 | if not formatted: 497 | indent = "" 498 | nextLevelIndent = "" 499 | nl = "" 500 | 501 | selfTag = None 502 | if doctag is not None: 503 | selfTag = doctag 504 | else: 505 | if self.__name: 506 | selfTag = self.__name 507 | 508 | if not selfTag: 509 | if namedItemsOnly: 510 | return "" 511 | else: 512 | selfTag = "ITEM" 513 | 514 | out += [ nl, indent, "<", selfTag, ">" ] 515 | 516 | worklist = self.__toklist 517 | for i,res in enumerate(worklist): 518 | if isinstance(res,ParseResults): 519 | if i in namedItems: 520 | out += [ res.asXML(namedItems[i], 521 | namedItemsOnly and doctag is None, 522 | nextLevelIndent, 523 | formatted)] 524 | else: 525 | out += [ res.asXML(None, 526 | namedItemsOnly and doctag is None, 527 | nextLevelIndent, 528 | formatted)] 529 | else: 530 | # individual token, see if there is a name for it 531 | resTag = None 532 | if i in namedItems: 533 | resTag = namedItems[i] 534 | if not resTag: 535 | if namedItemsOnly: 536 | continue 537 | else: 538 | resTag = "ITEM" 539 | xmlBodyText = _xml_escape(_ustr(res)) 540 | out += [ nl, nextLevelIndent, "<", resTag, ">", 541 | xmlBodyText, 542 | "" ] 543 | 544 | out += [ nl, indent, "" ] 545 | return "".join(out) 546 | 547 | def __lookup(self,sub): 548 | for k,vlist in self.__tokdict.items(): 549 | for v,loc in vlist: 550 | if sub is v: 551 | return k 552 | return None 553 | 554 | def getName(self): 555 | """Returns the results name for this token expression.""" 556 | if self.__name: 557 | return self.__name 558 | elif self.__parent: 559 | par = self.__parent() 560 | if par: 561 | return par.__lookup(self) 562 | else: 563 | return None 564 | elif (len(self) == 1 and 565 | len(self.__tokdict) == 
1 and 566 | self.__tokdict.values()[0][0][1] in (0,-1)): 567 | return self.__tokdict.keys()[0] 568 | else: 569 | return None 570 | 571 | def dump(self,indent='',depth=0): 572 | """Diagnostic method for listing out the contents of a ParseResults. 573 | Accepts an optional indent argument so that this string can be embedded 574 | in a nested display of other data.""" 575 | out = [] 576 | out.append( indent+_ustr(self.asList()) ) 577 | keys = self.items() 578 | keys.sort() 579 | for k,v in keys: 580 | if out: 581 | out.append('\n') 582 | out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 583 | if isinstance(v,ParseResults): 584 | if v.keys(): 585 | #~ out.append('\n') 586 | out.append( v.dump(indent,depth+1) ) 587 | #~ out.append('\n') 588 | else: 589 | out.append(_ustr(v)) 590 | else: 591 | out.append(_ustr(v)) 592 | #~ out.append('\n') 593 | return "".join(out) 594 | 595 | # add support for pickle protocol 596 | def __getstate__(self): 597 | return ( self.__toklist, 598 | ( self.__tokdict.copy(), 599 | self.__parent is not None and self.__parent() or None, 600 | self.__accumNames, 601 | self.__name ) ) 602 | 603 | def __setstate__(self,state): 604 | self.__toklist = state[0] 605 | self.__tokdict, \ 606 | par, \ 607 | inAccumNames, \ 608 | self.__name = state[1] 609 | self.__accumNames = {} 610 | self.__accumNames.update(inAccumNames) 611 | if par is not None: 612 | self.__parent = wkref(par) 613 | else: 614 | self.__parent = None 615 | 616 | def __dir__(self): 617 | return dir(super(ParseResults,self)) + self.keys() 618 | 619 | def col (loc,strg): 620 | """Returns current column within a string, counting newlines as line separators. 621 | The first column is number 1. 622 | 623 | Note: the default parsing behavior is to expand tabs in the input string 624 | before starting the parsing process. 
See L{I{ParserElement.parseString}} for more information 625 | on parsing strings containing s, and suggested methods to maintain a 626 | consistent view of the parsed string, the parse location, and line and column 627 | positions within the parsed string. 628 | """ 629 | return (loc} for more information 637 | on parsing strings containing s, and suggested methods to maintain a 638 | consistent view of the parsed string, the parse location, and line and column 639 | positions within the parsed string. 640 | """ 641 | return strg.count("\n",0,loc) + 1 642 | 643 | def line( loc, strg ): 644 | """Returns the line of text containing loc within a string, counting newlines as line separators. 645 | """ 646 | lastCR = strg.rfind("\n", 0, loc) 647 | nextCR = strg.find("\n", loc) 648 | if nextCR > 0: 649 | return strg[lastCR+1:nextCR] 650 | else: 651 | return strg[lastCR+1:] 652 | 653 | def _defaultStartDebugAction( instring, loc, expr ): 654 | print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 655 | 656 | def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ): 657 | print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 658 | 659 | def _defaultExceptionDebugAction( instring, loc, expr, exc ): 660 | print ("Exception raised:" + _ustr(exc)) 661 | 662 | def nullDebugAction(*args): 663 | """'Do-nothing' debug action, to suppress debugging output during parsing.""" 664 | pass 665 | 666 | class ParserElement(object): 667 | """Abstract base level parser element class.""" 668 | DEFAULT_WHITE_CHARS = " \n\t\r" 669 | 670 | def setDefaultWhitespaceChars( chars ): 671 | """Overrides the default whitespace chars 672 | """ 673 | ParserElement.DEFAULT_WHITE_CHARS = chars 674 | setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 675 | 676 | def __init__( self, savelist=False ): 677 | self.parseAction = list() 678 | self.failAction = None 679 | #~ self.name = "" # don't define 
self.name, let subclasses try/except upcall 680 | self.strRepr = None 681 | self.resultsName = None 682 | self.saveAsList = savelist 683 | self.skipWhitespace = True 684 | self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 685 | self.copyDefaultWhiteChars = True 686 | self.mayReturnEmpty = False # used when checking for left-recursion 687 | self.keepTabs = False 688 | self.ignoreExprs = list() 689 | self.debug = False 690 | self.streamlined = False 691 | self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 692 | self.errmsg = "" 693 | self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 694 | self.debugActions = ( None, None, None ) #custom debug actions 695 | self.re = None 696 | self.callPreparse = True # used to avoid redundant calls to preParse 697 | self.callDuringTry = False 698 | 699 | def copy( self ): 700 | """Make a copy of this ParserElement. Useful for defining different parse actions 701 | for the same parsing pattern, using copies of the original parse element.""" 702 | cpy = copy.copy( self ) 703 | cpy.parseAction = self.parseAction[:] 704 | cpy.ignoreExprs = self.ignoreExprs[:] 705 | if self.copyDefaultWhiteChars: 706 | cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 707 | return cpy 708 | 709 | def setName( self, name ): 710 | """Define name for this expression, for use in debugging.""" 711 | self.name = name 712 | self.errmsg = "Expected " + self.name 713 | if hasattr(self,"exception"): 714 | self.exception.msg = self.errmsg 715 | return self 716 | 717 | def setResultsName( self, name, listAllMatches=False ): 718 | """Define name for referencing matching tokens as a nested attribute 719 | of the returned parse results. 720 | NOTE: this returns a *copy* of the original ParserElement object; 721 | this is so that the client can define a basic element, such as an 722 | integer, and reference it in multiple places with different names. 
723 | """ 724 | newself = self.copy() 725 | newself.resultsName = name 726 | newself.modalResults = not listAllMatches 727 | return newself 728 | 729 | def setBreak(self,breakFlag = True): 730 | """Method to invoke the Python pdb debugger when this element is 731 | about to be parsed. Set breakFlag to True to enable, False to 732 | disable. 733 | """ 734 | if breakFlag: 735 | _parseMethod = self._parse 736 | def breaker(instring, loc, doActions=True, callPreParse=True): 737 | import pdb 738 | pdb.set_trace() 739 | return _parseMethod( instring, loc, doActions, callPreParse ) 740 | breaker._originalParseMethod = _parseMethod 741 | self._parse = breaker 742 | else: 743 | if hasattr(self._parse,"_originalParseMethod"): 744 | self._parse = self._parse._originalParseMethod 745 | return self 746 | 747 | def _normalizeParseActionArgs( f ): 748 | """Internal method used to decorate parse actions that take fewer than 3 arguments, 749 | so that all parse actions can be called as f(s,l,t).""" 750 | STAR_ARGS = 4 751 | 752 | try: 753 | restore = None 754 | if isinstance(f,type): 755 | restore = f 756 | f = f.__init__ 757 | if not _PY3K: 758 | codeObj = f.func_code 759 | else: 760 | codeObj = f.code 761 | if codeObj.co_flags & STAR_ARGS: 762 | return f 763 | numargs = codeObj.co_argcount 764 | if not _PY3K: 765 | if hasattr(f,"im_self"): 766 | numargs -= 1 767 | else: 768 | if hasattr(f,"__self__"): 769 | numargs -= 1 770 | if restore: 771 | f = restore 772 | except AttributeError: 773 | try: 774 | if not _PY3K: 775 | call_im_func_code = f.__call__.im_func.func_code 776 | else: 777 | call_im_func_code = f.__code__ 778 | 779 | # not a function, must be a callable object, get info from the 780 | # im_func binding of its bound __call__ method 781 | if call_im_func_code.co_flags & STAR_ARGS: 782 | return f 783 | numargs = call_im_func_code.co_argcount 784 | if not _PY3K: 785 | if hasattr(f.__call__,"im_self"): 786 | numargs -= 1 787 | else: 788 | if hasattr(f.__call__,"__self__"): 
789 | numargs -= 0 790 | except AttributeError: 791 | if not _PY3K: 792 | call_func_code = f.__call__.func_code 793 | else: 794 | call_func_code = f.__call__.__code__ 795 | # not a bound method, get info directly from __call__ method 796 | if call_func_code.co_flags & STAR_ARGS: 797 | return f 798 | numargs = call_func_code.co_argcount 799 | if not _PY3K: 800 | if hasattr(f.__call__,"im_self"): 801 | numargs -= 1 802 | else: 803 | if hasattr(f.__call__,"__self__"): 804 | numargs -= 1 805 | 806 | 807 | #~ print ("adding function %s with %d args" % (f.func_name,numargs)) 808 | if numargs == 3: 809 | return f 810 | else: 811 | if numargs > 3: 812 | def tmp(s,l,t): 813 | return f(f.__call__.__self__, s,l,t) 814 | if numargs == 2: 815 | def tmp(s,l,t): 816 | return f(l,t) 817 | elif numargs == 1: 818 | def tmp(s,l,t): 819 | return f(t) 820 | else: #~ numargs == 0: 821 | def tmp(s,l,t): 822 | return f() 823 | try: 824 | tmp.__name__ = f.__name__ 825 | except (AttributeError,TypeError): 826 | # no need for special handling if attribute doesnt exist 827 | pass 828 | try: 829 | tmp.__doc__ = f.__doc__ 830 | except (AttributeError,TypeError): 831 | # no need for special handling if attribute doesnt exist 832 | pass 833 | try: 834 | tmp.__dict__.update(f.__dict__) 835 | except (AttributeError,TypeError): 836 | # no need for special handling if attribute doesnt exist 837 | pass 838 | return tmp 839 | _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) 840 | 841 | def setParseAction( self, *fns, **kwargs ): 842 | """Define action to perform when successfully matching parse element definition. 
843 | Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), 844 | fn(loc,toks), fn(toks), or just fn(), where: 845 | - s = the original string being parsed (see note below) 846 | - loc = the location of the matching substring 847 | - toks = a list of the matched tokens, packaged as a ParseResults object 848 | If the functions in fns modify the tokens, they can return them as the return 849 | value from fn, and the modified list of tokens will replace the original. 850 | Otherwise, fn does not need to return any value. 851 | 852 | Note: the default parsing behavior is to expand tabs in the input string 853 | before starting the parsing process. See L{I{parseString}} for more information 854 | on parsing strings containing s, and suggested methods to maintain a 855 | consistent view of the parsed string, the parse location, and line and column 856 | positions within the parsed string. 857 | """ 858 | self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 859 | self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 860 | return self 861 | 862 | def addParseAction( self, *fns, **kwargs ): 863 | """Add parse action to expression's list of parse actions. See L{I{setParseAction}}.""" 864 | self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 865 | self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 866 | return self 867 | 868 | def setFailAction( self, fn ): 869 | """Define action to perform if parsing fails at this expression. 870 | Fail acton fn is a callable function that takes the arguments 871 | fn(s,loc,expr,err) where: 872 | - s = string being parsed 873 | - loc = location where expression match was attempted and failed 874 | - expr = the parse expression that failed 875 | - err = the exception thrown 876 | The function returns no value. 
It may throw ParseFatalException 877 | if it is desired to stop parsing immediately.""" 878 | self.failAction = fn 879 | return self 880 | 881 | def _skipIgnorables( self, instring, loc ): 882 | exprsFound = True 883 | while exprsFound: 884 | exprsFound = False 885 | for e in self.ignoreExprs: 886 | try: 887 | while 1: 888 | loc,dummy = e._parse( instring, loc ) 889 | exprsFound = True 890 | except ParseException: 891 | pass 892 | return loc 893 | 894 | def preParse( self, instring, loc ): 895 | if self.ignoreExprs: 896 | loc = self._skipIgnorables( instring, loc ) 897 | 898 | if self.skipWhitespace: 899 | wt = self.whiteChars 900 | instrlen = len(instring) 901 | while loc < instrlen and instring[loc] in wt: 902 | loc += 1 903 | 904 | return loc 905 | 906 | def parseImpl( self, instring, loc, doActions=True ): 907 | return loc, [] 908 | 909 | def postParse( self, instring, loc, tokenlist ): 910 | return tokenlist 911 | 912 | #~ @profile 913 | def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): 914 | debugging = ( self.debug ) #and doActions ) 915 | 916 | if debugging or self.failAction: 917 | #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 918 | if (self.debugActions[0] ): 919 | self.debugActions[0]( instring, loc, self ) 920 | if callPreParse and self.callPreparse: 921 | preloc = self.preParse( instring, loc ) 922 | else: 923 | preloc = loc 924 | tokensStart = loc 925 | try: 926 | try: 927 | loc,tokens = self.parseImpl( instring, preloc, doActions ) 928 | except IndexError: 929 | raise ParseException( instring, len(instring), self.errmsg, self ) 930 | except ParseBaseException, err: 931 | #~ print ("Exception raised:", err) 932 | if self.debugActions[2]: 933 | self.debugActions[2]( instring, tokensStart, self, err ) 934 | if self.failAction: 935 | self.failAction( instring, tokensStart, self, err ) 936 | raise 937 | else: 938 | if callPreParse and self.callPreparse: 939 | preloc = self.preParse( 
instring, loc ) 940 | else: 941 | preloc = loc 942 | tokensStart = loc 943 | if self.mayIndexError or loc >= len(instring): 944 | try: 945 | loc,tokens = self.parseImpl( instring, preloc, doActions ) 946 | except IndexError: 947 | raise ParseException( instring, len(instring), self.errmsg, self ) 948 | else: 949 | loc,tokens = self.parseImpl( instring, preloc, doActions ) 950 | 951 | tokens = self.postParse( instring, loc, tokens ) 952 | 953 | retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 954 | if self.parseAction and (doActions or self.callDuringTry): 955 | if debugging: 956 | try: 957 | for fn in self.parseAction: 958 | tokens = fn( instring, tokensStart, retTokens ) 959 | if tokens is not None: 960 | retTokens = ParseResults( tokens, 961 | self.resultsName, 962 | asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 963 | modal=self.modalResults ) 964 | except ParseBaseException, err: 965 | #~ print "Exception raised in user parse action:", err 966 | if (self.debugActions[2] ): 967 | self.debugActions[2]( instring, tokensStart, self, err ) 968 | raise 969 | else: 970 | for fn in self.parseAction: 971 | tokens = fn( instring, tokensStart, retTokens ) 972 | if tokens is not None: 973 | retTokens = ParseResults( tokens, 974 | self.resultsName, 975 | asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 976 | modal=self.modalResults ) 977 | 978 | if debugging: 979 | #~ print ("Matched",self,"->",retTokens.asList()) 980 | if (self.debugActions[1] ): 981 | self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) 982 | 983 | return loc, retTokens 984 | 985 | def tryParse( self, instring, loc ): 986 | try: 987 | return self._parse( instring, loc, doActions=False )[0] 988 | except ParseFatalException: 989 | raise ParseException( instring, loc, self.errmsg, self) 990 | 991 | # this method gets repeatedly called during backtracking with the same arguments - 992 | # we can cache 
these arguments and save ourselves the trouble of re-parsing the contained expression 993 | def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 994 | lookup = (self,instring,loc,callPreParse,doActions) 995 | if lookup in ParserElement._exprArgCache: 996 | value = ParserElement._exprArgCache[ lookup ] 997 | if isinstance(value,Exception): 998 | raise value 999 | return value 1000 | else: 1001 | try: 1002 | value = self._parseNoCache( instring, loc, doActions, callPreParse ) 1003 | ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) 1004 | return value 1005 | except ParseBaseException, pe: 1006 | ParserElement._exprArgCache[ lookup ] = pe 1007 | raise 1008 | 1009 | _parse = _parseNoCache 1010 | 1011 | # argument cache for optimizing repeated calls when backtracking through recursive expressions 1012 | _exprArgCache = {} 1013 | def resetCache(): 1014 | ParserElement._exprArgCache.clear() 1015 | resetCache = staticmethod(resetCache) 1016 | 1017 | _packratEnabled = False 1018 | def enablePackrat(): 1019 | """Enables "packrat" parsing, which adds memoizing to the parsing logic. 1020 | Repeated parse attempts at the same string location (which happens 1021 | often in many complex grammars) can immediately return a cached value, 1022 | instead of re-executing parsing/validating code. Memoizing is done of 1023 | both valid results and parsing exceptions. 1024 | 1025 | This speedup may break existing programs that use parse actions that 1026 | have side-effects. For this reason, packrat parsing is disabled when 1027 | you first import pyparsing. To activate the packrat feature, your 1028 | program must call the class method ParserElement.enablePackrat(). If 1029 | your program uses psyco to "compile as you go", you must call 1030 | enablePackrat before calling psyco.full(). If you do not do this, 1031 | Python will crash. For best results, call enablePackrat() immediately 1032 | after importing pyparsing. 
1033 | """ 1034 | if not ParserElement._packratEnabled: 1035 | ParserElement._packratEnabled = True 1036 | ParserElement._parse = ParserElement._parseCache 1037 | enablePackrat = staticmethod(enablePackrat) 1038 | 1039 | def parseString( self, instring, parseAll=False ): 1040 | """Execute the parse expression with the given string. 1041 | This is the main interface to the client code, once the complete 1042 | expression has been built. 1043 | 1044 | If you want the grammar to require that the entire input string be 1045 | successfully parsed, then set parseAll to True (equivalent to ending 1046 | the grammar with StringEnd()). 1047 | 1048 | Note: parseString implicitly calls expandtabs() on the input string, 1049 | in order to report proper column numbers in parse actions. 1050 | If the input string contains tabs and 1051 | the grammar uses parse actions that use the loc argument to index into the 1052 | string being parsed, you can ensure you have a consistent view of the input 1053 | string by: 1054 | - calling parseWithTabs on your grammar before calling parseString 1055 | (see L{I{parseWithTabs}}) 1056 | - define your parse action using the full (s,loc,toks) signature, and 1057 | reference the input string using the parse action's s argument 1058 | - explictly expand the tabs in your input string before calling 1059 | parseString 1060 | """ 1061 | ParserElement.resetCache() 1062 | if not self.streamlined: 1063 | self.streamline() 1064 | #~ self.saveAsList = True 1065 | for e in self.ignoreExprs: 1066 | e.streamline() 1067 | if not self.keepTabs: 1068 | instring = instring.expandtabs() 1069 | try: 1070 | loc, tokens = self._parse( instring, 0 ) 1071 | if parseAll: 1072 | loc = self.preParse( instring, loc ) 1073 | StringEnd()._parse( instring, loc ) 1074 | except ParseBaseException, exc: 1075 | # catch and re-raise exception from here, clears out pyparsing internal stack trace 1076 | raise exc 1077 | else: 1078 | return tokens 1079 | 1080 | def scanString( self, 
instring, maxMatches=_MAX_INT ): 1081 | """Scan the input string for expression matches. Each match will return the 1082 | matching tokens, start location, and end location. May be called with optional 1083 | maxMatches argument, to clip scanning after 'n' matches are found. 1084 | 1085 | Note that the start and end locations are reported relative to the string 1086 | being parsed. See L{I{parseString}} for more information on parsing 1087 | strings with embedded tabs.""" 1088 | if not self.streamlined: 1089 | self.streamline() 1090 | for e in self.ignoreExprs: 1091 | e.streamline() 1092 | 1093 | if not self.keepTabs: 1094 | instring = _ustr(instring).expandtabs() 1095 | instrlen = len(instring) 1096 | loc = 0 1097 | preparseFn = self.preParse 1098 | parseFn = self._parse 1099 | ParserElement.resetCache() 1100 | matches = 0 1101 | try: 1102 | while loc <= instrlen and matches < maxMatches: 1103 | try: 1104 | preloc = preparseFn( instring, loc ) 1105 | nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 1106 | except ParseException: 1107 | loc = preloc+1 1108 | else: 1109 | matches += 1 1110 | yield tokens, preloc, nextLoc 1111 | loc = nextLoc 1112 | except ParseBaseException, pe: 1113 | raise pe 1114 | 1115 | def transformString( self, instring ): 1116 | """Extension to scanString, to modify matching text with modified tokens that may 1117 | be returned from a parse action. To use transformString, define a grammar and 1118 | attach a parse action to it that modifies the returned token list. 1119 | Invoking transformString() on a target string will then scan for matches, 1120 | and replace the matched text patterns according to the logic in the parse 1121 | action. 
transformString() returns the resulting transformed string.""" 1122 | out = [] 1123 | lastE = 0 1124 | # force preservation of s, to minimize unwanted transformation of string, and to 1125 | # keep string locs straight between transformString and scanString 1126 | self.keepTabs = True 1127 | try: 1128 | for t,s,e in self.scanString( instring ): 1129 | out.append( instring[lastE:s] ) 1130 | if t: 1131 | if isinstance(t,ParseResults): 1132 | out += t.asList() 1133 | elif isinstance(t,list): 1134 | out += t 1135 | else: 1136 | out.append(t) 1137 | lastE = e 1138 | out.append(instring[lastE:]) 1139 | return "".join(map(_ustr,out)) 1140 | except ParseBaseException, pe: 1141 | raise pe 1142 | 1143 | def searchString( self, instring, maxMatches=_MAX_INT ): 1144 | """Another extension to scanString, simplifying the access to the tokens found 1145 | to match the given parse expression. May be called with optional 1146 | maxMatches argument, to clip searching after 'n' matches are found. 1147 | """ 1148 | try: 1149 | return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 1150 | except ParseBaseException, pe: 1151 | raise pe 1152 | 1153 | def __add__(self, other ): 1154 | """Implementation of + operator - returns And""" 1155 | if isinstance( other, basestring ): 1156 | other = Literal( other ) 1157 | if not isinstance( other, ParserElement ): 1158 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1159 | SyntaxWarning, stacklevel=2) 1160 | return None 1161 | return And( [ self, other ] ) 1162 | 1163 | def __radd__(self, other ): 1164 | """Implementation of + operator when left operand is not a ParserElement""" 1165 | if isinstance( other, basestring ): 1166 | other = Literal( other ) 1167 | if not isinstance( other, ParserElement ): 1168 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1169 | SyntaxWarning, stacklevel=2) 1170 | return None 1171 | return other + self 1172 | 1173 | 
def __sub__(self, other): 1174 | """Implementation of - operator, returns And with error stop""" 1175 | if isinstance( other, basestring ): 1176 | other = Literal( other ) 1177 | if not isinstance( other, ParserElement ): 1178 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1179 | SyntaxWarning, stacklevel=2) 1180 | return None 1181 | return And( [ self, And._ErrorStop(), other ] ) 1182 | 1183 | def __rsub__(self, other ): 1184 | """Implementation of - operator when left operand is not a ParserElement""" 1185 | if isinstance( other, basestring ): 1186 | other = Literal( other ) 1187 | if not isinstance( other, ParserElement ): 1188 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1189 | SyntaxWarning, stacklevel=2) 1190 | return None 1191 | return other - self 1192 | 1193 | def __mul__(self,other): 1194 | if isinstance(other,int): 1195 | minElements, optElements = other,0 1196 | elif isinstance(other,tuple): 1197 | other = (other + (None, None))[:2] 1198 | if other[0] is None: 1199 | other = (0, other[1]) 1200 | if isinstance(other[0],int) and other[1] is None: 1201 | if other[0] == 0: 1202 | return ZeroOrMore(self) 1203 | if other[0] == 1: 1204 | return OneOrMore(self) 1205 | else: 1206 | return self*other[0] + ZeroOrMore(self) 1207 | elif isinstance(other[0],int) and isinstance(other[1],int): 1208 | minElements, optElements = other 1209 | optElements -= minElements 1210 | else: 1211 | raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1212 | else: 1213 | raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1214 | 1215 | if minElements < 0: 1216 | raise ValueError("cannot multiply ParserElement by negative value") 1217 | if optElements < 0: 1218 | raise ValueError("second tuple value must be greater or equal to first tuple value") 1219 | if minElements == optElements == 0: 1220 | raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1221 | 1222 | if (optElements): 1223 | def makeOptionalList(n): 1224 | if n>1: 1225 | return Optional(self + makeOptionalList(n-1)) 1226 | else: 1227 | return Optional(self) 1228 | if minElements: 1229 | if minElements == 1: 1230 | ret = self + makeOptionalList(optElements) 1231 | else: 1232 | ret = And([self]*minElements) + makeOptionalList(optElements) 1233 | else: 1234 | ret = makeOptionalList(optElements) 1235 | else: 1236 | if minElements == 1: 1237 | ret = self 1238 | else: 1239 | ret = And([self]*minElements) 1240 | return ret 1241 | 1242 | def __rmul__(self, other): 1243 | return self.__mul__(other) 1244 | 1245 | def __or__(self, other ): 1246 | """Implementation of | operator - returns MatchFirst""" 1247 | if isinstance( other, basestring ): 1248 | other = Literal( other ) 1249 | if not isinstance( other, ParserElement ): 1250 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1251 | SyntaxWarning, stacklevel=2) 1252 | return None 1253 | return MatchFirst( [ self, other ] ) 1254 | 1255 | def __ror__(self, other ): 1256 | """Implementation of | operator when left operand is not a ParserElement""" 1257 | if isinstance( other, basestring ): 1258 | other = Literal( other ) 1259 | if not isinstance( other, ParserElement ): 1260 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1261 | SyntaxWarning, stacklevel=2) 1262 | return None 1263 | return other | self 1264 | 1265 | def __xor__(self, other ): 1266 | """Implementation of ^ operator - returns Or""" 1267 | if isinstance( other, basestring ): 1268 | other = Literal( other ) 1269 | if not isinstance( other, ParserElement ): 1270 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1271 | SyntaxWarning, stacklevel=2) 1272 | return None 1273 | return Or( [ self, other ] ) 1274 | 1275 | def __rxor__(self, other ): 1276 | """Implementation of ^ operator when left operand is 
not a ParserElement""" 1277 | if isinstance( other, basestring ): 1278 | other = Literal( other ) 1279 | if not isinstance( other, ParserElement ): 1280 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1281 | SyntaxWarning, stacklevel=2) 1282 | return None 1283 | return other ^ self 1284 | 1285 | def __and__(self, other ): 1286 | """Implementation of & operator - returns Each""" 1287 | if isinstance( other, basestring ): 1288 | other = Literal( other ) 1289 | if not isinstance( other, ParserElement ): 1290 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1291 | SyntaxWarning, stacklevel=2) 1292 | return None 1293 | return Each( [ self, other ] ) 1294 | 1295 | def __rand__(self, other ): 1296 | """Implementation of & operator when left operand is not a ParserElement""" 1297 | if isinstance( other, basestring ): 1298 | other = Literal( other ) 1299 | if not isinstance( other, ParserElement ): 1300 | warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 1301 | SyntaxWarning, stacklevel=2) 1302 | return None 1303 | return other & self 1304 | 1305 | def __invert__( self ): 1306 | """Implementation of ~ operator - returns NotAny""" 1307 | return NotAny( self ) 1308 | 1309 | def __call__(self, name): 1310 | """Shortcut for setResultsName, with listAllMatches=default:: 1311 | userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1312 | could be written as:: 1313 | userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1314 | """ 1315 | return self.setResultsName(name) 1316 | 1317 | def suppress( self ): 1318 | """Suppresses the output of this ParserElement; useful to keep punctuation from 1319 | cluttering up returned output. 
    def parseWithTabs( self ):
        """Overrides the default behavior of expanding <TAB> characters to spaces
           before parsing the input string, by setting the keepTabs flag on this
           element.  Must be called before parseString when the input grammar
           contains elements that match <TAB> characters.

           Returns self, so the call can be chained.
        """
        # NOTE(review): the code that reads keepTabs is outside this view; it
        # presumably skips tab expansion when the flag is set - confirm there.
        self.keepTabs = True
        return self
1368 | Set flag to True to enable, False to disable.""" 1369 | if flag: 1370 | self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 1371 | else: 1372 | self.debug = False 1373 | return self 1374 | 1375 | def __str__( self ): 1376 | return self.name 1377 | 1378 | def __repr__( self ): 1379 | return _ustr(self) 1380 | 1381 | def streamline( self ): 1382 | self.streamlined = True 1383 | self.strRepr = None 1384 | return self 1385 | 1386 | def checkRecursion( self, parseElementList ): 1387 | pass 1388 | 1389 | def validate( self, validateTrace=[] ): 1390 | """Check defined expressions for valid structure, check for infinite recursive definitions.""" 1391 | self.checkRecursion( [] ) 1392 | 1393 | def parseFile( self, file_or_filename, parseAll=False ): 1394 | """Execute the parse expression on the given file or filename. 1395 | If a filename is specified (instead of a file object), 1396 | the entire file is opened, read, and closed before parsing. 
1397 | """ 1398 | try: 1399 | file_contents = file_or_filename.read() 1400 | except AttributeError: 1401 | f = open(file_or_filename, "rb") 1402 | file_contents = f.read() 1403 | f.close() 1404 | try: 1405 | return self.parseString(file_contents, parseAll) 1406 | except ParseBaseException, exc: 1407 | # catch and re-raise exception from here, clears out pyparsing internal stack trace 1408 | raise exc 1409 | 1410 | def getException(self): 1411 | return ParseException("",0,self.errmsg,self) 1412 | 1413 | def __getattr__(self,aname): 1414 | if aname == "myException": 1415 | self.myException = ret = self.getException(); 1416 | return ret; 1417 | else: 1418 | raise AttributeError("no such attribute " + aname) 1419 | 1420 | def __eq__(self,other): 1421 | if isinstance(other, ParserElement): 1422 | return self is other or self.__dict__ == other.__dict__ 1423 | elif isinstance(other, basestring): 1424 | try: 1425 | self.parseString(_ustr(other), parseAll=True) 1426 | return True 1427 | except ParseBaseException: 1428 | return False 1429 | else: 1430 | return super(ParserElement,self)==other 1431 | 1432 | def __ne__(self,other): 1433 | return not (self == other) 1434 | 1435 | def __hash__(self): 1436 | return hash(id(self)) 1437 | 1438 | def __req__(self,other): 1439 | return self == other 1440 | 1441 | def __rne__(self,other): 1442 | return not (self == other) 1443 | 1444 | 1445 | class Token(ParserElement): 1446 | """Abstract ParserElement subclass, for defining atomic matching patterns.""" 1447 | def __init__( self ): 1448 | super(Token,self).__init__( savelist=False ) 1449 | #self.myException = ParseException("",0,"",self) 1450 | 1451 | def setName(self, name): 1452 | s = super(Token,self).setName(name) 1453 | self.errmsg = "Expected " + self.name 1454 | #s.myException.msg = self.errmsg 1455 | return s 1456 | 1457 | 1458 | class Empty(Token): 1459 | """An empty token, will always match.""" 1460 | def __init__( self ): 1461 | super(Empty,self).__init__() 1462 | self.name 
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.

       parseImpl also rejects a match that is immediately preceded by one of the
       identChars.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    # NOTE(review): the identChars default below is bound once, when this def is
    # executed, so a later setDefaultKeywordChars() call does not change the
    # default used by __init__ - confirm whether that is intended.
    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty keyword: only a warning is issued and firstMatchChar stays unset
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare in upper case: both the keyword and the identifier characters
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # Both branches require, in this order: the keyword text at loc; then
        # either end-of-input right after it or a following character that is
        # not an identifier character; then either loc == 0 or a preceding
        # character that is not an identifier character.  The length/position
        # tests come first in each "or" so the index accesses stay in range.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            # cheap first-character test before the startswith call
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        # no match: fill in location and input on the myException instance and raise it
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        c = super(Keyword,self).copy()
        # NOTE(review): this replaces the copy's identChars with the class
        # default string (not a _str2dict result), discarding any identChars
        # passed to the constructor - confirm this is intended.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    # pre-decorator spelling of @staticmethod
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessKeyword(Keyword):
    """Caseless version of Keyword: matches the keyword string regardless of
       letter case and returns the match string as it was passed to the
       constructor.

       Matching is inherited from the caseless branch of Keyword.parseImpl, so the
       keyword must be neither followed nor preceded by one of the identChars.
       (A separate parseImpl override used to skip the preceding-character test,
       which let CaselessKeyword("if") match the tail of "xif".)
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
1622 | """ 1623 | def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ): 1624 | super(Word,self).__init__() 1625 | self.initCharsOrig = initChars 1626 | self.initChars = _str2dict(initChars) 1627 | if bodyChars : 1628 | self.bodyCharsOrig = bodyChars 1629 | self.bodyChars = _str2dict(bodyChars) 1630 | else: 1631 | self.bodyCharsOrig = initChars 1632 | self.bodyChars = _str2dict(initChars) 1633 | 1634 | self.maxSpecified = max > 0 1635 | 1636 | if min < 1: 1637 | raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 1638 | 1639 | self.minLen = min 1640 | 1641 | if max > 0: 1642 | self.maxLen = max 1643 | else: 1644 | self.maxLen = _MAX_INT 1645 | 1646 | if exact > 0: 1647 | self.maxLen = exact 1648 | self.minLen = exact 1649 | 1650 | self.name = _ustr(self) 1651 | self.errmsg = "Expected " + self.name 1652 | #self.myException.msg = self.errmsg 1653 | self.mayIndexError = False 1654 | self.asKeyword = asKeyword 1655 | 1656 | if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 1657 | if self.bodyCharsOrig == self.initCharsOrig: 1658 | self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 1659 | elif len(self.bodyCharsOrig) == 1: 1660 | self.reString = "%s[%s]*" % \ 1661 | (re.escape(self.initCharsOrig), 1662 | _escapeRegexRangeChars(self.bodyCharsOrig),) 1663 | else: 1664 | self.reString = "[%s][%s]*" % \ 1665 | (_escapeRegexRangeChars(self.initCharsOrig), 1666 | _escapeRegexRangeChars(self.bodyCharsOrig),) 1667 | if self.asKeyword: 1668 | self.reString = r"\b"+self.reString+r"\b" 1669 | try: 1670 | self.re = re.compile( self.reString ) 1671 | except: 1672 | self.re = None 1673 | 1674 | def parseImpl( self, instring, loc, doActions=True ): 1675 | if self.re: 1676 | result = self.re.match(instring,loc) 1677 | if not result: 1678 | exc = self.myException 1679 | exc.loc = loc 1680 | exc.pstr = instring 1681 | raise exc 1682 | 
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

           An empty pattern only produces a SyntaxWarning.  A pattern that fails to
           compile produces a SyntaxWarning and the compile error is re-raised."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.

           Returns (end of match, ParseResults holding the matched text); every
           named group of the pattern is also stored in the results under its
           group name.  Raises this element's myException if there is no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except AttributeError:
            # no name assigned yet; fall back to the generated representation.
            # Only AttributeError is expected here, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1801 | """ 1802 | def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): 1803 | """ 1804 | Defined with the following parameters: 1805 | - quoteChar - string of one or more characters defining the quote delimiting string 1806 | - escChar - character to escape quotes, typically backslash (default=None) 1807 | - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 1808 | - multiline - boolean indicating whether quotes can span multiple lines (default=False) 1809 | - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 1810 | - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 1811 | """ 1812 | super(QuotedString,self).__init__() 1813 | 1814 | # remove white space from quote chars - wont work anyway 1815 | quoteChar = quoteChar.strip() 1816 | if len(quoteChar) == 0: 1817 | warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 1818 | raise SyntaxError() 1819 | 1820 | if endQuoteChar is None: 1821 | endQuoteChar = quoteChar 1822 | else: 1823 | endQuoteChar = endQuoteChar.strip() 1824 | if len(endQuoteChar) == 0: 1825 | warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 1826 | raise SyntaxError() 1827 | 1828 | self.quoteChar = quoteChar 1829 | self.quoteCharLen = len(quoteChar) 1830 | self.firstQuoteChar = quoteChar[0] 1831 | self.endQuoteChar = endQuoteChar 1832 | self.endQuoteCharLen = len(endQuoteChar) 1833 | self.escChar = escChar 1834 | self.escQuote = escQuote 1835 | self.unquoteResults = unquoteResults 1836 | 1837 | if multiline: 1838 | self.flags = re.MULTILINE | re.DOTALL 1839 | self.pattern = r'%s(?:[^%s%s]' % \ 1840 | ( re.escape(self.quoteChar), 1841 | _escapeRegexRangeChars(self.endQuoteChar[0]), 1842 | (escChar is not None and 
_escapeRegexRangeChars(escChar) or '') ) 1843 | else: 1844 | self.flags = 0 1845 | self.pattern = r'%s(?:[^%s\n\r%s]' % \ 1846 | ( re.escape(self.quoteChar), 1847 | _escapeRegexRangeChars(self.endQuoteChar[0]), 1848 | (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 1849 | if len(self.endQuoteChar) > 1: 1850 | self.pattern += ( 1851 | '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 1852 | _escapeRegexRangeChars(self.endQuoteChar[i])) 1853 | for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')' 1854 | ) 1855 | if escQuote: 1856 | self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 1857 | if escChar: 1858 | self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 1859 | self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 1860 | self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 1861 | 1862 | try: 1863 | self.re = re.compile(self.pattern, self.flags) 1864 | self.reString = self.pattern 1865 | except sre_constants.error: 1866 | warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 1867 | SyntaxWarning, stacklevel=2) 1868 | raise 1869 | 1870 | self.name = _ustr(self) 1871 | self.errmsg = "Expected " + self.name 1872 | #self.myException.msg = self.errmsg 1873 | self.mayIndexError = False 1874 | self.mayReturnEmpty = True 1875 | 1876 | def parseImpl( self, instring, loc, doActions=True ): 1877 | result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 1878 | if not result: 1879 | exc = self.myException 1880 | exc.loc = loc 1881 | exc.pstr = instring 1882 | raise exc 1883 | 1884 | loc = result.end() 1885 | ret = result.group() 1886 | 1887 | if self.unquoteResults: 1888 | 1889 | # strip off quotes 1890 | ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 1891 | 1892 | if isinstance(ret,basestring): 1893 | # replace escaped characters 1894 | if self.escChar: 1895 | ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) 1896 | 1897 | # replace escaped quotes 1898 | if self.escQuote: 1899 | 
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """notChars - string of characters that end the match
           min, max, exact - length limits as described on the class; a positive
           exact overrides both min and max.

           Raises ValueError if min is less than 1.
        """
        super(CharsNotIn,self).__init__()
        # leading whitespace is not skipped for this token
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc up to the first one in notChars, limited
           to maxLen characters.

           Returns (end location, matched text).  Raises this element's
           myException if the first character is in notChars or fewer than
           minLen characters were consumed.
        """
        if instring[loc] in self.notChars:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except AttributeError:
            # no name assigned yet; fall back to the generated representation.
            # Only AttributeError is expected here, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions, then whitespace, until the target column
           (or a non-space character, or the end of input) is reached."""
        if col(loc,instring) == self.col:
            # already there
            return loc
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        limit = len(instring)
        while loc < limit and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column; fail if loc is
           already past that column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) 2079 | self.errmsg = "Expected start of line" 2080 | #self.myException.msg = self.errmsg 2081 | 2082 | def preParse( self, instring, loc ): 2083 | preloc = super(LineStart,self).preParse(instring,loc) 2084 | if instring[preloc] == "\n": 2085 | loc += 1 2086 | return loc 2087 | 2088 | def parseImpl( self, instring, loc, doActions=True ): 2089 | if not( loc==0 or 2090 | (loc == self.preParse( instring, 0 )) or 2091 | (instring[loc-1] == "\n") ): #col(loc, instring) != 1: 2092 | #~ raise ParseException( instring, loc, "Expected start of line" ) 2093 | exc = self.myException 2094 | exc.loc = loc 2095 | exc.pstr = instring 2096 | raise exc 2097 | return loc, [] 2098 | 2099 | class LineEnd(_PositionToken): 2100 | """Matches if current position is at the end of a line within the parse string""" 2101 | def __init__( self ): 2102 | super(LineEnd,self).__init__() 2103 | self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") ) 2104 | self.errmsg = "Expected end of line" 2105 | #self.myException.msg = self.errmsg 2106 | 2107 | def parseImpl( self, instring, loc, doActions=True ): 2108 | if loc len(instring): 2160 | return loc, [] 2161 | else: 2162 | exc = self.myException 2163 | exc.loc = loc 2164 | exc.pstr = instring 2165 | raise exc 2166 | 2167 | class WordStart(_PositionToken): 2168 | """Matches if the current position is at the beginning of a Word, and 2169 | is not preceded by any character in a given set of wordChars 2170 | (default=printables). To emulate the \b behavior of regular expressions, 2171 | use WordStart(alphanums). WordStart will also match at the beginning of 2172 | the string being parsed, or at the beginning of a line. 
2173 | """ 2174 | def __init__(self, wordChars = printables): 2175 | super(WordStart,self).__init__() 2176 | self.wordChars = _str2dict(wordChars) 2177 | self.errmsg = "Not at the start of a word" 2178 | 2179 | def parseImpl(self, instring, loc, doActions=True ): 2180 | if loc != 0: 2181 | if (instring[loc-1] in self.wordChars or 2182 | instring[loc] not in self.wordChars): 2183 | exc = self.myException 2184 | exc.loc = loc 2185 | exc.pstr = instring 2186 | raise exc 2187 | return loc, [] 2188 | 2189 | class WordEnd(_PositionToken): 2190 | """Matches if the current position is at the end of a Word, and 2191 | is not followed by any character in a given set of wordChars 2192 | (default=printables). To emulate the \b behavior of regular expressions, 2193 | use WordEnd(alphanums). WordEnd will also match at the end of 2194 | the string being parsed, or at the end of a line. 2195 | """ 2196 | def __init__(self, wordChars = printables): 2197 | super(WordEnd,self).__init__() 2198 | self.wordChars = _str2dict(wordChars) 2199 | self.skipWhitespace = False 2200 | self.errmsg = "Not at the end of a word" 2201 | 2202 | def parseImpl(self, instring, loc, doActions=True ): 2203 | instrlen = len(instring) 2204 | if instrlen>0 and loc maxExcLoc: 2401 | maxException = err 2402 | maxExcLoc = err.loc 2403 | except IndexError: 2404 | if len(instring) > maxExcLoc: 2405 | maxException = ParseException(instring,len(instring),e.errmsg,self) 2406 | maxExcLoc = len(instring) 2407 | else: 2408 | if loc2 > maxMatchLoc: 2409 | maxMatchLoc = loc2 2410 | maxMatchExp = e 2411 | 2412 | if maxMatchLoc < 0: 2413 | if maxException is not None: 2414 | raise maxException 2415 | else: 2416 | raise ParseException(instring, loc, "no defined alternatives to match", self) 2417 | 2418 | return maxMatchExp._parse( instring, loc, doActions ) 2419 | 2420 | def __ixor__(self, other ): 2421 | if isinstance( other, basestring ): 2422 | other = Literal( other ) 2423 | return self.append( other ) #Or( [ self, other 
] ) 2424 | 2425 | def __str__( self ): 2426 | if hasattr(self,"name"): 2427 | return self.name 2428 | 2429 | if self.strRepr is None: 2430 | self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2431 | 2432 | return self.strRepr 2433 | 2434 | def checkRecursion( self, parseElementList ): 2435 | subRecCheckList = parseElementList[:] + [ self ] 2436 | for e in self.exprs: 2437 | e.checkRecursion( subRecCheckList ) 2438 | 2439 | 2440 | class MatchFirst(ParseExpression): 2441 | """Requires that at least one ParseExpression is found. 2442 | If two expressions match, the first one listed is the one that will match. 2443 | May be constructed using the '|' operator. 2444 | """ 2445 | def __init__( self, exprs, savelist = False ): 2446 | super(MatchFirst,self).__init__(exprs, savelist) 2447 | if exprs: 2448 | self.mayReturnEmpty = False 2449 | for e in self.exprs: 2450 | if e.mayReturnEmpty: 2451 | self.mayReturnEmpty = True 2452 | break 2453 | else: 2454 | self.mayReturnEmpty = True 2455 | 2456 | def parseImpl( self, instring, loc, doActions=True ): 2457 | maxExcLoc = -1 2458 | maxException = None 2459 | for e in self.exprs: 2460 | try: 2461 | ret = e._parse( instring, loc, doActions ) 2462 | return ret 2463 | except ParseException, err: 2464 | if err.loc > maxExcLoc: 2465 | maxException = err 2466 | maxExcLoc = err.loc 2467 | except IndexError: 2468 | if len(instring) > maxExcLoc: 2469 | maxException = ParseException(instring,len(instring),e.errmsg,self) 2470 | maxExcLoc = len(instring) 2471 | 2472 | # only got here if no expression matched, raise exception for match that made it the furthest 2473 | else: 2474 | if maxException is not None: 2475 | raise maxException 2476 | else: 2477 | raise ParseException(instring, loc, "no defined alternatives to match", self) 2478 | 2479 | def __ior__(self, other ): 2480 | if isinstance( other, basestring ): 2481 | other = Literal( other ) 2482 | return self.append( other ) #MatchFirst( [ self, other ] ) 2483 | 2484 
| def __str__( self ): 2485 | if hasattr(self,"name"): 2486 | return self.name 2487 | 2488 | if self.strRepr is None: 2489 | self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2490 | 2491 | return self.strRepr 2492 | 2493 | def checkRecursion( self, parseElementList ): 2494 | subRecCheckList = parseElementList[:] + [ self ] 2495 | for e in self.exprs: 2496 | e.checkRecursion( subRecCheckList ) 2497 | 2498 | 2499 | class Each(ParseExpression): 2500 | """Requires all given ParseExpressions to be found, but in any order. 2501 | Expressions may be separated by whitespace. 2502 | May be constructed using the '&' operator. 2503 | """ 2504 | def __init__( self, exprs, savelist = True ): 2505 | super(Each,self).__init__(exprs, savelist) 2506 | self.mayReturnEmpty = True 2507 | for e in self.exprs: 2508 | if not e.mayReturnEmpty: 2509 | self.mayReturnEmpty = False 2510 | break 2511 | self.skipWhitespace = True 2512 | self.initExprGroups = True 2513 | 2514 | def parseImpl( self, instring, loc, doActions=True ): 2515 | if self.initExprGroups: 2516 | self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 2517 | self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 2518 | self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 2519 | self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 2520 | self.required += self.multirequired 2521 | self.initExprGroups = False 2522 | tmpLoc = loc 2523 | tmpReqd = self.required[:] 2524 | tmpOpt = self.optionals[:] 2525 | matchOrder = [] 2526 | 2527 | keepMatching = True 2528 | while keepMatching: 2529 | tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 2530 | failed = [] 2531 | for e in tmpExprs: 2532 | try: 2533 | tmpLoc = e.tryParse( instring, tmpLoc ) 2534 | except ParseException: 2535 | failed.append(e) 2536 | else: 2537 | matchOrder.append(e) 2538 | if e in tmpReqd: 2539 | 
tmpReqd.remove(e) 2540 | elif e in tmpOpt: 2541 | tmpOpt.remove(e) 2542 | if len(failed) == len(tmpExprs): 2543 | keepMatching = False 2544 | 2545 | if tmpReqd: 2546 | missing = ", ".join( [ _ustr(e) for e in tmpReqd ] ) 2547 | raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 2548 | 2549 | # add any unmatched Optionals, in case they have default values defined 2550 | matchOrder += list(e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt) 2551 | 2552 | resultlist = [] 2553 | for e in matchOrder: 2554 | loc,results = e._parse(instring,loc,doActions) 2555 | resultlist.append(results) 2556 | 2557 | finalResults = ParseResults([]) 2558 | for r in resultlist: 2559 | dups = {} 2560 | for k in r.keys(): 2561 | if k in finalResults.keys(): 2562 | tmp = ParseResults(finalResults[k]) 2563 | tmp += ParseResults(r[k]) 2564 | dups[k] = tmp 2565 | finalResults += ParseResults(r) 2566 | for k,v in dups.items(): 2567 | finalResults[k] = v 2568 | return loc, finalResults 2569 | 2570 | def __str__( self ): 2571 | if hasattr(self,"name"): 2572 | return self.name 2573 | 2574 | if self.strRepr is None: 2575 | self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}" 2576 | 2577 | return self.strRepr 2578 | 2579 | def checkRecursion( self, parseElementList ): 2580 | subRecCheckList = parseElementList[:] + [ self ] 2581 | for e in self.exprs: 2582 | e.checkRecursion( subRecCheckList ) 2583 | 2584 | 2585 | class ParseElementEnhance(ParserElement): 2586 | """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 2587 | def __init__( self, expr, savelist=False ): 2588 | super(ParseElementEnhance,self).__init__(savelist) 2589 | if isinstance( expr, basestring ): 2590 | expr = Literal(expr) 2591 | self.expr = expr 2592 | self.strRepr = None 2593 | if expr is not None: 2594 | self.mayIndexError = expr.mayIndexError 2595 | self.mayReturnEmpty = expr.mayReturnEmpty 2596 | 
self.setWhitespaceChars( expr.whiteChars ) 2597 | self.skipWhitespace = expr.skipWhitespace 2598 | self.saveAsList = expr.saveAsList 2599 | self.callPreparse = expr.callPreparse 2600 | self.ignoreExprs.extend(expr.ignoreExprs) 2601 | 2602 | def parseImpl( self, instring, loc, doActions=True ): 2603 | if self.expr is not None: 2604 | return self.expr._parse( instring, loc, doActions, callPreParse=False ) 2605 | else: 2606 | raise ParseException("",loc,self.errmsg,self) 2607 | 2608 | def leaveWhitespace( self ): 2609 | self.skipWhitespace = False 2610 | self.expr = self.expr.copy() 2611 | if self.expr is not None: 2612 | self.expr.leaveWhitespace() 2613 | return self 2614 | 2615 | def ignore( self, other ): 2616 | if isinstance( other, Suppress ): 2617 | if other not in self.ignoreExprs: 2618 | super( ParseElementEnhance, self).ignore( other ) 2619 | if self.expr is not None: 2620 | self.expr.ignore( self.ignoreExprs[-1] ) 2621 | else: 2622 | super( ParseElementEnhance, self).ignore( other ) 2623 | if self.expr is not None: 2624 | self.expr.ignore( self.ignoreExprs[-1] ) 2625 | return self 2626 | 2627 | def streamline( self ): 2628 | super(ParseElementEnhance,self).streamline() 2629 | if self.expr is not None: 2630 | self.expr.streamline() 2631 | return self 2632 | 2633 | def checkRecursion( self, parseElementList ): 2634 | if self in parseElementList: 2635 | raise RecursiveGrammarException( parseElementList+[self] ) 2636 | subRecCheckList = parseElementList[:] + [ self ] 2637 | if self.expr is not None: 2638 | self.expr.checkRecursion( subRecCheckList ) 2639 | 2640 | def validate( self, validateTrace=[] ): 2641 | tmp = validateTrace[:]+[self] 2642 | if self.expr is not None: 2643 | self.expr.validate(tmp) 2644 | self.checkRecursion( [] ) 2645 | 2646 | def __str__( self ): 2647 | try: 2648 | return super(ParseElementEnhance,self).__str__() 2649 | except: 2650 | pass 2651 | 2652 | if self.strRepr is None and self.expr is not None: 2653 | self.strRepr = "%s:(%s)" % ( 
self.__class__.__name__, _ustr(self.expr) ) 2654 | return self.strRepr 2655 | 2656 | 2657 | class FollowedBy(ParseElementEnhance): 2658 | """Lookahead matching of the given parse expression. FollowedBy 2659 | does *not* advance the parsing position within the input string, it only 2660 | verifies that the specified parse expression matches at the current 2661 | position. FollowedBy always returns a null token list.""" 2662 | def __init__( self, expr ): 2663 | super(FollowedBy,self).__init__(expr) 2664 | self.mayReturnEmpty = True 2665 | 2666 | def parseImpl( self, instring, loc, doActions=True ): 2667 | self.expr.tryParse( instring, loc ) 2668 | return loc, [] 2669 | 2670 | 2671 | class NotAny(ParseElementEnhance): 2672 | """Lookahead to disallow matching with the given parse expression. NotAny 2673 | does *not* advance the parsing position within the input string, it only 2674 | verifies that the specified parse expression does *not* match at the current 2675 | position. Also, NotAny does *not* skip over leading whitespace. NotAny 2676 | always returns a null token list. 
May be constructed using the '~' operator.""" 2677 | def __init__( self, expr ): 2678 | super(NotAny,self).__init__(expr) 2679 | #~ self.leaveWhitespace() 2680 | self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs 2681 | self.mayReturnEmpty = True 2682 | self.errmsg = "Found unwanted token, "+_ustr(self.expr) 2683 | #self.myException = ParseException("",0,self.errmsg,self) 2684 | 2685 | def parseImpl( self, instring, loc, doActions=True ): 2686 | try: 2687 | self.expr.tryParse( instring, loc ) 2688 | except (ParseException,IndexError): 2689 | pass 2690 | else: 2691 | #~ raise ParseException(instring, loc, self.errmsg ) 2692 | exc = self.myException 2693 | exc.loc = loc 2694 | exc.pstr = instring 2695 | raise exc 2696 | return loc, [] 2697 | 2698 | def __str__( self ): 2699 | if hasattr(self,"name"): 2700 | return self.name 2701 | 2702 | if self.strRepr is None: 2703 | self.strRepr = "~{" + _ustr(self.expr) + "}" 2704 | 2705 | return self.strRepr 2706 | 2707 | 2708 | class ZeroOrMore(ParseElementEnhance): 2709 | """Optional repetition of zero or more of the given expression.""" 2710 | def __init__( self, expr ): 2711 | super(ZeroOrMore,self).__init__(expr) 2712 | self.mayReturnEmpty = True 2713 | 2714 | def parseImpl( self, instring, loc, doActions=True ): 2715 | tokens = [] 2716 | try: 2717 | loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 2718 | hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 2719 | while 1: 2720 | if hasIgnoreExprs: 2721 | preloc = self._skipIgnorables( instring, loc ) 2722 | else: 2723 | preloc = loc 2724 | loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 2725 | if tmptokens or tmptokens.keys(): 2726 | tokens += tmptokens 2727 | except (ParseException,IndexError): 2728 | pass 2729 | 2730 | return loc, tokens 2731 | 2732 | def __str__( self ): 2733 | if hasattr(self,"name"): 2734 | return self.name 2735 | 2736 | if self.strRepr is None: 2737 | self.strRepr 
= "[" + _ustr(self.expr) + "]..." 2738 | 2739 | return self.strRepr 2740 | 2741 | def setResultsName( self, name, listAllMatches=False ): 2742 | ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches) 2743 | ret.saveAsList = True 2744 | return ret 2745 | 2746 | 2747 | class OneOrMore(ParseElementEnhance): 2748 | """Repetition of one or more of the given expression.""" 2749 | def parseImpl( self, instring, loc, doActions=True ): 2750 | # must be at least one 2751 | loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 2752 | try: 2753 | hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 2754 | while 1: 2755 | if hasIgnoreExprs: 2756 | preloc = self._skipIgnorables( instring, loc ) 2757 | else: 2758 | preloc = loc 2759 | loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 2760 | if tmptokens or tmptokens.keys(): 2761 | tokens += tmptokens 2762 | except (ParseException,IndexError): 2763 | pass 2764 | 2765 | return loc, tokens 2766 | 2767 | def __str__( self ): 2768 | if hasattr(self,"name"): 2769 | return self.name 2770 | 2771 | if self.strRepr is None: 2772 | self.strRepr = "{" + _ustr(self.expr) + "}..." 2773 | 2774 | return self.strRepr 2775 | 2776 | def setResultsName( self, name, listAllMatches=False ): 2777 | ret = super(OneOrMore,self).setResultsName(name,listAllMatches) 2778 | ret.saveAsList = True 2779 | return ret 2780 | 2781 | class _NullToken(object): 2782 | def __bool__(self): 2783 | return False 2784 | __nonzero__ = __bool__ 2785 | def __str__(self): 2786 | return "" 2787 | 2788 | _optionalNotMatched = _NullToken() 2789 | class Optional(ParseElementEnhance): 2790 | """Optional matching of the given expression. 2791 | A default return string can also be specified, if the optional expression 2792 | is not found. 
2793 | """ 2794 | def __init__( self, exprs, default=_optionalNotMatched ): 2795 | super(Optional,self).__init__( exprs, savelist=False ) 2796 | self.defaultValue = default 2797 | self.mayReturnEmpty = True 2798 | 2799 | def parseImpl( self, instring, loc, doActions=True ): 2800 | try: 2801 | loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 2802 | except (ParseException,IndexError): 2803 | if self.defaultValue is not _optionalNotMatched: 2804 | if self.expr.resultsName: 2805 | tokens = ParseResults([ self.defaultValue ]) 2806 | tokens[self.expr.resultsName] = self.defaultValue 2807 | else: 2808 | tokens = [ self.defaultValue ] 2809 | else: 2810 | tokens = [] 2811 | return loc, tokens 2812 | 2813 | def __str__( self ): 2814 | if hasattr(self,"name"): 2815 | return self.name 2816 | 2817 | if self.strRepr is None: 2818 | self.strRepr = "[" + _ustr(self.expr) + "]" 2819 | 2820 | return self.strRepr 2821 | 2822 | 2823 | class SkipTo(ParseElementEnhance): 2824 | """Token for skipping over all undefined text until the matched expression is found. 2825 | If include is set to true, the matched expression is also parsed (the skipped text 2826 | and matched expression are returned as a 2-element list). The ignore 2827 | argument is used to define grammars (typically quoted strings and comments) that 2828 | might contain false matches. 
2829 | """ 2830 | def __init__( self, other, include=False, ignore=None, failOn=None ): 2831 | super( SkipTo, self ).__init__( other ) 2832 | self.ignoreExpr = ignore 2833 | self.mayReturnEmpty = True 2834 | self.mayIndexError = False 2835 | self.includeMatch = include 2836 | self.asList = False 2837 | if failOn is not None and isinstance(failOn, basestring): 2838 | self.failOn = Literal(failOn) 2839 | else: 2840 | self.failOn = failOn 2841 | self.errmsg = "No match found for "+_ustr(self.expr) 2842 | #self.myException = ParseException("",0,self.errmsg,self) 2843 | 2844 | def parseImpl( self, instring, loc, doActions=True ): 2845 | startLoc = loc 2846 | instrlen = len(instring) 2847 | expr = self.expr 2848 | failParse = False 2849 | while loc <= instrlen: 2850 | try: 2851 | if self.failOn: 2852 | try: 2853 | self.failOn.tryParse(instring, loc) 2854 | except ParseBaseException: 2855 | pass 2856 | else: 2857 | failParse = True 2858 | raise ParseException(instring, loc, "Found expression " + str(self.failOn)) 2859 | failParse = False 2860 | if self.ignoreExpr is not None: 2861 | while 1: 2862 | try: 2863 | loc = self.ignoreExpr.tryParse(instring,loc) 2864 | print "found ignoreExpr, advance to", loc 2865 | except ParseBaseException: 2866 | break 2867 | expr._parse( instring, loc, doActions=False, callPreParse=False ) 2868 | skipText = instring[startLoc:loc] 2869 | if self.includeMatch: 2870 | loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) 2871 | if mat: 2872 | skipRes = ParseResults( skipText ) 2873 | skipRes += mat 2874 | return loc, [ skipRes ] 2875 | else: 2876 | return loc, [ skipText ] 2877 | else: 2878 | return loc, [ skipText ] 2879 | except (ParseException,IndexError): 2880 | if failParse: 2881 | raise 2882 | else: 2883 | loc += 1 2884 | exc = self.myException 2885 | exc.loc = loc 2886 | exc.pstr = instring 2887 | raise exc 2888 | 2889 | class Forward(ParseElementEnhance): 2890 | """Forward declaration of an expression to be defined later - 
2891 | used for recursive grammars, such as algebraic infix notation. 2892 | When the expression is known, it is assigned to the Forward variable using the '<<' operator. 2893 | 2894 | Note: take care when assigning to Forward not to overlook precedence of operators. 2895 | Specifically, '|' has a lower precedence than '<<', so that:: 2896 | fwdExpr << a | b | c 2897 | will actually be evaluated as:: 2898 | (fwdExpr << a) | b | c 2899 | thereby leaving b and c out as parseable alternatives. It is recommended that you 2900 | explicitly group the values inserted into the Forward:: 2901 | fwdExpr << (a | b | c) 2902 | """ 2903 | def __init__( self, other=None ): 2904 | super(Forward,self).__init__( other, savelist=False ) 2905 | 2906 | def __lshift__( self, other ): 2907 | if isinstance( other, basestring ): 2908 | other = Literal(other) 2909 | self.expr = other 2910 | self.mayReturnEmpty = other.mayReturnEmpty 2911 | self.strRepr = None 2912 | self.mayIndexError = self.expr.mayIndexError 2913 | self.mayReturnEmpty = self.expr.mayReturnEmpty 2914 | self.setWhitespaceChars( self.expr.whiteChars ) 2915 | self.skipWhitespace = self.expr.skipWhitespace 2916 | self.saveAsList = self.expr.saveAsList 2917 | self.ignoreExprs.extend(self.expr.ignoreExprs) 2918 | return None 2919 | 2920 | def leaveWhitespace( self ): 2921 | self.skipWhitespace = False 2922 | return self 2923 | 2924 | def streamline( self ): 2925 | if not self.streamlined: 2926 | self.streamlined = True 2927 | if self.expr is not None: 2928 | self.expr.streamline() 2929 | return self 2930 | 2931 | def validate( self, validateTrace=[] ): 2932 | if self not in validateTrace: 2933 | tmp = validateTrace[:]+[self] 2934 | if self.expr is not None: 2935 | self.expr.validate(tmp) 2936 | self.checkRecursion([]) 2937 | 2938 | def __str__( self ): 2939 | if hasattr(self,"name"): 2940 | return self.name 2941 | 2942 | self._revertClass = self.__class__ 2943 | self.__class__ = _ForwardNoRecurse 2944 | try: 2945 | if self.expr 
is not None: 2946 | retString = _ustr(self.expr) 2947 | else: 2948 | retString = "None" 2949 | finally: 2950 | self.__class__ = self._revertClass 2951 | return self.__class__.__name__ + ": " + retString 2952 | 2953 | def copy(self): 2954 | if self.expr is not None: 2955 | return super(Forward,self).copy() 2956 | else: 2957 | ret = Forward() 2958 | ret << self 2959 | return ret 2960 | 2961 | class _ForwardNoRecurse(Forward): 2962 | def __str__( self ): 2963 | return "..." 2964 | 2965 | class TokenConverter(ParseElementEnhance): 2966 | """Abstract subclass of ParseExpression, for converting parsed results.""" 2967 | def __init__( self, expr, savelist=False ): 2968 | super(TokenConverter,self).__init__( expr )#, savelist ) 2969 | self.saveAsList = False 2970 | 2971 | class Upcase(TokenConverter): 2972 | """Converter to upper case all matching tokens.""" 2973 | def __init__(self, *args): 2974 | super(Upcase,self).__init__(*args) 2975 | warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 2976 | DeprecationWarning,stacklevel=2) 2977 | 2978 | def postParse( self, instring, loc, tokenlist ): 2979 | return list(map( string.upper, tokenlist )) 2980 | 2981 | 2982 | class Combine(TokenConverter): 2983 | """Converter to concatenate all matching tokens to a single string. 2984 | By default, the matching patterns must also be contiguous in the input string; 2985 | this can be disabled by specifying 'adjacent=False' in the constructor. 
2986 | """ 2987 | def __init__( self, expr, joinString="", adjacent=True ): 2988 | super(Combine,self).__init__( expr ) 2989 | # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself 2990 | if adjacent: 2991 | self.leaveWhitespace() 2992 | self.adjacent = adjacent 2993 | self.skipWhitespace = True 2994 | self.joinString = joinString 2995 | 2996 | def ignore( self, other ): 2997 | if self.adjacent: 2998 | ParserElement.ignore(self, other) 2999 | else: 3000 | super( Combine, self).ignore( other ) 3001 | return self 3002 | 3003 | def postParse( self, instring, loc, tokenlist ): 3004 | retToks = tokenlist.copy() 3005 | del retToks[:] 3006 | retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) 3007 | 3008 | if self.resultsName and len(retToks.keys())>0: 3009 | return [ retToks ] 3010 | else: 3011 | return retToks 3012 | 3013 | class Group(TokenConverter): 3014 | """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" 3015 | def __init__( self, expr ): 3016 | super(Group,self).__init__( expr ) 3017 | self.saveAsList = True 3018 | 3019 | def postParse( self, instring, loc, tokenlist ): 3020 | return [ tokenlist ] 3021 | 3022 | class Dict(TokenConverter): 3023 | """Converter to return a repetitive expression as a list, but also as a dictionary. 3024 | Each element can also be referenced using the first token in the expression as its key. 3025 | Useful for tabular report scraping when the first column can be used as a item key. 
3026 | """ 3027 | def __init__( self, exprs ): 3028 | super(Dict,self).__init__( exprs ) 3029 | self.saveAsList = True 3030 | 3031 | def postParse( self, instring, loc, tokenlist ): 3032 | for i,tok in enumerate(tokenlist): 3033 | if len(tok) == 0: 3034 | continue 3035 | ikey = tok[0] 3036 | if isinstance(ikey,int): 3037 | ikey = _ustr(tok[0]).strip() 3038 | if len(tok)==1: 3039 | tokenlist[ikey] = _ParseResultsWithOffset("",i) 3040 | elif len(tok)==2 and not isinstance(tok[1],ParseResults): 3041 | tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) 3042 | else: 3043 | dictvalue = tok.copy() #ParseResults(i) 3044 | del dictvalue[0] 3045 | if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()): 3046 | tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) 3047 | else: 3048 | tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) 3049 | 3050 | if self.resultsName: 3051 | return [ tokenlist ] 3052 | else: 3053 | return tokenlist 3054 | 3055 | 3056 | class Suppress(TokenConverter): 3057 | """Converter for ignoring the results of a parsed expression.""" 3058 | def postParse( self, instring, loc, tokenlist ): 3059 | return [] 3060 | 3061 | def suppress( self ): 3062 | return self 3063 | 3064 | 3065 | class OnlyOnce(object): 3066 | """Wrapper for parse actions, to ensure they are only called once.""" 3067 | def __init__(self, methodCall): 3068 | self.callable = ParserElement._normalizeParseActionArgs(methodCall) 3069 | self.called = False 3070 | def __call__(self,s,l,t): 3071 | if not self.called: 3072 | results = self.callable(s,l,t) 3073 | self.called = True 3074 | return results 3075 | raise ParseException(s,l,"") 3076 | def reset(self): 3077 | self.called = False 3078 | 3079 | def traceParseAction(f): 3080 | """Decorator for debugging parse actions.""" 3081 | f = ParserElement._normalizeParseActionArgs(f) 3082 | def z(*paArgs): 3083 | thisFunc = f.func_name 3084 | s,l,t = paArgs[-3:] 3085 | if len(paArgs)>3: 3086 | thisFunc = 
paArgs[0].__class__.__name__ + '.' + thisFunc 3087 | sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) ) 3088 | try: 3089 | ret = f(*paArgs) 3090 | except Exception, exc: 3091 | sys.stderr.write( "<", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) 3243 | try: 3244 | if len(symbols)==len("".join(symbols)): 3245 | return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) ) 3246 | else: 3247 | return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) ) 3248 | except: 3249 | warnings.warn("Exception creating Regex for oneOf, building MatchFirst", 3250 | SyntaxWarning, stacklevel=2) 3251 | 3252 | 3253 | # last resort, just use MatchFirst 3254 | return MatchFirst( [ parseElementClass(sym) for sym in symbols ] ) 3255 | 3256 | def dictOf( key, value ): 3257 | """Helper to easily and clearly define a dictionary by specifying the respective patterns 3258 | for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens 3259 | in the proper order. The key pattern can include delimiting markers or punctuation, 3260 | as long as they are suppressed, thereby leaving the significant key text. The value 3261 | pattern can include named results, so that the Dict results can include named token 3262 | fields. 3263 | """ 3264 | return Dict( ZeroOrMore( Group ( key + value ) ) ) 3265 | 3266 | def originalTextFor(expr, asString=True): 3267 | """Helper to return the original, untokenized text for a given expression. Useful to 3268 | restore the parsed fields of an HTML start tag into the raw tag text itself, or to 3269 | revert separate tokens with intervening whitespace back to the original matching 3270 | input text. Simpler to use than the parse action keepOriginalText, and does not 3271 | require the inspect module to chase up the call stack. By default, returns a 3272 | string containing the original parsed text. 
3273 | 3274 | If the optional asString argument is passed as False, then the return value is a 3275 | ParseResults containing any results names that were originally matched, and a 3276 | single token containing the original matched text from the input string. So if 3277 | the expression passed to originalTextFor contains expressions with defined 3278 | results names, you must set asString to False if you want to preserve those 3279 | results name values.""" 3280 | locMarker = Empty().setParseAction(lambda s,loc,t: loc) 3281 | matchExpr = locMarker("_original_start") + expr + locMarker("_original_end") 3282 | if asString: 3283 | extractText = lambda s,l,t: s[t._original_start:t._original_end] 3284 | else: 3285 | def extractText(s,l,t): 3286 | del t[:] 3287 | t.insert(0, s[t._original_start:t._original_end]) 3288 | del t["_original_start"] 3289 | del t["_original_end"] 3290 | matchExpr.setParseAction(extractText) 3291 | return matchExpr 3292 | 3293 | # convenience constants for positional expressions 3294 | empty = Empty().setName("empty") 3295 | lineStart = LineStart().setName("lineStart") 3296 | lineEnd = LineEnd().setName("lineEnd") 3297 | stringStart = StringStart().setName("stringStart") 3298 | stringEnd = StringEnd().setName("stringEnd") 3299 | 3300 | _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1]) 3301 | _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ]) 3302 | _escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16))) 3303 | _escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8))) 3304 | _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1) 3305 | _charRange = Group(_singleChar + Suppress("-") + _singleChar) 3306 | _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( 
_charRange | _singleChar ) ).setResultsName("body") + "]" 3307 | 3308 | _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p) 3309 | 3310 | def srange(s): 3311 | r"""Helper to easily define string ranges for use in Word construction. Borrows 3312 | syntax from regexp '[]' string range definitions:: 3313 | srange("[0-9]") -> "0123456789" 3314 | srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" 3315 | srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 3316 | The input string must be enclosed in []'s, and the returned string is the expanded 3317 | character set joined into a single string. 3318 | The values enclosed in the []'s may be:: 3319 | a single character 3320 | an escaped character with a leading backslash (such as \- or \]) 3321 | an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) 3322 | an escaped octal character with a leading '\0' (\041, which is a '!' character) 3323 | a range of any of the above, separated by a dash ('a-z', etc.) 3324 | any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 3325 | """ 3326 | try: 3327 | return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) 3328 | except: 3329 | return "" 3330 | 3331 | def matchOnlyAtCol(n): 3332 | """Helper method for defining parse actions that require matching at a specific 3333 | column in the input text. 3334 | """ 3335 | def verifyCol(strg,locn,toks): 3336 | if col(locn,strg) != n: 3337 | raise ParseException(strg,locn,"matched token not at column %d" % n) 3338 | return verifyCol 3339 | 3340 | def replaceWith(replStr): 3341 | """Helper method for common parse actions that simply return a literal value. Especially 3342 | useful when used with transformString(). 3343 | """ 3344 | def _replFunc(*args): 3345 | return [replStr] 3346 | return _replFunc 3347 | 3348 | def removeQuotes(s,l,t): 3349 | """Helper parse action for removing quotation marks from parsed quoted strings. 
3350 | To use, add this parse action to quoted string using:: 3351 | quotedString.setParseAction( removeQuotes ) 3352 | """ 3353 | return t[0][1:-1] 3354 | 3355 | def upcaseTokens(s,l,t): 3356 | """Helper parse action to convert tokens to upper case.""" 3357 | return [ tt.upper() for tt in map(_ustr,t) ] 3358 | 3359 | def downcaseTokens(s,l,t): 3360 | """Helper parse action to convert tokens to lower case.""" 3361 | return [ tt.lower() for tt in map(_ustr,t) ] 3362 | 3363 | def keepOriginalText(s,startLoc,t): 3364 | """Helper parse action to preserve original parsed text, 3365 | overriding any nested parse actions.""" 3366 | try: 3367 | endloc = getTokensEndLoc() 3368 | except ParseException: 3369 | raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") 3370 | del t[:] 3371 | t += ParseResults(s[startLoc:endloc]) 3372 | return t 3373 | 3374 | def getTokensEndLoc(): 3375 | """Method to be called from within a parse action to determine the end 3376 | location of the parsed tokens.""" 3377 | import inspect 3378 | fstack = inspect.stack() 3379 | try: 3380 | # search up the stack (through intervening argument normalizers) for correct calling routine 3381 | for f in fstack[2:]: 3382 | if f[3] == "_parseNoCache": 3383 | endloc = f[0].f_locals["loc"] 3384 | return endloc 3385 | else: 3386 | raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") 3387 | finally: 3388 | del fstack 3389 | 3390 | def _makeTags(tagStr, xml): 3391 | """Internal helper to construct opening and closing tag expressions, given a tag name""" 3392 | if isinstance(tagStr,basestring): 3393 | resname = tagStr 3394 | tagStr = Keyword(tagStr, caseless=not xml) 3395 | else: 3396 | resname = tagStr.name 3397 | 3398 | tagAttrName = Word(alphas,alphanums+"_-:") 3399 | if (xml): 3400 | tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes ) 3401 | openTag = Suppress("<") + tagStr + \ 3402 | 
Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \ 3403 | Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") 3404 | else: 3405 | printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] ) 3406 | tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack) 3407 | openTag = Suppress("<") + tagStr + \ 3408 | Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \ 3409 | Optional( Suppress("=") + tagAttrValue ) ))) + \ 3410 | Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") 3411 | closeTag = Combine(_L("") 3412 | 3413 | openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr) 3414 | closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("" % tagStr) 3415 | 3416 | return openTag, closeTag 3417 | 3418 | def makeHTMLTags(tagStr): 3419 | """Helper to construct opening and closing tag expressions for HTML, given a tag name""" 3420 | return _makeTags( tagStr, False ) 3421 | 3422 | def makeXMLTags(tagStr): 3423 | """Helper to construct opening and closing tag expressions for XML, given a tag name""" 3424 | return _makeTags( tagStr, True ) 3425 | 3426 | def withAttribute(*args,**attrDict): 3427 | """Helper to create a validating parse action to be used with start tags created 3428 | with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag 3429 | with a required attribute value, to avoid false matches on common tags such as 3430 | <TD> or <DIV>. 3431 | 3432 | Call withAttribute with a series of attribute names and values. Specify the list 3433 | of filter attribute names and values as: 3434 | - keyword arguments, as in (class="Customer",align="right"), or 3435 | - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) 3436 | For attribute names with a namespace prefix, you must use the second form. Attribute 3437 | names are matched insensitive to upper/lower case. 3438 | 3439 | To verify that the attribute exists, but without specifying a value, pass 3440 | withAttribute.ANY_VALUE as the value. 3441 | """ 3442 | if args: 3443 | attrs = args[:] 3444 | else: 3445 | attrs = attrDict.items() 3446 | attrs = [(k,v) for k,v in attrs] 3447 | def pa(s,l,tokens): 3448 | for attrName,attrValue in attrs: 3449 | if attrName not in tokens: 3450 | raise ParseException(s,l,"no matching attribute " + attrName) 3451 | if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: 3452 | raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % 3453 | (attrName, tokens[attrName], attrValue)) 3454 | return pa 3455 | withAttribute.ANY_VALUE = object() 3456 | 3457 | opAssoc = _Constants() 3458 | opAssoc.LEFT = object() 3459 | opAssoc.RIGHT = object() 3460 | 3461 | def operatorPrecedence( baseExpr, opList ): 3462 | """Helper method for constructing grammars of expressions made up of 3463 | operators working in a precedence hierarchy. Operators may be unary or 3464 | binary, left- or right-associative. Parse actions can also be attached 3465 | to operator expressions.
3466 | 3467 | Parameters: 3468 | - baseExpr - expression representing the most basic element (operand) of the nested expression grammar 3469 | - opList - list of tuples, one for each operator precedence level in the 3470 | expression grammar; each tuple is of the form 3471 | (opExpr, numTerms, rightLeftAssoc, parseAction), where: 3472 | - opExpr is the pyparsing expression for the operator; 3473 | may also be a string, which will be converted to a Literal; 3474 | if numTerms is 3, opExpr is a tuple of two expressions, for the 3475 | two operators separating the 3 terms 3476 | - numTerms is the number of terms for this operator (must 3477 | be 1, 2, or 3) 3478 | - rightLeftAssoc is the indicator whether the operator is 3479 | right or left associative, using the pyparsing-defined 3480 | constants opAssoc.RIGHT and opAssoc.LEFT. 3481 | - parseAction is the parse action to be associated with 3482 | expressions matching this operator expression (the 3483 | parse action tuple member may be omitted) 3484 | """ 3485 | ret = Forward() 3486 | lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) 3487 | for i,operDef in enumerate(opList): 3488 | opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] 3489 | if arity == 3: 3490 | if opExpr is None or len(opExpr) != 2: 3491 | raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 3492 | opExpr1, opExpr2 = opExpr 3493 | thisExpr = Forward()#.setName("expr%d" % i) 3494 | if rightLeftAssoc == opAssoc.LEFT: 3495 | if arity == 1: 3496 | matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) 3497 | elif arity == 2: 3498 | if opExpr is not None: 3499 | matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) 3500 | else: 3501 | matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) 3502 | elif arity == 3: 3503 | matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 3504 | Group( lastExpr + 
opExpr1 + lastExpr + opExpr2 + lastExpr ) 3505 | else: 3506 | raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 3507 | elif rightLeftAssoc == opAssoc.RIGHT: 3508 | if arity == 1: 3509 | # try to avoid LR with this extra test 3510 | if not isinstance(opExpr, Optional): 3511 | opExpr = Optional(opExpr) 3512 | matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 3513 | elif arity == 2: 3514 | if opExpr is not None: 3515 | matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) 3516 | else: 3517 | matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) 3518 | elif arity == 3: 3519 | matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 3520 | Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) 3521 | else: 3522 | raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 3523 | else: 3524 | raise ValueError("operator must indicate right or left associativity") 3525 | if pa: 3526 | matchExpr.setParseAction( pa ) 3527 | thisExpr << ( matchExpr | lastExpr ) 3528 | lastExpr = thisExpr 3529 | ret << lastExpr 3530 | return ret 3531 | 3532 | dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") 3533 | sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") 3534 | quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") 3535 | unicodeString = Combine(_L('u') + quotedString.copy()) 3536 | 3537 | def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): 3538 | """Helper method for defining nested lists enclosed in opening and closing 3539 | delimiters ("(" and ")" are the default). 
3540 | 3541 | Parameters: 3542 | - opener - opening character for a nested list (default="("); can also be a pyparsing expression 3543 | - closer - closing character for a nested list (default=")"); can also be a pyparsing expression 3544 | - content - expression for items within the nested lists (default=None) 3545 | - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) 3546 | 3547 | If an expression is not provided for the content argument, the nested 3548 | expression will capture all whitespace-delimited content between delimiters 3549 | as a list of separate values. 3550 | 3551 | Use the ignoreExpr argument to define expressions that may contain 3552 | opening or closing characters that should not be treated as opening 3553 | or closing characters for nesting, such as quotedString or a comment 3554 | expression. Specify multiple expressions using an Or or MatchFirst. 3555 | The default is quotedString, but if no expressions are to be ignored, 3556 | then pass None for this argument. 
3557 | """ 3558 | if opener == closer: 3559 | raise ValueError("opening and closing strings cannot be the same") 3560 | if content is None: 3561 | if isinstance(opener,basestring) and isinstance(closer,basestring): 3562 | if len(opener) == 1 and len(closer)==1: 3563 | if ignoreExpr is not None: 3564 | content = (Combine(OneOrMore(~ignoreExpr + 3565 | CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 3566 | ).setParseAction(lambda t:t[0].strip())) 3567 | else: 3568 | content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS 3569 | ).setParseAction(lambda t:t[0].strip())) 3570 | else: 3571 | if ignoreExpr is not None: 3572 | content = (Combine(OneOrMore(~ignoreExpr + 3573 | ~Literal(opener) + ~Literal(closer) + 3574 | CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 3575 | ).setParseAction(lambda t:t[0].strip())) 3576 | else: 3577 | content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + 3578 | CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) 3579 | ).setParseAction(lambda t:t[0].strip())) 3580 | else: 3581 | raise ValueError("opening and closing arguments must be strings if no content expression is given") 3582 | ret = Forward() 3583 | if ignoreExpr is not None: 3584 | ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) 3585 | else: 3586 | ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) 3587 | return ret 3588 | 3589 | def indentedBlock(blockStatementExpr, indentStack, indent=True): 3590 | """Helper method for defining space-delimited indentation blocks, such as 3591 | those used to define block statements in Python source code. 
3592 | 3593 | Parameters: 3594 | - blockStatementExpr - expression defining syntax of statement that 3595 | is repeated within the indented block 3596 | - indentStack - list created by caller to manage indentation stack 3597 | (multiple statementWithIndentedBlock expressions within a single grammar 3598 | should share a common indentStack) 3599 | - indent - boolean indicating whether block must be indented beyond the 3600 | current level; set to False for block of left-most statements 3601 | (default=True) 3602 | 3603 | A valid block must contain at least one blockStatement. 3604 | """ 3605 | def checkPeerIndent(s,l,t): 3606 | if l >= len(s): return 3607 | curCol = col(l,s) 3608 | if curCol != indentStack[-1]: 3609 | if curCol > indentStack[-1]: 3610 | raise ParseFatalException(s,l,"illegal nesting") 3611 | raise ParseException(s,l,"not a peer entry") 3612 | 3613 | def checkSubIndent(s,l,t): 3614 | curCol = col(l,s) 3615 | if curCol > indentStack[-1]: 3616 | indentStack.append( curCol ) 3617 | else: 3618 | raise ParseException(s,l,"not a subentry") 3619 | 3620 | def checkUnindent(s,l,t): 3621 | if l >= len(s): return 3622 | curCol = col(l,s) 3623 | if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): 3624 | raise ParseException(s,l,"not an unindent") 3625 | indentStack.pop() 3626 | 3627 | NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) 3628 | INDENT = Empty() + Empty().setParseAction(checkSubIndent) 3629 | PEER = Empty().setParseAction(checkPeerIndent) 3630 | UNDENT = Empty().setParseAction(checkUnindent) 3631 | if indent: 3632 | smExpr = Group( Optional(NL) + 3633 | FollowedBy(blockStatementExpr) + 3634 | INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) 3635 | else: 3636 | smExpr = Group( Optional(NL) + 3637 | (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) 3638 | blockStatementExpr.ignore(_bslash + LineEnd()) 3639 | return smExpr 3640 | 3641 | alphas8bit = 
srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 3642 | punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 3643 | 3644 | anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) 3645 | commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() 3646 | _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) 3647 | replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None 3648 | 3649 | # it's easy to get these comment structures wrong - they're very common, so may as well make them available 3650 | cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") 3651 | 3652 | htmlComment = Regex(r"") 3653 | restOfLine = Regex(r".*").leaveWhitespace() 3654 | dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") 3655 | cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) 3673 | print ("tokens = " + str(tokens)) 3674 | print ("tokens.columns = " + str(tokens.columns)) 3675 | print ("tokens.tables = " + str(tokens.tables)) 3676 | print (tokens.asXML("SQL",True)) 3677 | except ParseBaseException,err: 3678 | print (teststring + "->") 3679 | print (err.line) 3680 | print (" "*(err.column-1) + "^") 3681 | print (err) 3682 | print() 3683 | 3684 | selectToken = CaselessLiteral( "select" ) 3685 | fromToken = CaselessLiteral( "from" ) 3686 | 3687 | ident = Word( alphas, alphanums + "_$" ) 3688 | columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 3689 | columnNameList = Group( delimitedList( columnName ) )#.setName("columns") 3690 | tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) 3691 | tableNameList = Group( delimitedList( tableName ) )#.setName("tables") 3692 | simpleSQL = ( selectToken + \ 3693 | ( '*' | columnNameList ).setResultsName( "columns" ) + \ 3694 | fromToken + \ 3695 | tableNameList.setResultsName( "tables" ) ) 3696 | 3697 | test( "SELECT * 
from XYZZY, ABC" ) 3698 | test( "select * from SYS.XYZZY" ) 3699 | test( "Select A from Sys.dual" ) 3700 | test( "Select AA,BB,CC from Sys.dual" ) 3701 | test( "Select A, B, C from Sys.dual" ) 3702 | test( "Select A, B, C from Sys.dual" ) 3703 | test( "Xelect A, B, C from Sys.dual" ) 3704 | test( "Select A, B, C frox Sys.dual" ) 3705 | test( "Select" ) 3706 | test( "Select ^^^ frox Sys.dual" ) 3707 | test( "Select A, B, C from Sys.dual, Table2 " ) 3708 | -------------------------------------------------------------------------------- /release.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Release information""" 4 | 5 | 6 | version = "0.0.2" 7 | author = "Roland|Chima" 8 | email = "Rolandazim@gmail.com" 9 | copyright = "Copyright 2019~ Roland|Chima and contributors" 10 | license = "MIT " 11 | url = "https://github.com/ORC-1/ibolang" 12 | download_url="https://github.com/ORC-1/ibolang" 13 | description="programming language in Igbo" 14 | long_description = """ 15 | .. contents:: 16 | :depth: 2 17 | 18 | Introduction 19 | -------------- 20 | 21 | Igbo is an indigenous language popularly spoken in Nigeria, Ibolang is a full 22 | extension of the Igbo language in Python. 23 | With Ibolang, you can write and run python like programs in Igbo 24 | 25 | Ibolang acts like python 3 and plays like python 3, it maintains all the python syntax 26 | and methods. 27 | user could use it to learn programming in their native language. 
28 | 29 | Example 30 | ---------- 31 | ibolang is highly user friendly, the following is a simple "HelloWorld" program 32 | 33 | deputa("Uwa Aloo") 34 | 35 | 36 | running this, will diplay 37 | 38 | Uwa Aloo 39 | 40 | to console, which translated to English is "Hello World" 41 | 42 | you can code more complex code by installing ibolang to your PC: 43 | 44 | 45 | To run programs is as simple as: 46 | 47 | $ ibolang filename.ibl 48 | 49 | from your preferred shell or command line 50 | 51 | you can go through the dictionary on: 52 | * https://github.com/ORC-1/ibolang/blob/master/dictionary.txt 53 | 54 | to get an exhaustive list of all currently available commands and there English translation 55 | 56 | Install 57 | ---------- 58 | 59 | If you'd like to play Ibolang with full features included, you should install Ibolang. 60 | 61 | You could use pip or easy_install command to install Ibolang: 62 | 63 | $ pip install Ibolang 64 | 65 | or 66 | 67 | $ easy_install -U Ibolang 68 | 69 | to use easy_install command, you should install distribute module for python 3 first: 70 | 71 | http://pypi.python.org/pypi/distribute/ 72 | 73 | And check your system path params if it contains python3.x/bin path. 74 | 75 | ex: edit .bashrc to include "/Library/Frameworks/Python.framework/Versions/3.x/bin" in your PATH parameter. 
76 | 77 | For sytem running multiple version of python, you are better of using a virtual enviroment 78 | with Ibolang: 79 | 80 | $ conda create -n Ibolang python==3.XX 81 | 82 | or using Virtualenv 83 | 84 | $ virtualenv ibolang python==3.XX 85 | 86 | Lastly you can clone the repo using this url https://github.com/ORC-1/ibolang.git : navigate to the folder path and run python setup.py 87 | Copy the source files into your script folder, you should highly consider using 88 | a virtual enviroment if you are using this option and the previous options are better 89 | off 90 | 91 | 92 | 93 | Change Log 94 | ------------- 95 | 96 | You could view the ChangeLog to see what's new in these version. 97 | 98 | * https://github.com/ORC-1/ibolang/blob/master/CHANGELOG.txt 99 | 100 | 101 | """ 102 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup, find_packages 3 | except ImportError: 4 | from ez_setup import use_setuptools 5 | use_setuptools() 6 | from setuptools import setup, find_packages 7 | 8 | from pkg_resources import DistributionNotFound 9 | 10 | import sys 11 | import os 12 | import glob 13 | import release 14 | #execfile('release.py') 15 | 16 | 17 | # setup params 18 | required_modules = ["setuptools"] 19 | #if mac, install readline 20 | #if(sys.platform=="darwin"): 21 | # required_modules.append("readline >= 2.6.4") 22 | 23 | # nose is used for test 24 | extra_modules = {} 25 | 26 | setup( 27 | name="ibolang", 28 | version=release.version, 29 | author=release.author, 30 | author_email=release.email, 31 | download_url=release.download_url, 32 | py_modules=['core','ibolang','ig_tran','igbolang',], 33 | license=license, 34 | keywords = "traditional, simplified, Igbo, Afrocode, language, tokenize", 35 | description=release.description, 36 | long_description=release.long_description, 37 | url=release.url, 38 
| zip_safe=False, 39 | install_requires = required_modules, 40 | extras_require = extra_modules, 41 | include_package_data = True, 42 | packages=find_packages(exclude=["ez_setup", 'examples', 'apidocs', "tests", "ibl"]), 43 | entry_points = """ 44 | [console_scripts] 45 | ibolang = ibolang:commandline 46 | 47 | """, 48 | classifiers = [ 49 | 'Development Status :: 4 - Beta', 50 | 'Environment :: Console', 51 | 'Intended Audience :: Education', 52 | 'Intended Audience :: Developers', 53 | 'Intended Audience :: System Administrators', 54 | 'License :: OSI Approved :: MIT License', 55 | 'Operating System :: OS Independent', 56 | 'Programming Language :: Python', 57 | 'Topic :: Software Development :: Libraries :: Python Modules', 58 | 'Topic :: Software Development :: Code Generators'], 59 | #test_suite = 'nose.collector', 60 | ) 61 | 62 | --------------------------------------------------------------------------------

Absolute Beginners

Then: