├── requirements.txt ├── tests ├── fixtures │ ├── empty_file.hcl │ ├── float.hcl │ ├── empty.hcl │ ├── escape.hcl │ ├── flat.hcl │ ├── float.json │ ├── multi_line_comment.json │ ├── multiline.json │ ├── single_line_comment.json │ ├── multiline_bad.hcl │ ├── tab_heredoc.json │ ├── tab_heredoc.hcl │ ├── basic.hcl │ ├── array_comment.hcl │ ├── basic_squish.hcl │ ├── multi_line_comment.hcl │ ├── structure_list.hcl │ ├── array_comment.json │ ├── basic.json │ ├── only_comments.hcl │ ├── structure_flatmap.hcl │ ├── structure_multi.hcl │ ├── heredoc_terminator_same_line.json │ ├── scientific.hcl │ ├── terraform_heroku.hcl │ ├── structure_flatmap.json │ ├── heredoc_terminator_same_line.hcl │ ├── structure.hcl │ ├── structure_list.json │ ├── multi_line_comment_M.json │ ├── structure.json │ ├── structure_flat.json │ ├── single_line_comment.hcl │ ├── terraform_heroku.json │ ├── structure_comment.hcl │ ├── structure_comment_L.json │ ├── scientific.json │ ├── structure_comment_M.json │ ├── structure_multi.json │ ├── structure2.json │ ├── structure2.hcl │ ├── nested_lists.hcl │ ├── single_line_comment_L.json │ ├── decode_tf_variable.hcl │ ├── structure_comment_A.json │ ├── decode_policy.hcl │ ├── structure_list_deep.hcl │ ├── function.hcl │ ├── issue12.hcl │ ├── decode_tf_variable.json │ ├── nested_lists.json │ ├── structure_three_tiers.hcl │ ├── decode_policy.json │ ├── function.json │ ├── issue12.json │ ├── structure_list_deep.json │ └── structure_three_tiers.json ├── lex-fixtures │ ├── list.hcl │ ├── list_comma.hcl │ ├── empty_heredoc.hcl │ ├── multiple.hcl │ ├── structure_empty.hcl │ ├── unterminated_block_comment.hcl │ ├── nested_comment.hcl │ ├── old.hcl │ ├── array_comment.hcl │ ├── assign_deep.hcl │ ├── heredoc_terminator_same_line.hcl │ ├── structure_basic.hcl │ ├── assign_colon.hcl │ ├── structure_comma.hcl │ ├── structure.hcl │ ├── list_of_maps.hcl │ ├── types.hcl │ ├── comment.hcl │ ├── windows_heredoc.hcl │ ├── complex.hcl │ ├── terraform0.12syntax.hcl │ └── conditional_operator.hcl ├── run_tests.sh ├── test_load_dump.py ├── test_parser.py ├── test_decoder.py └── test_lexer.py ├── testing-requirements.txt ├── setup.cfg ├── src └── hcl │ ├── ply │ ├── __init__.py │ └── lex.py │ ├── __init__.py │ ├── api.py │ ├── lexer.py │ └── parser.py ├── .gitignore ├── MANIFEST.in ├── pyproject.toml ├── scripts └── hcltool ├── CHANGELOG.md ├── .github └── workflows │ └── dist.yml ├── README.rst ├── setup.py └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/empty_file.hcl: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/float.hcl: -------------------------------------------------------------------------------- 1 | a = 1.02 2 | -------------------------------------------------------------------------------- /testing-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | coverage 3 | -------------------------------------------------------------------------------- /tests/fixtures/empty.hcl: -------------------------------------------------------------------------------- 1 | resource "foo" {} 2 | -------------------------------------------------------------------------------- /tests/fixtures/escape.hcl: 
-------------------------------------------------------------------------------- 1 | foo = "bar\"baz\\n" 2 | -------------------------------------------------------------------------------- /tests/fixtures/flat.hcl: -------------------------------------------------------------------------------- 1 | foo = "bar" 2 | Key = 7 3 | -------------------------------------------------------------------------------- /tests/fixtures/float.json: -------------------------------------------------------------------------------- 1 | { 2 | "a": 1.02 3 | } 4 | -------------------------------------------------------------------------------- /tests/lex-fixtures/list.hcl: -------------------------------------------------------------------------------- 1 | foo = [1, 2, "foo"] 2 | -------------------------------------------------------------------------------- /tests/lex-fixtures/list_comma.hcl: -------------------------------------------------------------------------------- 1 | foo = [1, 2, "foo",] 2 | -------------------------------------------------------------------------------- /tests/lex-fixtures/empty_heredoc.hcl: -------------------------------------------------------------------------------- 1 | empty = <`__ in Python. This 6 | implementation aims to be compatible with the original golang version of 7 | the parser. 8 | 9 | pyhcl does not support HCL2 (which is what modern terraform uses). You might try 10 | https://pypi.org/project/python-hcl2/ instead (though I've never personally tried it). 11 | 12 | The grammar and many of the tests/fixtures were copied/ported from the 13 | golang parser into pyhcl. All releases are tested with a variety of 14 | python versions from Python 2.7 onward. 15 | 16 | This version has been modified to work with terraform 0.12 syntax. 17 | It should be backward compatible with earlier versions. 18 | It doesn't cover every situation. See discussion in pull request: 19 | https://github.com/virtuald/pyhcl/pull/57 20 | 21 | Installation 22 | ============ 23 | 24 | :: 25 | 26 | pip install pyhcl 27 | 28 | Usage 29 | ===== 30 | 31 | This module is intended to be used in mostly the same way that one would 32 | use the json module in python, and load/loads/dumps are implemented. 33 | 34 | :: 35 | 36 | import hcl 37 | 38 | with open('file.hcl', 'r') as fp: 39 | obj = hcl.load(fp) 40 | 41 | Currently the dumps function outputs JSON, and not HCL. 42 | 43 | Convert HCL to JSON 44 | ------------------- 45 | 46 | pyhcl comes with a script that you can use to easily convert HCL to JSON, 47 | similar to the json.tool that comes with python:: 48 | 49 | hcltool INFILE [OUTFILE] 50 | 51 | Structure Validation 52 | -------------------- 53 | 54 | Similar to JSON, the output of parsing HCL is a python dictionary with 55 | no defined structure. The golang library for HCL implements support for 56 | parsing HCL according to defined objects, but this implementation does 57 | not currently support such constructs. 58 | 59 | Instead, I recommend that you use tools designed to validate JSON, such 60 | as the `schematics `_ library. 61 | 62 | Syntax 63 | ====== 64 | 65 | - Single line comments start with ``#`` or ``//`` 66 | 67 | - Multi-line comments are wrapped in ``/*`` and ``*/`` 68 | 69 | - Values are assigned with the syntax ``key = value`` (whitespace 70 | doesn't matter). The value can be any primitive: a string, number, 71 | boolean, object, or list. 72 | 73 | - Strings are double-quoted and can contain any UTF-8 characters. 
74 | Example: ``"Hello, World"`` 75 | 76 | - Numbers are assumed to be base 10. If you prefix a number with 0x, it 77 | is treated as a hexadecimal. If it is prefixed with 0, it is treated 78 | as an octal. Numbers can be in scientific notation: "1e10". 79 | 80 | - Boolean values: ``true``, ``false`` 81 | 82 | - Arrays can be made by wrapping it in ``[]``. Example: 83 | ``["foo", "bar", 42]``. Arrays can contain primitives and other 84 | arrays, but cannot contain objects. Objects must use the block syntax 85 | shown below. 86 | 87 | Objects and nested objects are created using the structure shown below:: 88 | 89 | variable "ami" { 90 | description = "the AMI to use" 91 | } 92 | 93 | Testing 94 | ======= 95 | 96 | To run the tests:: 97 | 98 | pip install -r testing-requirements.txt 99 | tests/run_tests.sh 100 | 101 | Debug Mode 102 | ---------- 103 | 104 | To enable debug mode:: 105 | 106 | import hcl 107 | hcl.parser.DEBUG = True 108 | 109 | Authors 110 | ======= 111 | 112 | Dustin Spicuzza (dustin@virtualroadside.com) 113 | 114 | Note: This project is not associated with Hashicorp 115 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | from os.path import abspath, dirname, join, exists 6 | 7 | try: 8 | from setuptools.command.build_py import build_py as _build_py 9 | except ImportError: 10 | from distutils.command.build_py import build_py as _build_py 11 | 12 | from distutils.core import setup 13 | 14 | import os 15 | import sys 16 | import subprocess 17 | 18 | setup_dir = abspath(dirname(__file__)) 19 | version_file = join(setup_dir, 'src', 'hcl', 'version.py') 20 | 21 | 22 | def _pre_install(): 23 | '''Initialize the parse table at install time''' 24 | 25 | # Generate the parsetab.dat file at setup time 26 | dat = join(setup_dir, 'src', 'hcl', 'parsetab.dat') 27 | if exists(dat): 28 | os.unlink(dat) 29 | 30 | sys.path.insert(0, join(setup_dir, 'src')) 31 | 32 | import hcl 33 | from hcl.parser import HclParser 34 | 35 | parser = HclParser() 36 | 37 | 38 | class build_py(_build_py): 39 | def run(self): 40 | self.execute(_pre_install, (), msg="generating parse table...") 41 | _build_py.run(self) 42 | 43 | 44 | # Automatically generate a version.py based on the git version 45 | if exists(join(setup_dir, '.git')): 46 | p = subprocess.Popen( 47 | ["git", "describe", "--tags", "--long", "--dirty=-dirty"], 48 | stdout=subprocess.PIPE, 49 | stderr=subprocess.PIPE, 50 | ) 51 | out, err = p.communicate() 52 | # Make sure the git version has at least one tag 53 | if err: 54 | print("Error: You need to create a tag for this repo to use the builder") 55 | sys.exit(1) 56 | 57 | # Convert git version to PEP440 compliant version 58 | # - Older versions of pip choke on local identifiers, so we can't include the git commit 59 | v, commits, local = out.decode('utf-8').rstrip().split('-', 2) 60 | if commits != '0' or '-dirty' in local: 61 | v = '%s.post0.dev%s' % (v, commits) 62 | 63 | # Create the version.py file 64 | with open(version_file, 'w') as fp: 65 | fp.write("# Autogenerated by setup.py\n__version__ = '{0}'".format(v)) 66 | 67 | with open(join(setup_dir, 'README.rst'), 'r') as readme_file: 68 | long_description = readme_file.read() 69 | 70 | with open(version_file) as fp: 71 | exec(compile(fp.read(), 'version.py', 'exec'), {}, locals()) 72 | 73 | install_requires = open(join(setup_dir, 
'requirements.txt')).readlines() 74 | 75 | setup( 76 | name='pyhcl', 77 | version=__version__, 78 | description='HCL configuration parser for python', 79 | long_description=long_description, 80 | author='Dustin Spicuzza', 81 | author_email='dustin@virtualroadside.com', 82 | url='https://github.com/virtuald/pyhcl', 83 | package_dir={'': 'src'}, 84 | package_data={'hcl': ['src/hcl/parsetab.dat']}, 85 | packages=['hcl','hcl.ply'], 86 | scripts=["scripts/hcltool"], 87 | include_package_data=True, 88 | setup_requires=install_requires, 89 | install_requires=install_requires, 90 | cmdclass={'build_py': build_py}, 91 | classifiers=[ 92 | "Development Status :: 5 - Production/Stable", 93 | "Intended Audience :: Developers", 94 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 95 | "Operating System :: OS Independent", 96 | "Programming Language :: Python", 97 | "Programming Language :: Python :: 2", 98 | "Programming Language :: Python :: 2.7", 99 | "Programming Language :: Python :: 3", 100 | "Programming Language :: Python :: 3.3", 101 | "Programming Language :: Python :: 3.4", 102 | "Topic :: Text Processing", 103 | ], 104 | ) 105 | -------------------------------------------------------------------------------- /tests/test_decoder.py: -------------------------------------------------------------------------------- 1 | # 2 | # These tests are taken from decoder_test.go 3 | # 4 | 5 | from __future__ import print_function 6 | 7 | from os.path import join, dirname 8 | import hcl 9 | import json 10 | 11 | import pytest 12 | 13 | # hcl, json, dict 14 | FIXTURE_DIR = join(dirname(__file__), 'fixtures') 15 | FIXTURES = [ 16 | ('array_comment.hcl', 'array_comment.json', None), 17 | ('basic.hcl', 'basic.json', None), 18 | ('basic_squish.hcl', None, {'foo': 'bar', 'bar': '${file("bing/bong.txt")}', 'foo-bar':"baz"}), 19 | ('decode_policy.hcl', 'decode_policy.json', None), 20 | ('decode_tf_variable.hcl', 'decode_tf_variable.json', None), 21 | ('empty.hcl', None, {'resource': {'foo': {}}}), 22 | ('empty_file.hcl', None, {}), 23 | ('escape.hcl', None, {'foo': 'bar"baz\\n'}), 24 | ('flat.hcl', None, {'foo': 'bar', 'Key': 7}), 25 | ('float.hcl', None, {'a': 1.02}), 26 | ('float.hcl', 'float.json', None), 27 | ('function.hcl', 'function.json', None), 28 | ('issue12.hcl', 'issue12.json', None), 29 | ('multiline_bad.hcl', 'multiline.json', None), 30 | ('nested_lists.hcl', 'nested_lists.json', None), 31 | ('only_comments.hcl', None, {}), 32 | ('scientific.hcl', 'scientific.json', None), 33 | ('structure.hcl', 'structure_flat.json', None), 34 | #('structure2.hcl', 'structure2.json', None), # not in the golang tests 35 | ('structure_flatmap.hcl', 'structure_flatmap.json', None), 36 | ('structure_list.hcl', 'structure_list.json', None), # these don't match in golang either 37 | ('structure_list.hcl', None, {'foo': [{'key': 7}, {'key': 12}]}), # nor this 38 | ('issue12.hcl', 'issue12.json', None), 39 | #'structure_list_deep.json' 40 | ('structure_multi.hcl', 'structure_multi.json', None), 41 | ('structure_three_tiers.hcl', 'structure_three_tiers.json', None), 42 | ('tab_heredoc.hcl', 'tab_heredoc.json', None), 43 | ('terraform_heroku.hcl', 'terraform_heroku.json', None), 44 | ('structure_list_deep.hcl','structure_list_deep.json', None), 45 | ('heredoc_terminator_same_line.hcl','heredoc_terminator_same_line.json', None), 46 | ] 47 | 48 | 49 | 50 | 51 | @pytest.mark.parametrize("hcl_fname,json_fname,struct", FIXTURES) 52 | def test_decoder(hcl_fname, json_fname, struct): 53 | 54 | with 
open(join(FIXTURE_DIR, hcl_fname), 'r') as fp: 55 | hcl_json = hcl.load(fp) 56 | 57 | assert json_fname is not None or struct is not None 58 | 59 | if json_fname is not None: 60 | with open(join(FIXTURE_DIR, json_fname), 'r') as fp: 61 | good_json = json.load(fp) 62 | 63 | assert hcl_json == good_json 64 | 65 | if struct is not None: 66 | assert hcl_json == struct 67 | 68 | 69 | COMMENTED_FIXTURES = [ 70 | ('single_line_comment.hcl', 'single_line_comment_L.json', "single_line_comment.json", 'single_line_comment_L.json'), 71 | ('multi_line_comment.hcl', 'multi_line_comment.json', 'multi_line_comment_M.json', 'multi_line_comment_M.json'), 72 | ('structure_comment.hcl', 'structure_comment_L.json', 'structure_comment_M.json', 'structure_comment_A.json'), 73 | ('array_comment.hcl', 'array_comment.json', 'array_comment.json', 'array_comment.json') 74 | ] 75 | 76 | @pytest.mark.parametrize("export_comments", ['LINE', 'MULTILINE', 'ALL']) 77 | @pytest.mark.parametrize("hcl_fname,sline_fname,mline_fname,aline_fname", COMMENTED_FIXTURES) 78 | def test_decoder_export_comments(hcl_fname, sline_fname, mline_fname, aline_fname, export_comments): 79 | with open(join(FIXTURE_DIR, hcl_fname), 'r') as fp: 80 | hcl_json = hcl.load(fp, export_comments) 81 | 82 | json_fname = { 83 | "LINE": sline_fname, 84 | "MULTILINE": mline_fname, 85 | "ALL": aline_fname 86 | } 87 | 88 | with open(join(FIXTURE_DIR, json_fname[export_comments]), 'r') as fp: 89 | good_json = json.load(fp) 90 | 91 | assert hcl_json == good_json -------------------------------------------------------------------------------- /src/hcl/lexer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | 4 | from .ply import lex 5 | 6 | if sys.version_info < (3,): 7 | text_type = unicode 8 | else: 9 | text_type = str 10 | 11 | 12 | def _raise_error(t, message=None): 13 | lexpos = t.lexer.lexpos 14 | lexdata = t.lexer.lexdata 15 | lineno = t.lexer.lineno 16 | column = _find_column(lexdata, t) 17 | if message is None: 18 | message = "Illegal character '%s'" % lexdata[lexpos] 19 | raise ValueError( 20 | "Line %d, column %d, index %d: %s" % (lineno, column, lexpos, message) 21 | ) 22 | 23 | 24 | def _find_column(input, token): 25 | last_cr = input.rfind('\n', 0, token.lexpos) 26 | column = (token.lexpos - last_cr) - 1 27 | return column 28 | 29 | 30 | class Lexer(object): 31 | 32 | tokens = ( 33 | 'BOOL', 34 | 'FLOAT', 35 | 'NUMBER', 36 | 'COMMA', 37 | 'COMMENT', 38 | 'MULTICOMMENT', 39 | 'IDENTIFIER', 40 | 'EQUAL', 41 | 'STRING', 42 | 'ADD', 43 | 'MINUS', 44 | 'MULTIPLY', 45 | 'DIVIDE', 46 | 'LEFTBRACE', 47 | 'RIGHTBRACE', 48 | 'LEFTBRACKET', 49 | 'RIGHTBRACKET', 50 | 'PERIOD', 51 | 'EPLUS', 52 | 'EMINUS', 53 | 'LEFTPAREN', 54 | 'RIGHTPAREN', 55 | 'QMARK', 56 | 'COLON', 57 | 'ASTERISK_PERIOD', 58 | 'GT', 59 | 'LT', 60 | 'EQ', 61 | 'NE', 62 | 'LE', 63 | 'GE', 64 | ) 65 | 66 | states = ( 67 | ('stringdollar', 'exclusive'), 68 | ('string', 'exclusive'), 69 | ('heredoc', 'exclusive'), 70 | ('tabbedheredoc', 'exclusive'), 71 | ) 72 | 73 | can_export_comments = [] 74 | 75 | def t_BOOL(self, t): 76 | r'(true)|(false)' 77 | t.value = t.value == 'true' 78 | return t 79 | 80 | def t_EMINUS(self, t): 81 | r'(?<=\d|\.)[eE]-' 82 | return t 83 | 84 | def t_EPLUS(self, t): 85 | r'(?<=\d)[eE]\+?|(?<=\d\.)[eE]\+?' 
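        # EPLUS (and EMINUS above) tokenize the exponent marker of scientific-notation
        # numbers such as 1e10 or 1.5e+3; the lookbehinds only match when a digit (or a
        # digit followed by '.') immediately precedes the 'e'/'E'.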
86 | return t 87 | 88 | def t_FLOAT(self, t): 89 | r'-?((\d+\.\d*)|(\d*\.\d+))' 90 | t.value = float(t.value) 91 | return t 92 | 93 | def t_hexnumber(self, t): 94 | r'-?0[xX][0-9a-fA-F]+' 95 | t.value = int(t.value, base=16) 96 | t.type = 'NUMBER' 97 | return t 98 | 99 | def t_intnumber(self, t): 100 | r'-?\d+' 101 | t.value = int(t.value) 102 | t.type = 'NUMBER' 103 | return t 104 | 105 | def t_PERIOD(self, t): 106 | r'\.' 107 | return t 108 | 109 | def t_COMMA(self, t): 110 | r',' 111 | return t 112 | 113 | def t_QMARK(self, t): 114 | r'\?' 115 | return t 116 | 117 | def t_COLON(self, t): 118 | r':' 119 | return t 120 | 121 | def t_ASTERISK_PERIOD(self, t): 122 | r'\*\.' 123 | return t 124 | 125 | def t_GT(self, t): 126 | r'(?)>(?!>|=)' 127 | return t 128 | 129 | def t_LT(self, t): 130 | r'(?=' 147 | return t 148 | 149 | def t_IDENTIFIER(self, t): 150 | r'[^\W\d][\w.-]*' 151 | t.value = text_type(t.value) 152 | return t 153 | 154 | # Strings 155 | def t_string(self, t): 156 | # Start of a string 157 | r'\"' 158 | # abs_start is the absolute start of the string. We use this at the end 159 | # to know how many new lines we've consumed 160 | t.lexer.abs_start = t.lexer.lexpos 161 | # rel_pos is the begining of the unconsumed part of the string. It will 162 | # get modified when consuming escaped characters 163 | t.lexer.rel_pos = t.lexer.lexpos 164 | # The value of the consumed part of the string 165 | t.lexer.string_value = u'' 166 | t.lexer.begin('string') 167 | 168 | def t_string_escapedchar(self, t): 169 | # If a quote or backslash is escaped, build up the string by ignoring 170 | # the escape character. Should this be done for other characters? 171 | r'(?<=\\)(\"|\\)' 172 | t.lexer.string_value += ( 173 | t.lexer.lexdata[t.lexer.rel_pos : t.lexer.lexpos - 2] + t.value 174 | ) 175 | t.lexer.rel_pos = t.lexer.lexpos 176 | pass 177 | 178 | def t_string_stringdollar(self, t): 179 | # Left brace preceeded by a dollar 180 | r'(?<=\$)\{' 181 | t.lexer.braces = 1 182 | t.lexer.begin('stringdollar') 183 | 184 | def t_string_ignoring(self, t): 185 | # Ignore everything except for a quote 186 | r'[^\"]' 187 | pass 188 | 189 | def t_string_STRING(self, t): 190 | # End of the string 191 | r'\"' 192 | t.value = ( 193 | t.lexer.string_value + t.lexer.lexdata[t.lexer.rel_pos : t.lexer.lexpos - 1] 194 | ) 195 | t.lexer.lineno += t.lexer.lexdata[t.lexer.abs_start : t.lexer.lexpos - 1].count( 196 | '\n' 197 | ) 198 | t.lexer.begin('INITIAL') 199 | return t 200 | 201 | def t_string_eof(self, t): 202 | t.lexer.lineno += t.lexer.lexdata[t.lexer.abs_start : t.lexer.lexpos].count( 203 | '\n' 204 | ) 205 | _raise_error(t, 'EOF before closing string quote') 206 | 207 | def t_stringdollar_dontcare(self, t): 208 | # Ignore everything except for braces 209 | r'[^\{\}]' 210 | pass 211 | 212 | def t_stringdollar_lbrace(self, t): 213 | r'\{' 214 | t.lexer.braces += 1 215 | 216 | def t_stringdollar_rbrace(self, t): 217 | r'\}' 218 | t.lexer.braces -= 1 219 | 220 | if t.lexer.braces == 0: 221 | # End of the dollar brace, back to the rest of the string 222 | t.lexer.begin('string') 223 | 224 | def t_stringdollar_eof(self, t): 225 | t.lexer.lineno += t.lexer.lexdata[t.lexer.abs_start : t.lexer.lexpos].count( 226 | '\n' 227 | ) 228 | _raise_error(t, "EOF before closing '${}' expression") 229 | 230 | def _init_heredoc(self, t): 231 | t.lexer.here_start = t.lexer.lexpos 232 | 233 | if t.value.endswith('\r\n'): 234 | t.lexer.newline_chars = 2 235 | else: 236 | t.lexer.newline_chars = 1 237 | 238 | if t.lexer.is_tabbed: 239 | 
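            # A '<<-' ("tabbed") heredoc has a three-character introducer; the leading
            # tabs on its body lines are stripped later, in _end_heredoc.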
# Chop '<<-' 240 | chop = 3 241 | else: 242 | # Chop '<<' 243 | chop = 2 244 | 245 | t.lexer.here_identifier = t.value[chop : -t.lexer.newline_chars] 246 | # We consumed a newline in the regex so bump the counter 247 | t.lexer.lineno += 1 248 | 249 | def t_tabbedheredoc(self, t): 250 | r'<<-\S+\r?\n' 251 | t.lexer.is_tabbed = True 252 | self._init_heredoc(t) 253 | t.lexer.begin('tabbedheredoc') 254 | 255 | def t_heredoc(self, t): 256 | r'<<\S+\r?\n' 257 | t.lexer.is_tabbed = False 258 | self._init_heredoc(t) 259 | t.lexer.begin('heredoc') 260 | 261 | def _end_heredoc(self, t): 262 | if t.lexer.is_tabbed: 263 | # Strip leading tabs 264 | value = t.value.strip() 265 | else: 266 | value = t.value 267 | 268 | if value == t.lexer.here_identifier: 269 | # Handle case where identifier is on a line of its own. Need to 270 | # subtract the current line and the newline characters from 271 | # the previous line to get the endpos 272 | endpos = t.lexer.lexpos - (t.lexer.newline_chars + len(t.value)) 273 | elif value.endswith(t.lexer.here_identifier): 274 | # Handle case where identifier is at the end of the line. Need to 275 | # subtract the identifier from to get the endpos 276 | endpos = t.lexer.lexpos - len(t.lexer.here_identifier) 277 | else: 278 | return 279 | 280 | entire_string = t.lexer.lexdata[t.lexer.here_start : endpos] 281 | 282 | if t.lexer.is_tabbed: 283 | # Get rid of any initial tabs, and remove any tabs preceded by 284 | # a new line 285 | chopped_starting_tabs = re.sub('^\t*', '', entire_string) 286 | t.value = re.sub('\n\t*', '\n', chopped_starting_tabs) 287 | else: 288 | t.value = entire_string 289 | 290 | t.lexer.lineno += t.lexer.lexdata[t.lexer.here_start : t.lexer.lexpos].count( 291 | '\n' 292 | ) 293 | t.lexer.begin('INITIAL') 294 | return t 295 | 296 | def t_tabbedheredoc_STRING(self, t): 297 | r'^\t*.+?(?=\r?$)' 298 | return self._end_heredoc(t) 299 | 300 | def t_heredoc_STRING(self, t): 301 | r'^.+?(?=\r?$)' 302 | return self._end_heredoc(t) 303 | 304 | def t_heredoc_ignoring(self, t): 305 | r'.+|\n' 306 | pass 307 | 308 | def t_heredoc_eof(self, t): 309 | t.lexer.lineno += t.lexer.lexdata[t.lexer.here_start : t.lexer.lexpos].count( 310 | '\n' 311 | ) 312 | _raise_error(t, 'EOF before closing heredoc') 313 | 314 | t_tabbedheredoc_ignoring = t_heredoc_ignoring 315 | t_tabbedheredoc_eof = t_heredoc_eof 316 | 317 | t_LEFTBRACE = r'\{' 318 | t_RIGHTBRACE = r'\}' 319 | t_LEFTBRACKET = r'\[' 320 | t_RIGHTBRACKET = r'\]' 321 | t_LEFTPAREN = r'\(' 322 | t_RIGHTPAREN = r'\)' 323 | 324 | def t_COMMENT(self, t): 325 | r'(\#|(//)).*' 326 | if 'COMMENT' in self.can_export_comments: 327 | t.value = t.value.lstrip('#').lstrip('//').lstrip() 328 | return t 329 | 330 | def t_MULTICOMMENT(self, t): 331 | r'/\*(.|\n)*?(\*/)' 332 | t.lexer.lineno += t.value.count('\n') 333 | if 'MULTICOMMENT' in self.can_export_comments: 334 | return t 335 | 336 | # Define a rule so we can track line numbers 337 | def t_newline(self, t): 338 | r'\n+' 339 | t.lexer.lineno += len(t.value) 340 | 341 | t_ignore = ' \t\r\f\v' 342 | 343 | t_EQUAL = r'(?"), 252 | ("LT", "<"), 253 | ("EQ", "=="), 254 | ("NE", "!="), 255 | ("LE", "<="), 256 | ("GE", ">="), 257 | ("ASTERISK_PERIOD", "*."), 258 | 259 | # Bools 260 | ("BOOL", "true"), 261 | ("BOOL", "false"), 262 | 263 | # Identifier 264 | ("IDENTIFIER", "a"), 265 | ("IDENTIFIER", "a0"), 266 | ("IDENTIFIER", "foobar"), 267 | ("IDENTIFIER", "foo-bar"), 268 | ("IDENTIFIER", "abc123"), 269 | ("IDENTIFIER", "LGTM"), 270 | ("IDENTIFIER", "_"), 271 | ("IDENTIFIER", "_abc123"), 
272 | ("IDENTIFIER", "abc123_"), 273 | ("IDENTIFIER", "_abc_123_"), 274 | ("IDENTIFIER", "_äöü"), 275 | ("IDENTIFIER", "_本"), 276 | ("IDENTIFIER", "äöü"), 277 | ("IDENTIFIER", "本"), 278 | ("IDENTIFIER", "a۰۱۸"), 279 | ("IDENTIFIER", "foo६४"), 280 | ("IDENTIFIER", "bar9876"), 281 | 282 | # Heredoc 283 | ("STRING", "< 0: 514 | lexindexfunc[i] = (None, None) 515 | else: 516 | lexindexfunc[i] = (None, toknames[f]) 517 | 518 | return [(lexre, lexindexfunc)], [regex], [lexindexnames] 519 | except Exception: 520 | m = int(len(relist)/2) 521 | if m == 0: 522 | m = 1 523 | llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) 524 | rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) 525 | return (llist+rlist), (lre+rre), (lnames+rnames) 526 | 527 | # ----------------------------------------------------------------------------- 528 | # def _statetoken(s,names) 529 | # 530 | # Given a declaration name s of the form "t_" and a dictionary whose keys are 531 | # state names, this function returns a tuple (states,tokenname) where states 532 | # is a tuple of state names and tokenname is the name of the token. For example, 533 | # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') 534 | # ----------------------------------------------------------------------------- 535 | def _statetoken(s, names): 536 | parts = s.split('_') 537 | for i, part in enumerate(parts[1:], 1): 538 | if part not in names and part != 'ANY': 539 | break 540 | 541 | if i > 1: 542 | states = tuple(parts[1:i]) 543 | else: 544 | states = ('INITIAL',) 545 | 546 | if 'ANY' in states: 547 | states = tuple(names) 548 | 549 | tokenname = '_'.join(parts[i:]) 550 | return (states, tokenname) 551 | 552 | 553 | # ----------------------------------------------------------------------------- 554 | # LexerReflect() 555 | # 556 | # This class represents information needed to build a lexer as extracted from a 557 | # user's input file. 
558 | # ----------------------------------------------------------------------------- 559 | class LexerReflect(object): 560 | def __init__(self, ldict, log=None, reflags=0): 561 | self.ldict = ldict 562 | self.error_func = None 563 | self.tokens = [] 564 | self.reflags = reflags 565 | self.stateinfo = {'INITIAL': 'inclusive'} 566 | self.modules = set() 567 | self.error = False 568 | self.log = PlyLogger(sys.stderr) if log is None else log 569 | 570 | # Get all of the basic information 571 | def get_all(self): 572 | self.get_tokens() 573 | self.get_literals() 574 | self.get_states() 575 | self.get_rules() 576 | 577 | # Validate all of the information 578 | def validate_all(self): 579 | self.validate_tokens() 580 | self.validate_literals() 581 | self.validate_rules() 582 | return self.error 583 | 584 | # Get the tokens map 585 | def get_tokens(self): 586 | tokens = self.ldict.get('tokens', None) 587 | if not tokens: 588 | self.log.error('No token list is defined') 589 | self.error = True 590 | return 591 | 592 | if not isinstance(tokens, (list, tuple)): 593 | self.log.error('tokens must be a list or tuple') 594 | self.error = True 595 | return 596 | 597 | if not tokens: 598 | self.log.error('tokens is empty') 599 | self.error = True 600 | return 601 | 602 | self.tokens = tokens 603 | 604 | # Validate the tokens 605 | def validate_tokens(self): 606 | terminals = {} 607 | for n in self.tokens: 608 | if not _is_identifier.match(n): 609 | self.log.error("Bad token name '%s'", n) 610 | self.error = True 611 | if n in terminals: 612 | self.log.warning("Token '%s' multiply defined", n) 613 | terminals[n] = 1 614 | 615 | # Get the literals specifier 616 | def get_literals(self): 617 | self.literals = self.ldict.get('literals', '') 618 | if not self.literals: 619 | self.literals = '' 620 | 621 | # Validate literals 622 | def validate_literals(self): 623 | try: 624 | for c in self.literals: 625 | if not isinstance(c, StringTypes) or len(c) > 1: 626 | self.log.error('Invalid literal %s. Must be a single character', repr(c)) 627 | self.error = True 628 | 629 | except TypeError: 630 | self.log.error('Invalid literals specification. literals must be a sequence of characters') 631 | self.error = True 632 | 633 | def get_states(self): 634 | self.states = self.ldict.get('states', None) 635 | # Build statemap 636 | if self.states: 637 | if not isinstance(self.states, (tuple, list)): 638 | self.log.error('states must be defined as a tuple or list') 639 | self.error = True 640 | else: 641 | for s in self.states: 642 | if not isinstance(s, tuple) or len(s) != 2: 643 | self.log.error("Invalid state specifier %s. 
Must be a tuple (statename,'exclusive|inclusive')", repr(s)) 644 | self.error = True 645 | continue 646 | name, statetype = s 647 | if not isinstance(name, StringTypes): 648 | self.log.error('State name %s must be a string', repr(name)) 649 | self.error = True 650 | continue 651 | if not (statetype == 'inclusive' or statetype == 'exclusive'): 652 | self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) 653 | self.error = True 654 | continue 655 | if name in self.stateinfo: 656 | self.log.error("State '%s' already defined", name) 657 | self.error = True 658 | continue 659 | self.stateinfo[name] = statetype 660 | 661 | # Get all of the symbols with a t_ prefix and sort them into various 662 | # categories (functions, strings, error functions, and ignore characters) 663 | 664 | def get_rules(self): 665 | tsymbols = [f for f in self.ldict if f[:2] == 't_'] 666 | 667 | # Now build up a list of functions and a list of strings 668 | self.toknames = {} # Mapping of symbols to token names 669 | self.funcsym = {} # Symbols defined as functions 670 | self.strsym = {} # Symbols defined as strings 671 | self.ignore = {} # Ignore strings by state 672 | self.errorf = {} # Error functions by state 673 | self.eoff = {} # EOF functions by state 674 | 675 | for s in self.stateinfo: 676 | self.funcsym[s] = [] 677 | self.strsym[s] = [] 678 | 679 | if len(tsymbols) == 0: 680 | self.log.error('No rules of the form t_rulename are defined') 681 | self.error = True 682 | return 683 | 684 | for f in tsymbols: 685 | t = self.ldict[f] 686 | states, tokname = _statetoken(f, self.stateinfo) 687 | self.toknames[f] = tokname 688 | 689 | if hasattr(t, '__call__'): 690 | if tokname == 'error': 691 | for s in states: 692 | self.errorf[s] = t 693 | elif tokname == 'eof': 694 | for s in states: 695 | self.eoff[s] = t 696 | elif tokname == 'ignore': 697 | line = t.__code__.co_firstlineno 698 | file = t.__code__.co_filename 699 | self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) 700 | self.error = True 701 | else: 702 | for s in states: 703 | self.funcsym[s].append((f, t)) 704 | elif isinstance(t, StringTypes): 705 | if tokname == 'ignore': 706 | for s in states: 707 | self.ignore[s] = t 708 | if '\\' in t: 709 | self.log.warning("%s contains a literal backslash '\\'", f) 710 | 711 | elif tokname == 'error': 712 | self.log.error("Rule '%s' must be defined as a function", f) 713 | self.error = True 714 | else: 715 | for s in states: 716 | self.strsym[s].append((f, t)) 717 | else: 718 | self.log.error('%s not defined as a function or string', f) 719 | self.error = True 720 | 721 | # Sort the functions by line number 722 | for f in self.funcsym.values(): 723 | f.sort(key=lambda x: x[1].__code__.co_firstlineno) 724 | 725 | # Sort the strings by regular expression length 726 | for s in self.strsym.values(): 727 | s.sort(key=lambda x: len(x[1]), reverse=True) 728 | 729 | # Validate all of the t_rules collected 730 | def validate_rules(self): 731 | for state in self.stateinfo: 732 | # Validate all rules defined by functions 733 | 734 | for fname, f in self.funcsym[state]: 735 | line = f.__code__.co_firstlineno 736 | file = f.__code__.co_filename 737 | module = inspect.getmodule(f) 738 | self.modules.add(module) 739 | 740 | tokname = self.toknames[fname] 741 | if isinstance(f, types.MethodType): 742 | reqargs = 2 743 | else: 744 | reqargs = 1 745 | nargs = f.__code__.co_argcount 746 | if nargs > reqargs: 747 | self.log.error("%s:%d: Rule '%s' has too many arguments", file, 
line, f.__name__) 748 | self.error = True 749 | continue 750 | 751 | if nargs < reqargs: 752 | self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) 753 | self.error = True 754 | continue 755 | 756 | if not _get_regex(f): 757 | self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) 758 | self.error = True 759 | continue 760 | 761 | try: 762 | c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) 763 | if c.match(''): 764 | self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) 765 | self.error = True 766 | except re.error as e: 767 | self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) 768 | if '#' in _get_regex(f): 769 | self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) 770 | self.error = True 771 | 772 | # Validate all rules defined by strings 773 | for name, r in self.strsym[state]: 774 | tokname = self.toknames[name] 775 | if tokname == 'error': 776 | self.log.error("Rule '%s' must be defined as a function", name) 777 | self.error = True 778 | continue 779 | 780 | if tokname not in self.tokens and tokname.find('ignore_') < 0: 781 | self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) 782 | self.error = True 783 | continue 784 | 785 | try: 786 | c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) 787 | if (c.match('')): 788 | self.log.error("Regular expression for rule '%s' matches empty string", name) 789 | self.error = True 790 | except re.error as e: 791 | self.log.error("Invalid regular expression for rule '%s'. %s", name, e) 792 | if '#' in r: 793 | self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) 794 | self.error = True 795 | 796 | if not self.funcsym[state] and not self.strsym[state]: 797 | self.log.error("No rules defined for state '%s'", state) 798 | self.error = True 799 | 800 | # Validate the error function 801 | efunc = self.errorf.get(state, None) 802 | if efunc: 803 | f = efunc 804 | line = f.__code__.co_firstlineno 805 | file = f.__code__.co_filename 806 | module = inspect.getmodule(f) 807 | self.modules.add(module) 808 | 809 | if isinstance(f, types.MethodType): 810 | reqargs = 2 811 | else: 812 | reqargs = 1 813 | nargs = f.__code__.co_argcount 814 | if nargs > reqargs: 815 | self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) 816 | self.error = True 817 | 818 | if nargs < reqargs: 819 | self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) 820 | self.error = True 821 | 822 | for module in self.modules: 823 | self.validate_module(module) 824 | 825 | # ----------------------------------------------------------------------------- 826 | # validate_module() 827 | # 828 | # This checks to see if there are duplicated t_rulename() functions or strings 829 | # in the parser input file. This is done using a simple regular expression 830 | # match on each line in the source code of the given module. 
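# Without this check a duplicated rule could go unnoticed, because the later
# definition silently rebinds the t_ name when the module is executed.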
831 | # ----------------------------------------------------------------------------- 832 | 833 | def validate_module(self, module): 834 | try: 835 | lines, linen = inspect.getsourcelines(module) 836 | except IOError: 837 | return 838 | 839 | fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') 840 | sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') 841 | 842 | counthash = {} 843 | linen += 1 844 | for line in lines: 845 | m = fre.match(line) 846 | if not m: 847 | m = sre.match(line) 848 | if m: 849 | name = m.group(1) 850 | prev = counthash.get(name) 851 | if not prev: 852 | counthash[name] = linen 853 | else: 854 | filename = inspect.getsourcefile(module) 855 | self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) 856 | self.error = True 857 | linen += 1 858 | 859 | # ----------------------------------------------------------------------------- 860 | # lex(module) 861 | # 862 | # Build all of the regular expression rules from definitions in the supplied module 863 | # ----------------------------------------------------------------------------- 864 | def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', 865 | reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): 866 | 867 | if lextab is None: 868 | lextab = 'lextab' 869 | 870 | global lexer 871 | 872 | ldict = None 873 | stateinfo = {'INITIAL': 'inclusive'} 874 | lexobj = Lexer() 875 | lexobj.lexoptimize = optimize 876 | global token, input 877 | 878 | if errorlog is None: 879 | errorlog = PlyLogger(sys.stderr) 880 | 881 | if debug: 882 | if debuglog is None: 883 | debuglog = PlyLogger(sys.stderr) 884 | 885 | # Get the module dictionary used for the lexer 886 | if object: 887 | module = object 888 | 889 | # Get the module dictionary used for the parser 890 | if module: 891 | _items = [(k, getattr(module, k)) for k in dir(module)] 892 | ldict = dict(_items) 893 | # If no __file__ attribute is available, try to obtain it from the __module__ instead 894 | if '__file__' not in ldict: 895 | ldict['__file__'] = sys.modules[ldict['__module__']].__file__ 896 | else: 897 | ldict = get_caller_module_dict(2) 898 | 899 | # Determine if the module is package of a package or not. 900 | # If so, fix the tabmodule setting so that tables load correctly 901 | pkg = ldict.get('__package__') 902 | if pkg and isinstance(lextab, str): 903 | if '.' not in lextab: 904 | lextab = pkg + '.' 
+ lextab 905 | 906 | # Collect parser information from the dictionary 907 | linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) 908 | linfo.get_all() 909 | if not optimize: 910 | if linfo.validate_all(): 911 | raise SyntaxError("Can't build lexer") 912 | 913 | if optimize and lextab: 914 | try: 915 | lexobj.readtab(lextab, ldict) 916 | token = lexobj.token 917 | input = lexobj.input 918 | lexer = lexobj 919 | return lexobj 920 | 921 | except ImportError: 922 | pass 923 | 924 | # Dump some basic debugging information 925 | if debug: 926 | debuglog.info('lex: tokens = %r', linfo.tokens) 927 | debuglog.info('lex: literals = %r', linfo.literals) 928 | debuglog.info('lex: states = %r', linfo.stateinfo) 929 | 930 | # Build a dictionary of valid token names 931 | lexobj.lextokens = set() 932 | for n in linfo.tokens: 933 | lexobj.lextokens.add(n) 934 | 935 | # Get literals specification 936 | if isinstance(linfo.literals, (list, tuple)): 937 | lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) 938 | else: 939 | lexobj.lexliterals = linfo.literals 940 | 941 | lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) 942 | 943 | # Get the stateinfo dictionary 944 | stateinfo = linfo.stateinfo 945 | 946 | regexs = {} 947 | # Build the master regular expressions 948 | for state in stateinfo: 949 | regex_list = [] 950 | 951 | # Add rules defined by functions first 952 | for fname, f in linfo.funcsym[state]: 953 | regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) 954 | if debug: 955 | debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) 956 | 957 | # Now add all of the simple rules 958 | for name, r in linfo.strsym[state]: 959 | regex_list.append('(?P<%s>%s)' % (name, r)) 960 | if debug: 961 | debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) 962 | 963 | regexs[state] = regex_list 964 | 965 | # Build the master regular expressions 966 | 967 | if debug: 968 | debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') 969 | 970 | for state in regexs: 971 | lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) 972 | lexobj.lexstatere[state] = lexre 973 | lexobj.lexstateretext[state] = re_text 974 | lexobj.lexstaterenames[state] = re_names 975 | if debug: 976 | for i, text in enumerate(re_text): 977 | debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) 978 | 979 | # For inclusive states, we need to add the regular expressions from the INITIAL state 980 | for state, stype in stateinfo.items(): 981 | if state != 'INITIAL' and stype == 'inclusive': 982 | lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) 983 | lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) 984 | lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) 985 | 986 | lexobj.lexstateinfo = stateinfo 987 | lexobj.lexre = lexobj.lexstatere['INITIAL'] 988 | lexobj.lexretext = lexobj.lexstateretext['INITIAL'] 989 | lexobj.lexreflags = reflags 990 | 991 | # Set up ignore variables 992 | lexobj.lexstateignore = linfo.ignore 993 | lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') 994 | 995 | # Set up error functions 996 | lexobj.lexstateerrorf = linfo.errorf 997 | lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) 998 | if not lexobj.lexerrorf: 999 | errorlog.warning('No t_error rule is defined') 1000 | 1001 | # Set up eof functions 1002 | lexobj.lexstateeoff = linfo.eoff 1003 | lexobj.lexeoff = linfo.eoff.get('INITIAL', None) 1004 | 1005 | # Check state 
information for ignore and error rules 1006 | for s, stype in stateinfo.items(): 1007 | if stype == 'exclusive': 1008 | if s not in linfo.errorf: 1009 | errorlog.warning("No error rule is defined for exclusive state '%s'", s) 1010 | if s not in linfo.ignore and lexobj.lexignore: 1011 | errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) 1012 | elif stype == 'inclusive': 1013 | if s not in linfo.errorf: 1014 | linfo.errorf[s] = linfo.errorf.get('INITIAL', None) 1015 | if s not in linfo.ignore: 1016 | linfo.ignore[s] = linfo.ignore.get('INITIAL', '') 1017 | 1018 | # Create global versions of the token() and input() functions 1019 | token = lexobj.token 1020 | input = lexobj.input 1021 | lexer = lexobj 1022 | 1023 | # If in optimize mode, we write the lextab 1024 | if lextab and optimize: 1025 | if outputdir is None: 1026 | # If no output directory is set, the location of the output files 1027 | # is determined according to the following rules: 1028 | # - If lextab specifies a package, files go into that package directory 1029 | # - Otherwise, files go in the same directory as the specifying module 1030 | if isinstance(lextab, types.ModuleType): 1031 | srcfile = lextab.__file__ 1032 | else: 1033 | if '.' not in lextab: 1034 | srcfile = ldict['__file__'] 1035 | else: 1036 | parts = lextab.split('.') 1037 | pkgname = '.'.join(parts[:-1]) 1038 | exec('import %s' % pkgname) 1039 | srcfile = getattr(sys.modules[pkgname], '__file__', '') 1040 | outputdir = os.path.dirname(srcfile) 1041 | try: 1042 | lexobj.writetab(lextab, outputdir) 1043 | if lextab in sys.modules: 1044 | del sys.modules[lextab] 1045 | except IOError as e: 1046 | errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) 1047 | 1048 | return lexobj 1049 | 1050 | # ----------------------------------------------------------------------------- 1051 | # runmain() 1052 | # 1053 | # This runs the lexer as a main program 1054 | # ----------------------------------------------------------------------------- 1055 | 1056 | def runmain(lexer=None, data=None): 1057 | if not data: 1058 | try: 1059 | filename = sys.argv[1] 1060 | with open(filename) as f: 1061 | data = f.read() 1062 | except IndexError: 1063 | sys.stdout.write('Reading from standard input (type EOF to end):\n') 1064 | data = sys.stdin.read() 1065 | 1066 | if lexer: 1067 | _input = lexer.input 1068 | else: 1069 | _input = input 1070 | _input(data) 1071 | if lexer: 1072 | _token = lexer.token 1073 | else: 1074 | _token = token 1075 | 1076 | while True: 1077 | tok = _token() 1078 | if not tok: 1079 | break 1080 | sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) 1081 | 1082 | # ----------------------------------------------------------------------------- 1083 | # @TOKEN(regex) 1084 | # 1085 | # This decorator function can be used to set the regex expression on a function 1086 | # when its docstring might need to be set in an alternative way 1087 | # ----------------------------------------------------------------------------- 1088 | 1089 | def TOKEN(r): 1090 | def set_regex(f): 1091 | if hasattr(r, '__call__'): 1092 | f.regex = _get_regex(r) 1093 | else: 1094 | f.regex = r 1095 | return f 1096 | return set_regex 1097 | 1098 | # Alternative spelling of the TOKEN decorator 1099 | Token = TOKEN 1100 | --------------------------------------------------------------------------------
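A quick usage sketch tying the pieces above together (minimal and illustrative only:
``load``/``loads``/``dumps`` are the entry points named in the README and are assumed
here to mirror the json-module signatures; ``dumps`` emits JSON rather than HCL, and
the optional comment-export mode is the one exercised in tests/test_decoder.py)::

    import hcl

    # Parse an HCL string into a plain dict (same data as tests/fixtures/flat.hcl).
    obj = hcl.loads('foo = "bar"\nKey = 7')
    assert obj == {'foo': 'bar', 'Key': 7}

    # Serialize back out; the output is JSON, not HCL.
    print(hcl.dumps(obj))

    # Comments can optionally be exported when loading, as in tests/test_decoder.py
    # (modes 'LINE', 'MULTILINE', or 'ALL').
    with open('file.hcl', 'r') as fp:
        obj_with_comments = hcl.load(fp, 'ALL')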