├── requirements.txt ├── tests ├── fixtures │ ├── float.hcl │ ├── empty.hcl │ ├── escape.hcl │ ├── flat.hcl │ ├── float.json │ ├── multiline.json │ ├── multiline_bad.hcl │ ├── no_argument_function.hcl │ ├── tab_heredoc.json │ ├── tab_heredoc.hcl │ ├── array_comment.hcl │ ├── basic.hcl │ ├── basic_squish.hcl │ ├── no_argument_function.json │ ├── structure_list.hcl │ ├── array_comment.json │ ├── structure_flatmap.hcl │ ├── structure_multi.hcl │ ├── scientific.hcl │ ├── terraform_heroku.hcl │ ├── heredoc_terminator_same_line.json │ ├── structure_flatmap.json │ ├── basic.json │ ├── heredoc_terminator_same_line.hcl │ ├── structure.hcl │ ├── structure_list.json │ ├── interpolation3.hcl │ ├── structure.json │ ├── structure_flat.json │ ├── terraform_heroku.json │ ├── scientific.json │ ├── structure_multi.json │ ├── structure2.json │ ├── structure2.hcl │ ├── depends_on.hcl │ ├── interpolation3.json │ ├── decode_tf_variable.hcl │ ├── decode_policy.hcl │ ├── depends_on.json │ ├── structure_list_deep.hcl │ ├── issue12.hcl │ ├── decode_tf_variable.json │ ├── interpolation2.hcl │ ├── structure_three_tiers.hcl │ ├── decode_policy.json │ ├── interpolation.hcl │ ├── issue12.json │ ├── structure_list_deep.json │ ├── interpolation.json │ ├── interpolation2.json │ └── structure_three_tiers.json ├── lex-fixtures │ ├── list.hcl │ ├── list_comma.hcl │ ├── empty_heredoc.hcl │ ├── multiple.hcl │ ├── structure_empty.hcl │ ├── unterminated_block_comment.hcl │ ├── nested_comment.hcl │ ├── old.hcl │ ├── array_comment.hcl │ ├── assign_deep.hcl │ ├── heredoc_terminator_same_line.hcl │ ├── structure_basic.hcl │ ├── assign_colon.hcl │ ├── structure_comma.hcl │ ├── structure.hcl │ ├── list_of_maps.hcl │ ├── types.hcl │ ├── comment.hcl │ ├── windows_heredoc.hcl │ ├── complex.hcl │ ├── terraform0.12syntax.hcl │ └── conditional_operator.hcl ├── run_tests.sh ├── test_parser.py ├── test_decoder.py └── test_lexer.py ├── testing-requirements.txt ├── pyproject.toml ├── setup.cfg ├── src └── hcl │ ├── 
__init__.py │ ├── api.py │ ├── lexer.py │ └── parser.py ├── .gitignore ├── MANIFEST.in ├── .travis.yml ├── scripts └── hcltool ├── CHANGELOG.md ├── README.rst ├── setup.py └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | ply>=3.8,<4 2 | -------------------------------------------------------------------------------- /tests/fixtures/float.hcl: -------------------------------------------------------------------------------- 1 | a = 1.02 2 | -------------------------------------------------------------------------------- /testing-requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | coverage 3 | -------------------------------------------------------------------------------- /tests/fixtures/empty.hcl: -------------------------------------------------------------------------------- 1 | resource "foo" {} 2 | -------------------------------------------------------------------------------- /tests/fixtures/escape.hcl: -------------------------------------------------------------------------------- 1 | foo = "bar\"baz\\n" 2 | -------------------------------------------------------------------------------- /tests/fixtures/flat.hcl: -------------------------------------------------------------------------------- 1 | foo = "bar" 2 | Key = 7 3 | -------------------------------------------------------------------------------- /tests/fixtures/float.json: -------------------------------------------------------------------------------- 1 | { 2 | "a": 1.02 3 | } 4 | -------------------------------------------------------------------------------- /tests/lex-fixtures/list.hcl: -------------------------------------------------------------------------------- 1 | foo = [1, 2, "foo"] 2 | -------------------------------------------------------------------------------- /tests/lex-fixtures/list_comma.hcl: 
-------------------------------------------------------------------------------- 1 | foo = [1, 2, "foo",] 2 | -------------------------------------------------------------------------------- /tests/lex-fixtures/empty_heredoc.hcl: -------------------------------------------------------------------------------- 1 | empty = <`__ in Python. This 8 | implementation aims to be compatible with the original golang version of 9 | the parser. 10 | 11 | The grammar and many of the tests/fixtures were copied/ported from the 12 | golang parser into pyhcl. All releases are tested with a variety of 13 | python versions from Python 2.7 onward. 14 | 15 | Installation 16 | ============ 17 | 18 | :: 19 | 20 | pip install pyhcl2 21 | 22 | Usage 23 | ===== 24 | 25 | This module is intended to be used in mostly the same way that one would 26 | use the json module in python, and load/loads/dumps are implemented. 27 | 28 | :: 29 | 30 | import hcl 31 | 32 | with open('file.hcl', 'r') as fp: 33 | obj = hcl.load(fp) 34 | 35 | Currently the dumps function outputs JSON, and not HCL. 36 | 37 | Convert HCL to JSON 38 | ------------------- 39 | 40 | pyhcl2 comes with a script that you can use to easily convert HCL to JSON, 41 | similar to the json.tool that comes with python:: 42 | 43 | hcltool INFILE [OUTFILE] 44 | 45 | Structure Validation 46 | -------------------- 47 | 48 | Similar to JSON, the output of parsing HCL is a python dictionary with 49 | no defined structure. The golang library for HCL implements support for 50 | parsing HCL according to defined objects, but this implementation does 51 | not currently support such constructs. 52 | 53 | Instead, I recommend that you use tools designed to validate JSON, such 54 | as the `schematics `_ library. 55 | 56 | Syntax 57 | ====== 58 | 59 | Only terraform version 12 syntax is supported. 
60 | 61 | - Single line comments start with ``#`` or ``//`` 62 | 63 | - Multi-line comments are wrapped in ``/*`` and ``*/`` 64 | 65 | - Values are assigned with the syntax ``key = value`` (whitespace 66 | doesn't matter). The value can be any primitive: a string, number, 67 | boolean, object, or list. 68 | 69 | - Strings are double-quoted and can contain any UTF-8 characters. 70 | Example: ``"Hello, World"`` 71 | 72 | - Numbers are assumed to be base 10. If you prefix a number with 0x, it 73 | is treated as a hexadecimal. If it is prefixed with 0, it is treated 74 | as an octal. Numbers can be in scientific notation: "1e10". 75 | 76 | - Boolean values: ``true``, ``false`` 77 | 78 | - Arrays can be made by wrapping it in ``[]``. Example: 79 | ``["foo", "bar", 42]``. Arrays can contain primitives and other 80 | arrays, but cannot contain objects. Objects must use the block syntax 81 | shown below. 82 | 83 | Objects and nested objects are created using the structure shown below:: 84 | 85 | variable "ami" { 86 | description = "the AMI to use" 87 | } 88 | 89 | Testing 90 | ======= 91 | 92 | To run the tests:: 93 | 94 | pip install -r testing-requirements.txt 95 | tests/run_tests.sh 96 | 97 | Authors 98 | ======= 99 | 100 | Dustin Spicuzza (dustin@virtualroadside.com) 101 | 102 | Note: This project is not associated with Hashicorp 103 | 104 | .. |Build Status| image:: https://travis-ci.org/virtuald/pyhcl.svg?branch=master 105 | :target: https://travis-ci.org/virtuald/pyhcl 106 | -------------------------------------------------------------------------------- /tests/lex-fixtures/conditional_operator.hcl: -------------------------------------------------------------------------------- 1 | // objectitem : objectkey EQUAL objectkey QMARK objectkey COLON objectkey 2 | identifier1 = identifier2 ? identifier3 : identifier4 3 | // objectitem : objectkey EQUAL objectkey QMARK objectkey COLON number 4 | identifier1 = identifier2 ? 
identifier3 : 1 5 | // objectitem : objectkey EQUAL objectkey QMARK objectkey COLON BOOL 6 | identifier1 = identifier2 ? identifier3 : True 7 | // objectitem : objectkey EQUAL objectkey QMARK objectkey COLON function 8 | identifier1 = identifier2 ? identifier3 : element(identifier4, identifier5) 9 | // objectitem : objectkey EQUAL objectkey QMARK number COLON objectkey 10 | identifier1 = identifier2 ? 1 : identifier3 11 | // objectitem : objectkey EQUAL objectkey QMARK BOOL COLON objectkey 12 | identifier1 = identifier2 ? True : identifier3 13 | // objectitem : objectkey EQUAL objectkey QMARK function COLON objectkey 14 | identifier1 = identifier2 ? element(identifier3, identifier4) : identifier5 15 | // objectitem : objectkey EQUAL objectkey QMARK number COLON number 16 | identifier1 = identifier2 ? 1 : 2 17 | // objectitem : objectkey EQUAL objectkey QMARK number COLON BOOL 18 | identifier1 = identifier2 ? 1 : True 19 | // objectitem : objectkey EQUAL objectkey QMARK number COLON function 20 | identifier1 = identifier2 ? 1 : element(identifier3, identifier4) 21 | // objectitem : objectkey EQUAL objectkey QMARK BOOL COLON number 22 | identifier1 = identifier2 ? True : 1 23 | // objectitem : objectkey EQUAL objectkey QMARK BOOL COLON function 24 | identifier1 = identifier2 ? True : element(identifier3, identifier4) 25 | // objectitem : objectkey EQUAL objectkey QMARK BOOL COLON BOOL 26 | identifier1 = identifier2 ? True : False 27 | 28 | // objectitem : objectkey EQUAL booleanexp QMARK objectkey COLON objectkey 29 | identifier1 = identifier2 == identifier3 ? identifier4 : identifier5 30 | // objectitem : objectkey EQUAL booleanexp QMARK objectkey COLON number 31 | identifier1 = identifier2 == identifier3 ? identifier4 : 1 32 | // objectitem : objectkey EQUAL booleanexp QMARK objectkey COLON BOOL 33 | identifier1 = identifier2 == identifier3 ? 
identifier4 : True 34 | // objectitem : objectkey EQUAL booleanexp QMARK objectkey COLON function 35 | identifier1 = identifier2 == identifier3 ? identifier4 : element(identifier5, identifier6) 36 | // objectitem : objectkey EQUAL booleanexp QMARK number COLON objectkey 37 | identifier1 = identifier2 == identifier3 ? 1 : identifier4 38 | // objectitem : objectkey EQUAL booleanexp QMARK BOOL COLON objectkey 39 | identifier1 = identifier2 == identifier3 ? True : identifier4 40 | // objectitem : objectkey EQUAL booleanexp QMARK function COLON objectkey 41 | identifier1 = identifier2 == identifier3 ? element(identifier4, identifier5) : identifier6 42 | // objectitem : objectkey EQUAL booleanexp QMARK number COLON number 43 | identifier1 = identifier2 == identifier3 ? 1 : 2 44 | // objectitem : objectkey EQUAL booleanexp QMARK number COLON BOOL 45 | identifier1 = identifier2 == identifier3 ? 1 : True 46 | // objectitem : objectkey EQUAL booleanexp QMARK number COLON function 47 | identifier1 = identifier2 == identifier3 ? 1 : element(identifier4, identifier5) 48 | // objectitem : objectkey EQUAL booleanexp QMARK BOOL COLON number 49 | identifier1 = identifier2 == identifier3 ? True : 1 50 | // objectitem : objectkey EQUAL booleanexp QMARK BOOL COLON function 51 | identifier1 = identifier2 == identifier3 ? True : element(identifier4, identifier5) 52 | // objectitem : objectkey EQUAL booleanexp QMARK BOOL COLON BOOL 53 | identifier1 = identifier2 == identifier3 ? 
True : False -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | from os.path import abspath, dirname, join, exists 6 | from distutils.core import setup 7 | 8 | try: 9 | from setuptools.command.build_py import build_py as _build_py 10 | except ImportError: 11 | from distutils.command.build_py import build_py as _build_py 12 | 13 | import os 14 | import sys 15 | import subprocess 16 | 17 | setup_dir = abspath(dirname(__file__)) 18 | version_file = join(setup_dir, 'src', 'hcl', 'version.py') 19 | 20 | 21 | def _pre_install(): 22 | '''Initialize the parse table at install time''' 23 | 24 | # Generate the parsetab.dat file at setup time 25 | dat = join(setup_dir, 'src', 'hcl', 'parsetab.dat') 26 | if exists(dat): 27 | os.unlink(dat) 28 | 29 | sys.path.insert(0, join(setup_dir, 'src')) 30 | 31 | import hcl 32 | from hcl.parser import HclParser 33 | 34 | parser = HclParser() 35 | 36 | 37 | class build_py(_build_py): 38 | def run(self): 39 | self.execute(_pre_install, (), msg="Generating parse table...") 40 | _build_py.run(self) 41 | 42 | 43 | # Automatically generate a version.py based on the git version 44 | if exists(join(setup_dir, '.git')): 45 | p = subprocess.Popen( 46 | ["git", "describe", "--tags", "--long", "--dirty=-dirty"], 47 | stdout=subprocess.PIPE, 48 | stderr=subprocess.PIPE, 49 | ) 50 | out, err = p.communicate() 51 | # Make sure the git version has at least one tag 52 | if err: 53 | print("Error: You need to create a tag for this repo to use the builder") 54 | sys.exit(1) 55 | 56 | # Convert git version to PEP440 compliant version 57 | # - Older versions of pip choke on local identifiers, so we can't include the git commit 58 | v, commits, local = out.decode('utf-8').rstrip().split('-', 2) 59 | if commits != '0' or '-dirty' in local: 60 | v = 
'%s.post0.dev%s' % (v, commits) 61 | 62 | # Create the version.py file 63 | with open(version_file, 'w') as fp: 64 | fp.write("# Autogenerated by setup.py\n__version__ = '{0}'".format(v)) 65 | 66 | with open(join(setup_dir, 'README.rst'), 'r') as readme_file: 67 | long_description = readme_file.read() 68 | 69 | with open(version_file) as fp: 70 | exec(compile(fp.read(), 'version.py', 'exec'), {}, locals()) 71 | 72 | install_requires = open(join(setup_dir, 'requirements.txt')).readlines() 73 | 74 | setup( 75 | name='pyhcl', 76 | version=__version__, 77 | description='HCL configuration parser for python', 78 | long_description=long_description, 79 | author='Dustin Spicuzza', 80 | author_email='dustin@virtualroadside.com', 81 | url='https://github.com/virtuald/pyhcl', 82 | package_dir={'': 'src'}, 83 | package_data={'hcl': ['src/hcl/parsetab.dat']}, 84 | packages=['hcl'], 85 | scripts=["scripts/hcltool"], 86 | include_package_data=True, 87 | setup_requires=install_requires, 88 | install_requires=install_requires, 89 | cmdclass={'build_py': build_py}, 90 | classifiers=[ 91 | "Development Status :: 5 - Production/Stable", 92 | "Intended Audience :: Developers", 93 | "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", 94 | "Operating System :: OS Independent", 95 | "Programming Language :: Python", 96 | "Programming Language :: Python :: 2", 97 | "Programming Language :: Python :: 2.7", 98 | "Programming Language :: Python :: 3", 99 | "Programming Language :: Python :: 3.3", 100 | "Programming Language :: Python :: 3.4", 101 | "Topic :: Text Processing", 102 | ], 103 | ) 104 | -------------------------------------------------------------------------------- /src/hcl/lexer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import ply.lex as lex 4 | 5 | if sys.version_info < (3,): 6 | text_type = unicode 7 | else: 8 | text_type = str 9 | 10 | 11 | def _raise_error(t, message=None): 12 | lexpos 
= t.lexer.lexpos 13 | lexdata = t.lexer.lexdata 14 | lineno = t.lexer.lineno 15 | column = _find_column(lexdata, t) 16 | if message is None: 17 | message = "Illegal character '%s'" % lexdata[lexpos] 18 | raise ValueError( 19 | "Line %d, column %d, index %d: %s" % (lineno, column, lexpos, message) 20 | ) 21 | 22 | 23 | def _find_column(input, token): 24 | last_cr = input.rfind('\n', 0, token.lexpos) 25 | column = (token.lexpos - last_cr) - 1 26 | return column 27 | 28 | 29 | class Lexer(object): 30 | 31 | tokens = ( 32 | 'BOOL', 33 | 'FLOAT', 34 | 'NUMBER', 35 | 'COMMA', 36 | 'IDENTIFIER', 37 | 'STRING_IDENTIFIER', 38 | 'EQUAL', 39 | 'STRING', 40 | 'ADD', 41 | 'MINUS', 42 | 'MULTIPLY', 43 | 'DIVIDE', 44 | 'LEFTBRACE', 45 | 'RIGHTBRACE', 46 | 'LEFTBRACKET', 47 | 'RIGHTBRACKET', 48 | 'PERIOD', 49 | 'EPLUS', 50 | 'EMINUS', 51 | 'LEFTPAREN', 52 | 'RIGHTPAREN', 53 | 'QMARK', 54 | 'COLON', 55 | 'ASTERISK_PERIOD', 56 | 'GT', 57 | 'LT', 58 | 'EQ', 59 | 'NE', 60 | 'LE', 61 | 'GE', 62 | ) 63 | 64 | states = ( 65 | ('stringdollar', 'exclusive'), 66 | ('string', 'exclusive'), 67 | ('heredoc', 'exclusive'), 68 | ('tabbedheredoc', 'exclusive'), 69 | ) 70 | 71 | def t_BOOL(self, t): 72 | r'(true)|(false)' 73 | t.value = t.value == 'true' 74 | return t 75 | 76 | def t_EMINUS(self, t): 77 | r'(?<=\d|\.)[eE]-' 78 | return t 79 | 80 | def t_EPLUS(self, t): 81 | r'(?<=\d)[eE]\+?|(?<=\d\.)[eE]\+?' 82 | return t 83 | 84 | def t_FLOAT(self, t): 85 | r'-?((\d+\.\d*)|(\d*\.\d+))' 86 | t.value = float(t.value) 87 | return t 88 | 89 | def t_hexnumber(self, t): 90 | r'-?0[xX][0-9a-fA-F]+' 91 | t.value = int(t.value, base=16) 92 | t.type = 'NUMBER' 93 | return t 94 | 95 | def t_intnumber(self, t): 96 | r'-?\d+' 97 | t.value = int(t.value) 98 | t.type = 'NUMBER' 99 | return t 100 | 101 | def t_PERIOD(self, t): 102 | r'\.' 103 | return t 104 | 105 | def t_COMMA(self, t): 106 | r',' 107 | return t 108 | 109 | def t_QMARK(self, t): 110 | r'\?' 
def t_string(self, t):
    # Opening double quote of a string literal. The raw string just below is
    # this rule's pattern and must remain the first statement of the function.
    r'\"'
    lexer = t.lexer
    body_start = lexer.lexpos
    # Nothing of the string body has been consumed yet.
    lexer.string_value = u''
    # rel_pos marks where the not-yet-consumed part of the body begins; the
    # escaped-character rule moves it forward as it folds text into
    # string_value.
    lexer.rel_pos = body_start
    # abs_start stays fixed so the closing rule can count the newlines the
    # whole literal spanned.
    lexer.abs_start = body_start
    lexer.begin('string')
def t_string_STRING(self, t):
    # Closing double quote of a string literal. The raw string just below is
    # this rule's pattern and must remain the first statement of the function.
    r'\"'
    lexer = t.lexer
    source = lexer.lexdata
    # lexpos already points past the closing quote; step back onto it so the
    # quote itself is excluded from the value.
    closing_quote = lexer.lexpos - 1
    # Text folded in while handling escapes, plus the untouched tail.
    t.value = lexer.string_value + source[lexer.rel_pos:closing_quote]
    # Newlines inside the literal were not seen by the newline rule.
    lexer.lineno += source.count('\n', lexer.abs_start, closing_quote)
    lexer.begin('INITIAL')
    return t
t.value[chop : -t.lexer.newline_chars] 247 | # We consumed a newline in the regex so bump the counter 248 | t.lexer.lineno += 1 249 | 250 | def t_tabbedheredoc(self, t): 251 | r'<<-\S+\r?\n' 252 | t.lexer.is_tabbed = True 253 | self._init_heredoc(t) 254 | t.lexer.begin('tabbedheredoc') 255 | 256 | def t_heredoc(self, t): 257 | r'<<\S+\r?\n' 258 | t.lexer.is_tabbed = False 259 | self._init_heredoc(t) 260 | t.lexer.begin('heredoc') 261 | 262 | def _end_heredoc(self, t): 263 | if t.lexer.is_tabbed: 264 | # Strip leading tabs 265 | value = t.value.strip() 266 | else: 267 | value = t.value 268 | 269 | if value == t.lexer.here_identifier: 270 | # Handle case where identifier is on a line of its own. Need to 271 | # subtract the current line and the newline characters from 272 | # the previous line to get the endpos 273 | endpos = t.lexer.lexpos - (t.lexer.newline_chars + len(t.value)) 274 | elif value.endswith(t.lexer.here_identifier): 275 | # Handle case where identifier is at the end of the line. 
Need to 276 | # subtract the identifier from to get the endpos 277 | endpos = t.lexer.lexpos - len(t.lexer.here_identifier) 278 | else: 279 | return 280 | 281 | entire_string = t.lexer.lexdata[t.lexer.here_start : endpos] 282 | 283 | if t.lexer.is_tabbed: 284 | # Get rid of any initial tabs, and remove any tabs preceded by 285 | # a new line 286 | chopped_starting_tabs = re.sub('^\t*', '', entire_string) 287 | t.value = re.sub('\n\t*', '\n', chopped_starting_tabs) 288 | else: 289 | t.value = entire_string 290 | 291 | t.lexer.lineno += t.lexer.lexdata[t.lexer.here_start : t.lexer.lexpos].count( 292 | '\n' 293 | ) 294 | t.lexer.begin('INITIAL') 295 | return t 296 | 297 | def t_tabbedheredoc_STRING(self, t): 298 | r'^\t*.+?(?=\r?$)' 299 | return self._end_heredoc(t) 300 | 301 | def t_heredoc_STRING(self, t): 302 | r'^.+?(?=\r?$)' 303 | return self._end_heredoc(t) 304 | 305 | def t_heredoc_ignoring(self, t): 306 | r'.+|\n' 307 | pass 308 | 309 | def t_heredoc_eof(self, t): 310 | t.lexer.lineno += t.lexer.lexdata[t.lexer.here_start : t.lexer.lexpos].count( 311 | '\n' 312 | ) 313 | _raise_error(t, 'EOF before closing heredoc') 314 | 315 | t_tabbedheredoc_ignoring = t_heredoc_ignoring 316 | t_tabbedheredoc_eof = t_heredoc_eof 317 | 318 | t_LEFTBRACE = r'\{' 319 | t_RIGHTBRACE = r'\}' 320 | t_LEFTBRACKET = r'\[' 321 | t_RIGHTBRACKET = r'\]' 322 | t_LEFTPAREN = r'\(' 323 | t_RIGHTPAREN = r'\)' 324 | 325 | def t_COMMENT(self, t): 326 | r'(\#|(//)).*' 327 | pass 328 | 329 | def t_MULTICOMMENT(self, t): 330 | r'/\*(.|\n)*?(\*/)' 331 | t.lexer.lineno += t.value.count('\n') 332 | pass 333 | 334 | # Define a rule so we can track line numbers 335 | def t_newline(self, t): 336 | r'\n+' 337 | t.lexer.lineno += len(t.value) 338 | 339 | t_ignore = ' \t\r\f\v' 340 | 341 | t_EQUAL = r'(?"), 247 | ("LT", "<"), 248 | ("EQ", "=="), 249 | ("NE", "!="), 250 | ("LE", "<="), 251 | ("GE", ">="), 252 | ("ASTERISK_PERIOD", "*."), 253 | 254 | # Bools 255 | ("BOOL", "true"), 256 | ("BOOL", 
"false"), 257 | 258 | # Identifier 259 | ("IDENTIFIER", "a"), 260 | ("IDENTIFIER", "a0"), 261 | ("IDENTIFIER", "foobar"), 262 | ("IDENTIFIER", "foo-bar"), 263 | ("IDENTIFIER", "abc123"), 264 | ("IDENTIFIER", "LGTM"), 265 | ("IDENTIFIER", "_"), 266 | ("IDENTIFIER", "_abc123"), 267 | ("IDENTIFIER", "abc123_"), 268 | ("IDENTIFIER", "_abc_123_"), 269 | ("IDENTIFIER", "_äöü"), 270 | ("IDENTIFIER", "_本"), 271 | ("IDENTIFIER", "äöü"), 272 | ("IDENTIFIER", "本"), 273 | ("IDENTIFIER", "a۰۱۸"), 274 | ("IDENTIFIER", "foo६४"), 275 | ("IDENTIFIER", "bar9876"), 276 | 277 | # Heredoc 278 | ("STRING", "<