├── .github └── workflows │ ├── release.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── neon ├── __init__.py ├── decoder.py ├── encoder.py ├── entity.py ├── errors.py ├── tokens.py └── utils.py ├── pyproject.toml └── tests ├── __init__.py ├── requirements.txt ├── test_data_structures.py ├── test_decoder.py ├── test_edge_cases.py ├── test_errors.py └── test_types.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | release: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.9' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools setuptools_scm wheel build 20 | - name: Build package 21 | run: python -m build 22 | - name: Publish package 23 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 24 | with: 25 | user: __token__ 26 | password: ${{ secrets.PYPI_API_TOKEN }} 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | lint: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: '3.9' 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install pre-commit 22 | - name: Lint with pre-commit 23 | run: | 24 | pre-commit run -a --show-diff-on-failure 25 | 26 | test: 27 | needs: [lint] 28 | runs-on: ubuntu-latest 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | python-version: [3.7, 3.8, 3.9] 33 | 34 | steps: 35 | - uses: actions/checkout@v2 36 | - name: Set up Python ${{ matrix.python-version }} 37 | uses: actions/setup-python@v2 38 | with: 39 | python-version: ${{ matrix.python-version }} 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip setuptools setuptools_scm 43 | python -m pip install .[test] 44 | - name: Test with pytest 45 | run: | 46 | pytest tests/ 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated version file 2 | /neon/version.py 3 | 4 | *.py[co] 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | 19 | # Installer logs 20 | pip-log.txt 21 | 22 | # Unit test / coverage reports 23 | .coverage 24 | .tox 25 | .noseids 26 | 27 | # Translations 28 | # *.mo 29 | 30 | # Mr Developer 31 | .mr.developer.cfg 32 | 33 | # Sublime Text 34 | *.sublime* 35 | 36 | # Eclipse 37 | .settings 38 | 39 | # Environment 40 | .env 41 | .envrc 42 | .direnv 43 | 44 | # Sphinx 45 | doctrees 46 | build 47 | .buildinfo 48 | 49 | # Celery 50 | celerybeat-schedule 51 | celerybeat-schedule.db 52 | 53 | # Database 54 | dump.rdb 55 | 56 | # Tests 57 | .cache 58 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.0.1 7 | hooks: 8 | - id: check-yaml 9 | - id: end-of-file-fixer 10 | - id: trailing-whitespace 11 | exclude: ^.*\.(md|rst)$ 12 | - id: debug-statements 13 | - id: mixed-line-ending 14 | - id: check-merge-conflict 15 | 16 | - repo: https://github.com/ambv/black 17 | rev: 22.3.0 18 | hooks: 19 | - id: black 20 | types: [python] 21 | 22 | - repo: https://github.com/timothycrosley/isort 23 | rev: 5.10.1 24 | hooks: 25 | - id: isort 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Pavel Dedik 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the {organization} nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | NEON for Python 2 | =============== 3 | 4 | [![Test](https://github.com/paveldedik/neon-py/actions/workflows/test.yml/badge.svg)](https://github.com/paveldedik/neon-py/actions/workflows/test.yml) 5 | 6 | NEON is very similar to YAML. The main difference is that NEON supports 7 | "entities" (so it can be used e.g. to parse phpDoc annotations) and tab 8 | characters for indentation. NEON syntax is also a little simpler.
9 | 10 | Example of Neon code: 11 | 12 | ```yaml 13 | # neon example 14 | 15 | name: Homer 16 | 17 | address: 18 | 19 | street: 742 Evergreen Terrace 20 | city: Springfield 21 | 22 | children: 23 | 24 | + Bart 25 | + Lisa 26 | + Maggie 27 | 28 | entity: Column(type=integer) 29 | ``` 30 | 31 | Installation 32 | ------------ 33 | 34 | To install NEON parser for Python, simply run: 35 | 36 | ```bash 37 | pip install neon-py 38 | ``` 39 | 40 | Quick start 41 | ----------- 42 | 43 | Decoding NEON config files is super easy: 44 | 45 | ```python 46 | import neon 47 | 48 | with open('/path/to/config.neon', 'r') as fd: 49 | config = neon.decode(fd.read()) 50 | ``` 51 | 52 | Links 53 | ----- 54 | 55 | * [Neon sandbox](http://ne-on.org) 56 | * [Neon for PHP](https://github.com/nette/neon) 57 | * [Neon for Javascript](https://github.com/matej21/neon-js) 58 | -------------------------------------------------------------------------------- /neon/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = "Pavel Dedik" 2 | from .decoder import parse 3 | from .encoder import to_string 4 | from .version import version as __version__ 5 | 6 | __all__ = ("decode", "encode") 7 | 8 | 9 | def decode(config): 10 | try: 11 | string = config.read() 12 | except AttributeError: 13 | string = config 14 | return parse(string) 15 | 16 | 17 | def encode(tree): 18 | return to_string(tree) 19 | -------------------------------------------------------------------------------- /neon/decoder.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from more_itertools import peekable 4 | 5 | from . import errors 6 | from .tokens import ( 7 | TOKENS, 8 | Dedent, 9 | End, 10 | Indent, 11 | LeftBrace, 12 | LeftRound, 13 | LeftSquare, 14 | NewLine, 15 | RightBrace, 16 | RightRound, 17 | RightSquare, 18 | ) 19 | from .utils import lstripped 20 | 21 | #: Flags to use in the Scanner class. 22 | SCANNER_FLAGS = re.MULTILINE | re.UNICODE | re.VERBOSE 23 | 24 | 25 | def _tokenize(input_string): 26 | position = len(lstripped(input_string)) + 1 27 | tokens, _ = _scanner.scan(input_string.strip()) 28 | tokens = peekable(tokens) 29 | 30 | curr_indent = 0 31 | indent_stack = [0] 32 | newline_last = False 33 | inside_bracket = 0 34 | 35 | while tokens: 36 | indent_change = 0 37 | tok = next(tokens) 38 | tok.line = position 39 | 40 | # If inside brackets, no Indent/Dedent tokens are yielded. 41 | if tok.id in (LeftRound.id, LeftSquare.id, LeftBrace.id): 42 | inside_bracket += 1 43 | elif tok.id in (RightRound.id, RightSquare.id, RightBrace.id): 44 | inside_bracket -= 1 45 | 46 | # Determination of current indentation and indentation change 47 | # is necessary for correct generation of the Indent/Dedent tokens. 48 | if newline_last and not inside_bracket: 49 | indent = tok.value if tok.id == Indent.id else 0 50 | if indent != curr_indent: 51 | indent_change = indent - curr_indent 52 | curr_indent = indent 53 | 54 | # Here we determine the position of a token in the input string. 55 | if tok.id == NewLine.id: 56 | newline_last = True 57 | while tokens.peek().id == NewLine.id: 58 | tok.value = next(tokens).value 59 | position += tok.value 60 | else: 61 | newline_last = False 62 | 63 | # If indentation decreased we want to generate the needed dedent 64 | # tokens. These tokens are instantiated here as they cannot be 65 | # matched by regular expression. 
66 | if indent_change < 0: 67 | while indent_stack[-1] > curr_indent: 68 | yield Dedent(indent_stack.pop(), line=position) 69 | yield NewLine(1, line=position) 70 | 71 | # If indentation increased we want to yield the indent token. 72 | if indent_change > 0: 73 | indent_stack.append(curr_indent) 74 | yield tok 75 | 76 | # We don't want to yield any other Indent tokens as our goal is 77 | # to represent the left/right braces with Indent/Dedent tokens. 78 | if tok.id != Indent.id and not (inside_bracket and tok.id == NewLine.id): 79 | yield tok 80 | 81 | while len(indent_stack) > 1: 82 | yield Dedent(indent_stack.pop(), line=position) 83 | 84 | 85 | class tokenize(peekable): 86 | """Tokenizes an input string. 87 | 88 | :param input_string: String to be tokenized. 89 | :type input_string: str 90 | :return: Iterator of :class:`Token` instances. 91 | """ 92 | 93 | def __init__(self, input_string): 94 | tokens = _tokenize(str(input_string)) 95 | super(tokenize, self).__init__(tokens) 96 | 97 | def next(self): 98 | try: 99 | return self.__next__() 100 | except (StopIteration, RuntimeError): 101 | return End() 102 | 103 | def advance(self, allowed=None, skip=None): 104 | """Helper for iterating through tokens. 105 | 106 | :param allowed: Optional list of allowed tokens. Default is 107 | any token. If the found token is not allowed, the function 108 | raises a syntax error. 109 | :type allowed: :class:`Token` or iterable of tokens 110 | :param skip: If specified, a sequence of given token types 111 | is skipped first. Default is :obj:`None`. 112 | :type skip: Token | list[Token] 113 | """ 114 | tok = self.next() 115 | if skip is not None: 116 | skips = ( 117 | {t.id for t in skip} 118 | if isinstance(skip, (list, tuple)) 119 | else {skip.id} 120 | ) 121 | while tok.id in skips: 122 | tok = self.next() 123 | if allowed is None: 124 | return tok 125 | if not isinstance(allowed, (list, tuple)): 126 | allowed = [allowed] 127 | if all(tok.id != Token.id for Token in allowed): 128 | raise_error(allowed, tok) 129 | return tok 130 | 131 | def peek(self): 132 | try: 133 | return super(tokenize, self).peek() 134 | except (StopIteration, RuntimeError): 135 | return End() 136 | 137 | 138 | def raise_error(expected, token): 139 | """Raises an error with some information about position etc. 140 | 141 | :param expected: List of expected tokens. 142 | :param token: Received token. 143 | :raises: :class:`errors.ParserError` 144 | """ 145 | msg = "Unexpected {}".format(token.name) 146 | if token.line: 147 | msg += " on line {}".format(token.line) 148 | if expected and token.id != Indent.id: 149 | allowed_list = [Token.name for Token in expected if Token.re] 150 | if allowed_list: 151 | tok_msg = " or ".join(allowed_list) 152 | msg += ", expected {}".format(tok_msg) 153 | raise errors.ParserError(msg + ".") 154 | 155 | 156 | def parse(input_string): 157 | """Parses given string according to NEON syntax. 158 | 159 | :param input_string: String to parse. 160 | :type input_string: string 161 | :return: Parsed data. 162 | :rtype: :class:`dict` 163 | """ 164 | tokens = tokenize(input_string) 165 | return Indent().parse(tokens) 166 | 167 | 168 | #: The Scanner is instantiated with a list of re's and associated 169 | #: functions. It is used to scan a string, returning a list of parts 170 | #: which match the given re's.
171 | #: 172 | #: See: http://stackoverflow.com/a/17214398/2874089 173 | _scanner = re.Scanner( 174 | [TokenClass.getscan() for TokenClass in TOKENS if TokenClass.re is not None], 175 | flags=SCANNER_FLAGS, 176 | ) 177 | -------------------------------------------------------------------------------- /neon/encoder.py: -------------------------------------------------------------------------------- 1 | def format_list(list_, indent_level): 2 | indent = "\t" * indent_level 3 | newline = "\n" if indent_level else "\n\n" 4 | return newline.join( 5 | ["{}- {}".format(indent, to_string(value, indent_level + 1)) for value in list_] 6 | ) 7 | 8 | 9 | def format_dict(dict_, indent_level): 10 | indent = "\t" * indent_level 11 | newline = "\n" if indent_level else "\n\n" 12 | return newline.join( 13 | [ 14 | "{}{}: {}".format(indent, key, to_string(value, indent_level + 1)) 15 | for key, value in dict_.items() 16 | ] 17 | ) 18 | 19 | 20 | def to_string(obj, indent_level=0): 21 | """Encodes given object using the NEON syntax. 22 | 23 | :param obj: Object to encode. 24 | :return: Encoded object. 25 | :rtype: string 26 | """ 27 | if isinstance(obj, dict): 28 | return "\n" + format_dict(obj, indent_level) 29 | elif isinstance(obj, list): 30 | return "\n" + format_list(obj, indent_level) 31 | elif obj is None: 32 | return "Null" 33 | return str(obj) 34 | -------------------------------------------------------------------------------- /neon/entity.py: -------------------------------------------------------------------------------- 1 | class Entity(object): 2 | """Representation of Foo(bar=1) literal.""" 3 | 4 | def __init__(self, value=None, attrs=None): 5 | self.value = value 6 | self.attributes = dict(attrs) if attrs else {} 7 | 8 | def __repr__(self): 9 | keywords = ", ".join( 10 | [ 11 | "{}={}".format(key, value) if pos != key else str(value) 12 | for pos, (key, value) in enumerate(self.attributes.items()) 13 | ] 14 | ) 15 | return "{}({})".format(self.value, keywords) 16 | 17 | def __str__(self): 18 | return repr(self) 19 | 20 | def __eq__(self, other): 21 | return self.value == other.value and self.attributes == other.attributes 22 | -------------------------------------------------------------------------------- /neon/errors.py: -------------------------------------------------------------------------------- 1 | class TokenError(Exception): 2 | """Raised when tokenization ends up with an error.""" 3 | 4 | 5 | class ParserError(Exception): 6 | """Raised when parsing ends up with an error.""" 7 | -------------------------------------------------------------------------------- /neon/tokens.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import dateutil.parser 4 | 5 | from . import errors 6 | from .entity import Entity 7 | from .utils import camel_case_to_underscore, classproperty, variants 8 | 9 | #: List of all tokens. 10 | TOKENS = [] 11 | 12 | 13 | def token(cls): 14 | """Registers a token class.""" 15 | assert issubclass(cls, Token), "Tokens must subclass the Token class." 16 | TOKENS.append(cls) 17 | return cls 18 | 19 | 20 | class Token(object): 21 | """Token representation.""" 22 | 23 | #: Regular expression for tokenization. 24 | re = None 25 | 26 | #: Unique ID of the token.
27 | id = None 28 | 29 | @classproperty 30 | def name(cls): 31 | return camel_case_to_underscore(cls.__name__).replace("_", " ") 32 | 33 | def __init__(self, value=None, line=None): 34 | self.value = value 35 | self.line = line 36 | 37 | def parse(self, tokens): 38 | return self.value 39 | 40 | def __eq__(self, other): 41 | return type(self) == type(other) and self.value == other.value 42 | 43 | def __str__(self): 44 | name = type(self).__name__ 45 | value = "" if self.value is None else self.value 46 | return "{}({})".format(name, value) 47 | 48 | def __repr__(self): 49 | return str(self) 50 | 51 | @classmethod 52 | def do(cls, scanner, string): 53 | return cls(string) 54 | 55 | @classmethod 56 | def getscan(cls): 57 | return (cls.re, cls.do) 58 | 59 | 60 | class Primitive(Token): 61 | """Represents primitive type.""" 62 | 63 | def parse(self, tokens): 64 | peek = tokens.peek() 65 | if peek.id == LeftRound.id: 66 | attributes = tokens.advance().parse(tokens) 67 | return Entity(self.value, attributes) 68 | return self.value 69 | 70 | 71 | @token 72 | class String(Primitive): 73 | """Represents string token.""" 74 | 75 | re = r""" 76 | (?: "(?:\\.|[^"\\])*" | '(?:\\.|[^'\\])*' ) 77 | """ 78 | id = "str" 79 | 80 | @classmethod 81 | def do(cls, scanner, string): 82 | double = '"' 83 | single = "'" 84 | if string[0] == double: 85 | string = string.strip(double).replace(r"\"", '"') 86 | else: 87 | string = string.strip(single).replace(r"\'", "'") 88 | # TODO: refactor to deal with \t, \n, \r, \xXX, \uXXXX etc 89 | string = string.replace("\\\\", "\\") 90 | return cls(string) 91 | 92 | 93 | @token 94 | class Integer(Primitive): 95 | """Represents integer token.""" 96 | 97 | re = None 98 | id = "int" 99 | 100 | @classmethod 101 | def convert(cls, string): 102 | try: 103 | return int(string, 0) 104 | except ValueError: 105 | return 106 | 107 | 108 | @token 109 | class Float(Primitive): 110 | """Represents float token.""" 111 | 112 | re = None 113 | id = "float" 114 | 115 | @classmethod 116 | def convert(cls, string): 117 | try: 118 | return float(string) 119 | except ValueError: 120 | return 121 | 122 | 123 | @token 124 | class Boolean(Primitive): 125 | """Represents boolean token.""" 126 | 127 | re = None 128 | id = "bool" 129 | 130 | _mapping = { 131 | True: variants("true", "yes", "on"), 132 | False: variants("false", "no", "off"), 133 | } 134 | 135 | @classmethod 136 | def convert(cls, string): 137 | for value, alternatives in cls._mapping.items(): 138 | if string in alternatives: 139 | return value 140 | 141 | 142 | @token 143 | class NoneValue(Primitive): 144 | """Represents :obj:`None` token.""" 145 | 146 | re = None 147 | id = "none" 148 | 149 | _variants = variants("null") 150 | 151 | 152 | @token 153 | class DateTime(Primitive): 154 | """Represents datetime token.""" 155 | 156 | re = None 157 | id = "datetime" 158 | 159 | @classmethod 160 | @functools.lru_cache(maxsize=None) 161 | def convert(cls, string): 162 | try: 163 | return dateutil.parser.parse(string) 164 | except (ValueError, TypeError): 165 | return 166 | 167 | 168 | @token 169 | class Literal(Token): 170 | """Represents literal token.""" 171 | 172 | re = r""" 173 | (?: [^#"',:=[\]{}()\x00-\x20!`-] | [:-][^"',\]})\s] ) 174 | (?: [^,:=\]})(\x00-\x20]+ | :(?! 
[\s,\]})] | $ ) | 175 | [\ \t]+ [^#,:=\]})(\x00-\x20] )* 176 | """ 177 | id = "literal" 178 | 179 | @classmethod 180 | def do(cls, scanner, string): 181 | for Type in [Integer, Float, Boolean, DateTime]: 182 | value = Type.convert(string) 183 | if value is not None: 184 | return Type(value) 185 | if string in NoneValue._variants: 186 | return NoneValue(None) 187 | return String(string) 188 | 189 | 190 | class Symbol(Token): 191 | """Represents symbol token.""" 192 | 193 | id = "symbol" 194 | 195 | @classproperty 196 | def name(cls): 197 | return "'{}'".format(str(cls.re).replace("\\", "")) 198 | 199 | @classmethod 200 | def do(cls, scanner, string): 201 | return cls() 202 | 203 | 204 | @token 205 | class Comma(Symbol): 206 | """Represents comma token.""" 207 | 208 | re = r"," 209 | id = "comma" 210 | 211 | 212 | @token 213 | class Colon(Symbol): 214 | """Represents colon token.""" 215 | 216 | re = r":" 217 | id = "colon" 218 | 219 | 220 | @token 221 | class EqualSign(Symbol): 222 | """Represents equal sign.""" 223 | 224 | re = r"=" 225 | id = "eq" 226 | 227 | 228 | @token 229 | class Hyphen(Symbol): 230 | """Represents hyphen token.""" 231 | 232 | re = r"-" 233 | id = "hyphen" 234 | 235 | 236 | @token 237 | class LeftRound(Symbol): 238 | """Represents left round bracket.""" 239 | 240 | re = r"\(" 241 | id = "leftround" 242 | 243 | def parse(self, tokens): 244 | data = {} 245 | tok = tokens.advance(skip=NewLine) 246 | iteration = 0 247 | 248 | while tok.id != RightRound.id: 249 | key = tok.parse(tokens) 250 | tok = tokens.advance((EqualSign, Comma, RightRound)) 251 | 252 | if tok.id == EqualSign.id: 253 | data[key] = tokens.advance().parse(tokens) 254 | tok = tokens.advance((Comma, RightRound)) 255 | if tok.id == Comma.id: 256 | tok = tokens.advance(skip=NewLine) 257 | 258 | elif tok.id == Comma.id: 259 | data[iteration] = key 260 | tok = tokens.advance(skip=NewLine) 261 | 262 | elif tok.id == RightRound.id: 263 | data[iteration] = key 264 | 265 | iteration += 1 266 | 267 | return data 268 | 269 | 270 | @token 271 | class RightRound(Symbol): 272 | """Represents right round bracket.""" 273 | 274 | re = r"\)" 275 | id = "rightround" 276 | 277 | 278 | @token 279 | class LeftSquare(Symbol): 280 | """Represents left square bracket.""" 281 | 282 | re = r"\[" 283 | id = "leftsquare" 284 | 285 | def parse(self, tokens): 286 | data = [] 287 | tok = tokens.advance(skip=NewLine) 288 | 289 | while tok.id != RightSquare.id: 290 | value = tok.parse(tokens) 291 | data.append(value) 292 | 293 | tok = tokens.advance((Comma, RightSquare)) 294 | if tok.id == Comma.id: 295 | tok = tokens.advance(skip=NewLine) 296 | 297 | return data 298 | 299 | 300 | @token 301 | class RightSquare(Symbol): 302 | """Represents right square bracket.""" 303 | 304 | re = r"\]" 305 | id = "rightsquare" 306 | 307 | 308 | @token 309 | class LeftBrace(Symbol): 310 | """Represents left brace.""" 311 | 312 | re = r"{" 313 | id = "leftbrace" 314 | 315 | def parse(self, tokens): 316 | data = {} 317 | tok = tokens.advance(skip=NewLine) 318 | 319 | while tok.id != RightBrace.id: 320 | key = tok.parse(tokens) 321 | tokens.advance(Colon) 322 | data[key] = tokens.advance().parse(tokens) 323 | 324 | tok = tokens.advance((Comma, RightBrace)) 325 | if tok.id == Comma.id: 326 | tok = tokens.advance(skip=NewLine) 327 | 328 | return data 329 | 330 | 331 | @token 332 | class RightBrace(Symbol): 333 | """Represents right brace.""" 334 | 335 | re = r"}" 336 | id = "rightbrace" 337 | 338 | 339 | @token 340 | class Comment(Token): 341 | """Represents 
comment token.""" 342 | 343 | re = r"\s*\#.*" 344 | id = "comment" 345 | do = None # ignore comments 346 | 347 | 348 | @token 349 | class Indent(Token): 350 | """Represents indent token.""" 351 | 352 | re = r"^[\t\ ]+" 353 | id = "indent" 354 | 355 | def _parse_list(self, tokens, tok): 356 | data = [] 357 | 358 | while tok.id not in [Dedent.id, End.id]: 359 | while tok.id == Hyphen.id: 360 | old_tok = tok 361 | tok = tokens.advance(skip=(NewLine, Indent)) 362 | # in this case, the list looks like this: 363 | # - 364 | # - a 365 | if old_tok.id == tok.id == Hyphen.id: 366 | data.append(None) 367 | if tokens.peek().id == Colon.id: 368 | tokens.advance() 369 | key = tok.parse(tokens) 370 | tok = tokens.advance(skip=NewLine) 371 | value = {key: tok.parse(tokens)} 372 | else: 373 | value = tok.parse(tokens) 374 | data.append(value) 375 | 376 | tok = tokens.advance((End, NewLine, Dedent)) 377 | if tok.id == NewLine.id: 378 | tok = tokens.advance((Hyphen, Dedent)) 379 | 380 | return data 381 | 382 | def _parse_dict(self, tokens, tok): 383 | data = {} 384 | 385 | while tok.id not in [Dedent.id, End.id]: 386 | key = tok.parse(tokens) 387 | tokens.advance(Colon) 388 | 389 | tok = tokens.advance() 390 | if tok.id == NewLine.id: 391 | tok = tokens.advance() 392 | if tok.id not in [Indent.id, Dedent.id]: 393 | data[key] = None 394 | continue 395 | data[key] = tok.parse(tokens) 396 | 397 | tok = tokens.advance((End, NewLine, Dedent)) 398 | if tok.id == NewLine.id: 399 | tok = tokens.advance(skip=NewLine) 400 | 401 | return data 402 | 403 | def parse(self, tokens): 404 | tok = tokens.advance() 405 | 406 | while tok.id == NewLine.id: 407 | tok = tokens.advance() 408 | 409 | if tok.id == Hyphen.id: 410 | return self._parse_list(tokens, tok) 411 | elif tokens.peek().id == End.id: 412 | return tok.parse(tokens) 413 | else: 414 | return self._parse_dict(tokens, tok) 415 | 416 | @classmethod 417 | def do(cls, scanner, string): 418 | return cls(len(string)) 419 | 420 | 421 | @token 422 | class Dedent(Token): 423 | """Represents dedent token.""" 424 | 425 | re = None # this token is generated after the scanning procedure 426 | id = "dedent" 427 | 428 | 429 | @token 430 | class NewLine(Token): 431 | """Represents new line token.""" 432 | 433 | re = r"[\n]+" 434 | id = "newline" 435 | 436 | @classmethod 437 | def do(cls, scanner, string): 438 | return cls(len(string)) 439 | 440 | 441 | @token 442 | class WhiteSpace(Token): 443 | """Represents whitespace token.""" 444 | 445 | re = r"[\t\ ]+" 446 | id = "whitespace" 447 | do = None # ignore white-spaces 448 | 449 | 450 | @token 451 | class Unknown(Token): 452 | """Represents unknown character sequence match.""" 453 | 454 | re = r".*" 455 | id = "unknown" 456 | 457 | @classmethod 458 | def do(cls, scanner, token): 459 | msg = "Unknown character sequence: {!r}" 460 | raise errors.TokenError(msg.format(token)) 461 | 462 | 463 | @token 464 | class End(Token): 465 | """Represents end-of-file token.""" 466 | 467 | re = None 468 | id = "end" 469 | name = "end of file" 470 | -------------------------------------------------------------------------------- /neon/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import re 3 | 4 | 5 | class classproperty(object): 6 | """Useful when class properties need to be defined.""" 7 | 8 | def __init__(self, fget): 9 | self.fget = fget 10 | 11 | def __get__(self, obj, cls=None): 12 | if cls is None: 13 | cls = type(obj) 14 | return self.fget(cls) 15 | 16 | 17 | def lstripped(string):
18 | """The whitespace characters that would be stripped on the left. 19 | 20 | :return: A string of characters that will be stripped 21 | on the left side of the given string if the method 22 | `strip()` is called on it. 23 | :rtype: string 24 | """ 25 | return "".join(itertools.takewhile(str.isspace, string)) 26 | 27 | 28 | def variants(*strings): 29 | """Creates three variants of each string: 30 | 31 | - lowercase (e.g. `husky`) 32 | - title version (e.g. `Husky`) 33 | - uppercase (e.g. `HUSKY`) 34 | 35 | :return: A list of all variants of all given strings. 36 | :rtype: list 37 | """ 38 | result = [] 39 | for string in strings: 40 | lowercase = string.lower() 41 | result += [lowercase, lowercase.title(), string.upper()] 42 | return result 43 | 44 | 45 | def camel_case_to_underscore(name): 46 | """Converts string from camel case notation to underscore. 47 | 48 | :param name: String to convert to underscore. 49 | :type name: string 50 | :return: A string converted from camel case to underscore. 51 | :rtype: string 52 | """ 53 | s1 = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name) 54 | return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1).lower() 55 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.2.0", "wheel", "setuptools_scm[toml]>=3.4.3"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "neon-py" 7 | authors = [{name = "Pavel Dedik", email = "dedikx@gmail.com"}] 8 | license = {text = "BSD"} 9 | description = "NEON parser for Python" 10 | readme = "README.md" 11 | keywords = ["neon", "parser", "config file"] 12 | classifiers = [ 13 | "Programming Language :: Python", 14 | "Operating System :: OS Independent", 15 | "Topic :: Utilities", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.7", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | ] 21 | urls = {Homepage = "https://github.com/paveldedik/neon-py"} 22 | dependencies = ["python-dateutil", "more-itertools"] 23 | dynamic = ["version"] 24 | 25 | [project.optional-dependencies] 26 | test = ["pytest"] 27 | testing = ["pytest"] 28 | 29 | [tool.setuptools] 30 | zip-safe = false 31 | include-package-data = true 32 | 33 | [tool.setuptools.packages.find] 34 | exclude = ["tests"] 35 | namespaces = false 36 | 37 | [tool.setuptools_scm] 38 | write_to = "neon/version.py" 39 | 40 | [tool.black] 41 | target_version = ["py39"] 42 | 43 | [tool.isort] 44 | # config compatible with Black 45 | profile = "black" 46 | line_length = 100 47 | default_section = "THIRDPARTY" 48 | include_trailing_comma = true 49 | known_first_party = "neon" 50 | 51 | [tool.tox] 52 | legacy_tox_ini = """ 53 | [tox] 54 | isolated_build = True 55 | envlist = pylint,py{37,38,39} 56 | 57 | [testenv] 58 | deps = .[test] 59 | commands = pytest {posargs:} 60 | """ 61 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paveldedik/neon-py/3986908381953713d702ba6efa56f4ebfe954672/tests/__init__.py -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 |
-------------------------------------------------------------------------------- /tests/test_data_structures.py: -------------------------------------------------------------------------------- 1 | import neon 2 | 3 | NEON_SIMPLE_DICT = """ 4 | a: b 5 | c: d 6 | """ 7 | 8 | 9 | def test_simple_dict(): 10 | assert neon.decode(NEON_SIMPLE_DICT) == {"a": "b", "c": "d"} 11 | 12 | 13 | NEON_SIMPLE_LIST = """ 14 | - a 15 | - b 16 | """ 17 | 18 | 19 | def test_simple_list(): 20 | assert neon.decode(NEON_SIMPLE_LIST) == ["a", "b"] 21 | 22 | 23 | NEON_SIMPLE = """ 24 | a: 25 | - 26 | - d 27 | b: 28 | e: 29 | g: h 30 | """ 31 | 32 | 33 | def test_simple_none(): 34 | expected = {"a": [None, "d"], "b": {"e": None, "g": "h"}} 35 | assert neon.decode(NEON_SIMPLE) == expected 36 | 37 | 38 | NEON_LIST_OF_DICTS = """ 39 | - a: 40 | - b: False 41 | - d: [1] 42 | """ 43 | 44 | 45 | def test_list_of_dicts(): 46 | expected = [{"a": [{"b": False}]}, {"d": [1]}] 47 | assert neon.decode(NEON_LIST_OF_DICTS) == expected 48 | 49 | 50 | NEON_DATA_STRUCTURES = """ 51 | list: [1, a, 52 | [v, True] 53 | ] 54 | dict1: ( 55 | a=5, 56 | b={1: [True]}, 57 | ) 58 | dict2: { 59 | d: 8, 60 | e: {Null: off}, 61 | } 62 | """ 63 | 64 | 65 | def test_data_structures(): 66 | expected = { 67 | "list": [1, "a", ["v", True]], 68 | "dict1": {"a": 5, "b": {1: [True]}}, 69 | "dict2": {"d": 8, "e": {None: False}}, 70 | } 71 | assert neon.decode(NEON_DATA_STRUCTURES) == expected 72 | 73 | 74 | NEON_EMPTY_DATA_STRUCTURES = """ 75 | - {} 76 | - [] 77 | - () 78 | - Tree() 79 | """ 80 | 81 | 82 | def test_empty_data_structures(): 83 | expected = [{}, [], {}, neon.entity.Entity("Tree", {})] 84 | assert neon.decode(NEON_EMPTY_DATA_STRUCTURES) == expected 85 | 86 | 87 | NEON_INDENTED_COMMENTS = """ 88 | root: 89 | # comment 1 90 | - "aaa" 91 | # comment 2 92 | - "bbb" 93 | """ 94 | 95 | 96 | def test_indented_comments(): 97 | assert neon.decode(NEON_INDENTED_COMMENTS) == {"root": ["aaa", "bbb"]} 98 | 99 | 100 | NEON_INDENTED_LIST_OF_DICTS = """ 101 | - 102 | a: b 103 | """ 104 | 105 | 106 | def test_indented_list_dict(): 107 | assert neon.decode(NEON_INDENTED_LIST_OF_DICTS) == [{"a": "b"}] 108 | -------------------------------------------------------------------------------- /tests/test_decoder.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import neon 4 | 5 | NEON_DECODE_SAMPLE = """ 6 | # neon file - edit it now! 
7 | 8 | name: Homer 9 | 10 | address: 11 | street: 742 Evergreen Terrace 12 | city: "Springfield" 13 | 14 | #asdf 15 | country: 16 | - a 17 | whatever: 18 | - b 19 | 20 | phones: { home: 555-6528, work: { 21 | asdf: 555-7334, 22 | wtf: 1234, 23 | } 24 | } 25 | 26 | whoa: [a, b, c, 1e5, 0x22, 2014-01-01] 27 | 28 | children: 29 | - Bart 30 | - Lisa 31 | - Maggie 32 | - (type=whatever, wtf=(wtf=5)) 33 | 34 | entity: Column(type=integer) 35 | 36 | special: "#characters put in quotes" 37 | 38 | # this is a comment 39 | """ 40 | 41 | 42 | def test_decode_sample(): 43 | expected = { 44 | "name": "Homer", 45 | "address": { 46 | "street": "742 Evergreen Terrace", 47 | "city": "Springfield", 48 | "country": ["a"], 49 | "whatever": ["b"], 50 | }, 51 | "phones": { 52 | "home": "555-6528", 53 | "work": { 54 | "asdf": "555-7334", 55 | "wtf": 1234, 56 | }, 57 | }, 58 | "whoa": ["a", "b", "c", 100000.0, 34, datetime(2014, 1, 1, 0, 0)], 59 | "children": [ 60 | "Bart", 61 | "Lisa", 62 | "Maggie", 63 | { 64 | "type": "whatever", 65 | "wtf": {"wtf": 5}, 66 | }, 67 | ], 68 | "entity": neon.entity.Entity("Column", {"type": "integer"}), 69 | "special": "#characters put in quotes", 70 | } 71 | assert neon.decode(NEON_DECODE_SAMPLE) == expected 72 | 73 | 74 | NEON_UTF8_SUPPORT = """ 75 | - ěšíčťľĺ 76 | - 5 × 6 ÷ 7 ± ∞ - π 77 | """ 78 | 79 | 80 | def test_utf8_support(): 81 | expected = ["ěšíčťľĺ", "5 × 6 ÷ 7 ± ∞ - π"] 82 | assert neon.decode(NEON_UTF8_SUPPORT) == expected 83 | -------------------------------------------------------------------------------- /tests/test_edge_cases.py: -------------------------------------------------------------------------------- 1 | import neon 2 | 3 | NEON_INDENTED_LIST_VALUE = """ 4 | - 5 | aaa 6 | """ 7 | 8 | 9 | def test_indented_list_value(): 10 | assert neon.decode(NEON_INDENTED_LIST_VALUE) == ["aaa"] 11 | 12 | 13 | NEON_SIMPLE_VALUE = """ 14 | hello 15 | """ 16 | 17 | 18 | def test_simple_value(): 19 | assert neon.decode(NEON_SIMPLE_VALUE) == "hello" 20 | 21 | 22 | NEON_SIMPLE_LIST_VALUE = """ 23 | - 24 | """ 25 | 26 | 27 | def test_simple_list_value(): 28 | assert neon.decode(NEON_SIMPLE_LIST_VALUE) == [None] 29 | -------------------------------------------------------------------------------- /tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import neon 4 | from neon import errors 5 | 6 | NEON_ERROR_COLON1 = "a: (a: B)" 7 | NEON_ERROR_COLON2 = "a: [1: 2]" 8 | 9 | NEON_ERROR_COLON1_MSG = "Unexpected ':' on line 1, expected '=' or ',' or ')'." 10 | NEON_ERROR_COLON2_MSG = "Unexpected ':' on line 1, expected ',' or ']'." 11 | 12 | 13 | def test_error_colons(): 14 | with pytest.raises(errors.ParserError) as excinfo: 15 | neon.decode(NEON_ERROR_COLON1) 16 | assert str(excinfo.value) == NEON_ERROR_COLON1_MSG 17 | 18 | with pytest.raises(errors.ParserError) as excinfo: 19 | neon.decode(NEON_ERROR_COLON2) 20 | assert str(excinfo.value) == NEON_ERROR_COLON2_MSG 21 | 22 | 23 | NEON_BAD_INDENT = """ 24 | a: 25 | - b 26 | - c 27 | """ 28 | NEON_BAD_INDENT_MSG = "Unexpected indent on line 4." 29 | 30 | 31 | def test_bad_indent(): 32 | with pytest.raises(errors.ParserError) as excinfo: 33 | neon.decode(NEON_BAD_INDENT) 34 | assert str(excinfo.value) == NEON_BAD_INDENT_MSG 35 | 36 | 37 | NEON_UNEXPECTED_END = "a: [" 38 | NEON_UNEXPECTED_END_MSG = "Unexpected end of file, expected ',' or ']'." 
39 | 40 | 41 | def test_unexpected_end(): 42 | with pytest.raises(errors.ParserError) as excinfo: 43 | neon.decode(NEON_UNEXPECTED_END) 44 | assert str(excinfo.value) == NEON_UNEXPECTED_END_MSG 45 | -------------------------------------------------------------------------------- /tests/test_types.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from dateutil.tz import tz 4 | 5 | import neon 6 | 7 | NEON_ENTITY = """ 8 | entity: Column(something, type=int) 9 | """ 10 | 11 | 12 | def test_entity(): 13 | expected = {"entity": neon.entity.Entity("Column", {0: "something", "type": "int"})} 14 | assert neon.decode(NEON_ENTITY) == expected 15 | 16 | 17 | NEON_TYPES = """ 18 | string: "a () #' text" 19 | integer: 5902 20 | hexint: 0xAA 21 | octint: 0o666 22 | binint: 0b111000111 23 | float: 5.234 24 | floatbig: 5e10 25 | nones: [NULL, null, Null] 26 | bools: [TRUE, True, true, YES, Yes, yes, ON, On, on, 27 | FALSE, False, false, NO, No, no, OFF, Off, off] 28 | """ 29 | 30 | 31 | def test_types(): 32 | expected = { 33 | "string": "a () #' text", 34 | "integer": 5902, 35 | "hexint": 0xAA, 36 | "octint": 0o666, 37 | "binint": 0b111000111, 38 | "float": 5.234, 39 | "floatbig": 5e10, 40 | "nones": [None] * 3, 41 | "bools": [True] * 9 + [False] * 9, 42 | } 43 | result = neon.decode(NEON_TYPES) 44 | for key in expected: 45 | assert result[key] == expected[key] 46 | 47 | 48 | NEON_DATETIME = """ 49 | - 2013-04-23 13:24:55.123456+0000 50 | - 2015-01-20 51 | - 2015-5-10 52 | """ 53 | 54 | 55 | def test_datetime(): 56 | expected = [ 57 | datetime(2013, 4, 23, 13, 24, 55, 123456, tzinfo=tz.tzutc()), 58 | datetime(2015, 1, 20), 59 | datetime(2015, 5, 10), 60 | ] 61 | assert neon.decode(NEON_DATETIME) == expected 62 | 63 | 64 | def test_string_escaping(): 65 | assert neon.decode('key: "msg"') == {"key": "msg"} 66 | assert neon.decode('key: "msg \\" end"') == {"key": 'msg " end'} 67 | assert neon.decode("key: 'msg \\' end'") == {"key": "msg ' end"} 68 | assert neon.decode('src: "\\\\usr\\\\share"') == {"src": "\\usr\\share"} 69 | --------------------------------------------------------------------------------
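
For quick reference, below is a minimal usage sketch of the public API shown above (`neon.decode`, `neon.encode`, and `neon.entity.Entity`). The sample NEON document is made up for illustration and mirrors the README example; the expected values follow the behaviour exercised in `tests/test_decoder.py`.

```python
import neon
from neon.entity import Entity

SOURCE = """
name: Homer
children:
    - Bart
    - Lisa
entity: Column(type=integer)
"""

# decode() accepts either a NEON string or a file-like object with read().
config = neon.decode(SOURCE)
assert config["name"] == "Homer"
assert config["children"] == ["Bart", "Lisa"]
assert config["entity"] == Entity("Column", {"type": "integer"})

# encode() renders a Python structure back into NEON-style text
# (tabs for nested indentation, "Null" for None).
print(neon.encode({"address": {"city": "Springfield"}, "pets": None}))
```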