├── .gitattributes ├── .gitignore ├── .travis.yml ├── INSTALL.sh ├── LICENSE ├── Python ├── .gitignore ├── MANIFEST.in ├── README.rst ├── Ruikowa │ ├── Bootstrap │ │ ├── Ast.py │ │ ├── Compile.py │ │ ├── Parser.py │ │ ├── Token.py │ │ ├── __init__.py │ │ └── grammar │ ├── Command.py │ ├── Config.py │ ├── Core │ │ ├── BaseDef.py │ │ └── __init__.py │ ├── ErrorFamily.py │ ├── ErrorHandler.py │ ├── ObjectRegex │ │ ├── ASTDef.py │ │ ├── MetaInfo.py │ │ ├── Node.py │ │ ├── Optimize.py │ │ ├── Tokenizer.py │ │ └── __init__.py │ ├── Tools │ │ └── __init__.py │ ├── __init__.py │ ├── color.py │ └── io.py ├── release-note └── setup.py ├── README.md ├── Ruiko ├── README.rst ├── ast.cpp ├── bootstrap.ruiko ├── dev_bnf.cpp ├── flowerq │ ├── Composite.hpp │ ├── IO.File.hpp │ ├── IO.hpp │ ├── List.BaseMethods.hpp │ ├── List.Constructor.hpp │ ├── List.Node.hpp │ ├── List.hpp │ ├── Macro.hpp │ └── Match.hpp ├── main.cpp ├── test.txt └── xml.ruiko ├── docs ├── RuikoEBNF.rst ├── codes │ ├── just.py │ ├── just.ruiko │ ├── lisp.ruiko │ ├── lisp_parser.py │ ├── parsing_CastMap.py │ ├── parsing_CastMap.ruiko │ ├── parsing_tokenizer.py │ ├── parsing_tokenizer.ruiko │ ├── proj.py │ ├── test.lisp │ ├── test_lang.py │ ├── url.py │ └── url.ruiko ├── conf.py ├── index.rst ├── parsing.rst └── quickstart.rst ├── test.sh ├── testRuikowa.sh └── tests └── Ruikowa ├── Lang └── Lisp │ ├── grammar │ ├── pparser.py │ ├── test.ast │ ├── test.json │ ├── testLisp.sh │ └── test_lang.py ├── test.py ├── testBootstrap.py ├── testCycleLeftRecur.py ├── testCycleLeftRecur3.py ├── testCycleLeftRecurAndDumpToJSON.py └── testLiteralParser.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py linguist-language=python 2 | *.cs linguist-language=csharp 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .spyproject/ 3 | Attempts-which-failed/ 4 | .vscode/ 5 | *.idea/ 6 | Ruiko/cmake-build-debug/ 7 | CSharp/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | .idea/ 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.6-dev" 5 | - "3.7-dev" 6 | script: 7 | - pip install Linq 8 | - bash ./testRuikowa.sh installAndTest 9 | 10 | 11 | -------------------------------------------------------------------------------- /INSTALL.sh: -------------------------------------------------------------------------------- 1 | cd ./Python 2 | python setup.py install 3 | rm -r ./build 4 | rm -r ./EBNFParser.egg-info 5 | rm -r ./dist -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Python/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .spyproject/ 3 | Attempts-which-failed/ 4 | .vscode/ 5 | .idea/ 6 | EBNFParser.egg-info/ 7 | build/ 8 | dist/ 9 | -------------------------------------------------------------------------------- /Python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include Misakawa *.py 2 | include Misakawa 3 | recursive-include Ruikowa *.py 4 | include Ruikowa 5 | -------------------------------------------------------------------------------- /Python/README.rst: -------------------------------------------------------------------------------- 1 | |Build Status| |PyPI version| |Release Note| |MIT License| 2 | 3 | EBNFParser 4 | ========== 5 | 6 | Parse Many, Any, Every |Doc| 7 | ---------------------------- 8 | 9 | :: 10 | 11 | LR ::= LR 'a' 'b' | LR 'c' | 'd'; 12 | 13 | - `Python Project(Support Python 14 | 3.6+) `__ 15 | (v 2.0+) 16 | 17 | - `Old Version : Misakawa 18 | v0.x `__ 19 | - `Old Version : Ruikowa 20 | v1.x `__ 21 | 22 | -------------- 23 | 24 | Install 25 | ------- 26 | 27 | - Python 28 | 29 | - pip 30 | 31 | ``pip installl -U EBNFParser`` 32 | 33 | - setup 34 | 35 | .. code:: shell 36 | 37 | git clone https://github.com/thautwarm/EBNFParser 38 | cd EBNFParser/Python 39 | python setup.py install 40 | 41 | Usage 42 | ----- 43 | 44 | - Command Line Tools 45 | 46 | - ``ruiko``. 47 | 48 | .. code:: shell 49 | 50 | ruiko ./ ./ 51 | [--testTk] # print tokenized words or not 52 | [--test] # generate test script "test_lang.py" 53 | 54 | Use command ``ruiko`` to generate parser and token files, and then 55 | you can use ``test_lang.py`` to test your parser. 56 | 57 | .. code:: shell 58 | 59 | python ./test_lang.py Stmt " (+ 1 2) " -o test.json --testTk 60 | 61 | - Integrated into your own project 62 | 63 | .. code:: python 64 | 65 | 66 | from Ruikowa.ObjectRegex.ASTDef import Ast 67 | from Ruikowa.ErrorHandler import ErrorHandler 68 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 69 | from Ruikowa.ObjectRegex.Tokenizer import Tokenizer 70 | 71 | from import , token_table 72 | 73 | 74 | import typing as t 75 | 76 | def token_func(src_code: str) -> t.Iterable[Tokenizer]: 77 | return Tokenizer.from_raw_strings(src_code, token_table, ({}, {})) 78 | 79 | parser = ErrorHandler(.match, token_func) 80 | 81 | def parse(filename: str) -> Ast: 82 | 83 | return parser.from_file(filename) 84 | 85 | 86 | print(parse()) 87 | 88 | Need more? See `the 89 | documents `__. 90 | 91 | Examples 92 | -------- 93 | 94 | Here are some examples to refer: 95 | 96 | EBNFParser 2.0 97 | 98 | - `Rem `__ 99 | The Rem programming language. 100 | 101 | Old version(Before EBNFParser 1.1). 102 | 103 | - | `DBG-Lang `__ 104 | | A DSL for SQL development in Python areas. 105 | 106 | - | `Rem(Based 107 | EBNFParser1.1) `__ 108 | | A full featured modern language to enhance program readability 109 | based on CPython. 110 | 111 | - | `Lang.Red `__ 112 | | An attempt to making ASDL in CPython(unfinished yet) 113 | 114 | Will support F# and Rem. 115 | 116 | .. |Build Status| image:: https://travis-ci.org/thautwarm/EBNFParser.svg?branch=boating-new 117 | :target: https://travis-ci.org/thautwarm/EBNFParser 118 | .. |PyPI version| image:: https://img.shields.io/pypi/v/EBNFParser.svg 119 | :target: https://pypi.python.org/pypi/EBNFParser 120 | .. 
|Release Note| image:: https://img.shields.io/badge/note-release-orange.svg 121 | :target: https://github.com/thautwarm/EBNFParser/blob/boating-new/Python/release-note 122 | .. |MIT License| image:: https://img.shields.io/badge/license-MIT-Green.svg?style=flat 123 | :target: https://github.com/thautwarm/EBNFParser/blob/boating-new/LICENSE 124 | .. |Doc| image:: https://img.shields.io/badge/document-2.1.2-yellow.svg?style=flat 125 | :target: http://ebnfparser.readthedocs.io/en/boating-new 126 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/Ast.py: -------------------------------------------------------------------------------- 1 | import os 2 | import linq 3 | from collections import namedtuple 4 | from typing import List, Tuple 5 | from .Token import NameEnum 6 | from ..Core.BaseDef import * 7 | from ..ErrorFamily import UnsupportedStringPrefix, find_location 8 | from ..ObjectRegex.Node import Ast 9 | from ..ObjectRegex.Tokenizer import Mode, TokenSpec, Tokenizer 10 | from ..color import Colored 11 | from ..io import grace_open 12 | 13 | SeqParserParams = namedtuple('DA', ['at_least', 'at_most']) 14 | CompilingNodes = namedtuple('CN', ['reachable', 'alone']) 15 | 16 | T = 'Union[Ast, List[Union[Ast, Tokenizer]]]' 17 | 18 | 19 | def get_string_and_mode(prefix_string: str) -> 'Tuple[Optional[str], str]': 20 | if prefix_string[0] is not '\'': 21 | return prefix_string[0], prefix_string[1:] 22 | else: 23 | return None, prefix_string 24 | 25 | 26 | def surround_with_double_quotes(string): 27 | return '"{}"'.format(string) 28 | 29 | 30 | class Compiler: 31 | # TODO: refactor and clear redundant items. 32 | def __init__(self, filename: str = None, src_code: str = None): 33 | self.src = src_code 34 | self.filename = filename 35 | 36 | self.token_func_src = None 37 | self.token_spec = TokenSpec() 38 | self.token_ignores = ('{}', '{}') # define what to ignore when tokenizing. 39 | self.prefix_mapping = {} 40 | self.cast_map = {} 41 | self.c_macro = {} 42 | 43 | self.generated_token_names = set() 44 | 45 | self.literal_parser_definitions = [] 46 | self.combined_parsers = [] 47 | 48 | self.compile_helper = CompilingNodes(set(), set()) 49 | self._current_indent = None 50 | self._current__combined_parser_name = None 51 | self._current_events = None 52 | self._current_anonymous_count = 0 53 | 54 | def ast_for_stmts(self, stmts: T) -> None: 55 | """ 56 | Stmts ::= TokenDef{0, 1} Equals*; 57 | """ 58 | if not stmts: 59 | raise ValueError('no ast found!') 60 | head, *equals = stmts 61 | 62 | if head.name is NameEnum.TokenDef: 63 | self.ast_for_token_def(head) 64 | elif head.name is NameEnum.TokenIgnore: 65 | self.ast_for_token_ignore(head) 66 | else: 67 | self.ast_for_equals(head) 68 | 69 | for each in equals: 70 | self.ast_for_equals(each) 71 | 72 | # if every combined parser can reach any other combined, 73 | # just take any of them and compile it! 
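        # Illustration with a hypothetical grammar (the rule names are made up, not
        # taken from this repository): given
        #     Expr ::= Term ... ;      Term ::= ... Expr ... ;
        # each rule is referenced by the other, so nothing is left in `alone` and the
        # fallback below promotes the most recently defined combined parser to be the
        # compilation root; Compile.py later emits one
        # `<name>.compile(namespace, recur_searcher)` line per name kept in `alone`.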
74 | if not self.compile_helper.alone and self._current__combined_parser_name: 75 | self.compile_helper.alone.add(self._current__combined_parser_name) 76 | 77 | def ast_for_token_ignore(self, token_ignore: T): 78 | _, _, *items, _ = token_ignore 79 | grouped = linq.Flow(items).GroupBy(lambda x: x.name is NameEnum.Str).Unboxed() 80 | lit_ignore = "{{{}}}".format(', '.join(map(lambda _: _.string, grouped[True]))) 81 | name_ignore = "{{{}}}".format(', '.join(map(lambda _: '"' + _.string + '"', grouped[False]))) 82 | self.token_ignores = (name_ignore, lit_ignore) 83 | 84 | def ast_for_token_def(self, token_def: T): 85 | content = token_def[1] 86 | if content.name is NameEnum.Name: 87 | path = os.path.join(* 88 | map(lambda _: '..' if _ == 'parent' else _, 89 | content.string.split('.'))) 90 | self.token_func_src = grace_open(path).read() 91 | return 92 | else: 93 | self.token_func_src = content.string[2:-2] 94 | 95 | def ast_for_combined_parser_def(self, equals: T): 96 | if equals[1].name is NameEnum.Throw: 97 | name, throw, _, expr, _ = equals 98 | throw: 'T' = self.ast_for_throw(throw) 99 | grouped = linq.Flow(throw).GroupBy(lambda x: x.name is NameEnum.Str).Unboxed() 100 | else: 101 | name, _, expr, _ = equals 102 | grouped = {True: (), False: ()} 103 | 104 | name = self._current__combined_parser_name = name.string 105 | self.token_spec.enums.__setitem__(name, f"'{name}'") 106 | 107 | if name not in self.compile_helper.reachable: 108 | self.compile_helper.alone.add(name) 109 | 110 | indent = ' ' + " " * len(name) 111 | self.combined_parsers.append( 112 | '{name} = AstParser({possibilities},\n' 113 | '{indent}name="{name}",\n' 114 | '{indent}to_ignore=({name_ignore}, {lit_ignore}))' 115 | ''.format( 116 | indent=indent, 117 | name=name, 118 | possibilities=(',\n{}'.format(indent)).join(self.ast_for_expr(expr)), 119 | lit_ignore="{{{}}}".format(', '.join(map(lambda _: _.string, grouped[True]))), 120 | name_ignore="{{{}}}".format(', '.join(map(lambda _: '"' + _.string + '"', grouped[False]))) 121 | )) 122 | 123 | def ast_for_literal_parser_def(self, equals: T): 124 | str_tks: 'List[Tokenizer]' 125 | defining_cast_map = False 126 | if equals[-2].name is NameEnum.Str: 127 | 128 | if equals[1].string is NameEnum.keyword_cast: 129 | defining_cast_map = True 130 | h, _, *t = equals 131 | equals = [h, *t] 132 | 133 | if equals[1].name is NameEnum.Prefix: 134 | name, prefix, _, *str_tks, _ = equals 135 | prefix: 'Ast' 136 | prefix_string = prefix[1].string 137 | if len(prefix_string) > 1: 138 | raise UnsupportedStringPrefix(prefix_string, 139 | " the length of prefix name should be 1 only." + 140 | find_location(self.filename, prefix[1], self.src)) 141 | self.prefix_mapping[prefix_string] = name.string 142 | 143 | elif equals[1].name is NameEnum.Of: 144 | 145 | ref_name, of, _, *str_tks, _ = equals 146 | name = of[1] 147 | self.c_macro[ref_name.string] = name.string 148 | 149 | else: 150 | name, _, *str_tks, _ = equals 151 | 152 | name = name.string 153 | if defining_cast_map: 154 | # define cast map 155 | for str_tk in str_tks: 156 | mode, string = get_string_and_mode(str_tk.string) 157 | if mode: 158 | raise UnsupportedStringPrefix(mode, 159 | 'do not support setting prefix when defining custom prefix.' 
+ 160 | find_location(self.filename, str_tk, self.src)) 161 | self.cast_map[string] = name 162 | 163 | # define how to tokenize 164 | for str_tk in str_tks: 165 | mode, string = get_string_and_mode(str_tk.string) 166 | if mode is 'R': 167 | mode = Mode.regex 168 | elif len(string) is 3: 169 | mode = Mode.char 170 | else: 171 | mode = Mode.const 172 | self.token_spec.tokens.append((name, mode, string)) 173 | if string[1:-1].isidentifier(): 174 | self.token_spec.enums.__setitem__(f'{name}_{string[1:-1]}', string) 175 | 176 | if name not in self.generated_token_names: 177 | self.literal_parser_definitions.append("{} = LiteralNameParser('{}')".format(name, name)) 178 | self.generated_token_names.add(name) 179 | self.token_spec.enums.__setitem__(name, f"'{name}'") 180 | 181 | def ast_for_equals(self, equals: T): 182 | if equals[-2].name is NameEnum.Str: 183 | self.ast_for_literal_parser_def(equals) 184 | return 185 | self.ast_for_combined_parser_def(equals) 186 | 187 | @classmethod 188 | def ast_for_throw(cls, throw: T): 189 | _, _, *items, _ = throw 190 | return items 191 | 192 | def ast_for_expr(self, expr: T): 193 | return (self.ast_for_or(each) for each in expr[::2]) 194 | 195 | def ast_for_or(self, or_expr: T): 196 | 197 | return '[{}]'.format(', '.join(self.ast_for_atom_expr(each) for each in or_expr)) 198 | 199 | def handle_atom_with_trailer(self, atom: T): 200 | maybe_tk, default_attrs = self.ast_for_atom(atom) 201 | default_attrs: 'SeqParserParams' 202 | if maybe_tk.__class__ is Tokenizer: 203 | if maybe_tk.name is NameEnum.Name: 204 | 205 | name = self.c_macro.get(maybe_tk.string, maybe_tk.string) 206 | 207 | if name in self.compile_helper.alone: 208 | self.compile_helper.alone.remove(name) 209 | 210 | if name not in self.compile_helper.reachable: 211 | self.compile_helper.reachable.add(name) 212 | 213 | return "Ref('{}')".format(name) 214 | 215 | else: 216 | mode, string = get_string_and_mode(maybe_tk.string) 217 | if not mode: 218 | for k, mode, v in self.token_spec.tokens: 219 | # check if need to create a new token pattern 220 | if v is string and k == 'auto_const': 221 | break 222 | 223 | else: 224 | if len(string) is 3: 225 | self.token_spec.tokens.append(('auto_const', Mode.char, string)) 226 | else: 227 | self.token_spec.tokens.append(('auto_const', Mode.const, string)) 228 | return string 229 | 230 | if mode is 'R': 231 | for k, mode, v in self.token_spec.tokens: 232 | if mode is Mode.regex and v == string: 233 | 234 | if k in self.compile_helper.alone: 235 | self.compile_helper.alone.remove(k) 236 | 237 | if k not in self.compile_helper.reachable: 238 | self.compile_helper.reachable.add(k) 239 | 240 | return "Ref('{}')".format(k) 241 | 242 | name: str = 'anonymous_{}'.format(self._current_anonymous_count) 243 | self._current_anonymous_count += 1 244 | warnings.warn( 245 | Colored.LightBlue + 246 | '\nFor efficiency of the parser, ' 247 | 'we do not do regex matching when parsing(only in tokenizing we use regex), ' 248 | 'you are now creating a anonymous regex literal parser ' 249 | '{}<{}>{} when defining combined parser{}\n' 250 | .format(Colored.Red, name, Colored.LightBlue, Colored.Clear)) 251 | 252 | self.token_spec.tokens.append((name, Mode.regex, string)) 253 | self.token_spec.enums.__setitem__(name, f"'{name}'") 254 | self.literal_parser_definitions.append("{} = LiteralNameParser('{}')".format(name, name)) 255 | 256 | if name in self.compile_helper.alone: 257 | self.compile_helper.alone.remove(name) 258 | 259 | if name not in self.compile_helper.reachable: 260 | 
self.compile_helper.reachable.add(name) 261 | 262 | return "Ref('{}')".format(name) 263 | 264 | elif mode is 'L': 265 | return f"L({string})" 266 | 267 | elif mode not in self.prefix_mapping: 268 | raise UnsupportedStringPrefix(mode, "Prefix not defined." 269 | + find_location(self.filename, maybe_tk, self.src)) 270 | 271 | else: 272 | name = self.prefix_mapping[mode] 273 | self.cast_map[string] = name 274 | return f"('{name}', {string})" 275 | 276 | return dict(possibilities=', '.join(maybe_tk), 277 | at_least=default_attrs.at_least, 278 | at_most=default_attrs.at_most) 279 | 280 | def ast_for_atom_expr(self, atom_expr: T): 281 | if len(atom_expr) is 1: 282 | res = self.handle_atom_with_trailer(atom_expr[0]) 283 | if res.__class__ is dict: 284 | return ('SeqParser({possibilities}, ' 285 | 'at_least={at_least},' 286 | 'at_most={at_most})'.format(**res)) 287 | return res 288 | 289 | atom, trailer = atom_expr 290 | res = self.handle_atom_with_trailer(atom) 291 | attrs = self.ast_for_trailer(trailer) 292 | 293 | if res.__class__ is dict: 294 | res.update(at_least=attrs.at_least, at_most=attrs.at_most) 295 | 296 | return ('SeqParser({possibilities}, ' 297 | 'at_least={at_least},' 298 | 'at_most={at_most})'.format(**res)) 299 | 300 | return ('SeqParser({possibilities}, ' 301 | 'at_least={at_least},' 302 | 'at_most={at_most})'.format(possibilities=res if res[0] is '[' else f'[{res}]', 303 | at_most=attrs.at_most, 304 | at_least=attrs.at_least)) 305 | 306 | def ast_for_atom(self, atom: 'Ast'): 307 | if atom[0].string is '(': 308 | return self.ast_for_expr(atom[1]), SeqParserParams(1, 1) 309 | elif atom[0].string is '[': 310 | return self.ast_for_expr(atom[1]), SeqParserParams(0, 1) 311 | 312 | return atom[0], None 313 | 314 | @classmethod 315 | def ast_for_trailer(cls, trailer): 316 | if len(trailer) is 1: 317 | trailer: 'Tokenizer' = trailer[0] 318 | return SeqParserParams(0, 'Undef') if trailer.string is '*' else SeqParserParams(1, 'Undef') 319 | else: 320 | _, *numbers, _ = trailer 321 | numbers: 'List[Tokenizer]' 322 | if len(numbers) is 2: 323 | a, b = numbers 324 | return SeqParserParams(a.string, a.string) 325 | else: 326 | return SeqParserParams(numbers[0].string, 'Undef') 327 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/Compile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Oct 19 18:00:43 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from .Ast import Compiler 10 | from .Parser import Stmts 11 | from ..ObjectRegex.Node import MetaInfo 12 | from ..ErrorHandler import ErrorHandler 13 | from .Token import token_func 14 | from ..io import grace_open 15 | 16 | include = ( 17 | "# This file is automatically generated by EBNFParser.\n" 18 | "from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer\n" 19 | "from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef\n" 20 | "namespace = globals()\n" 21 | "recur_searcher = set()") 22 | 23 | 24 | def compile(src_path, print_token=False): 25 | parser = ErrorHandler(Stmts.match, token_func) 26 | src_code = grace_open(src_path).read() 27 | stmts = parser.from_file(src_path, MetaInfo(fileName=src_path), print_token=print_token) 28 | compiler = Compiler(filename=src_path, src_code=src_code) 29 | compiler.ast_for_stmts(stmts) 30 | cast_map_dumps = 
"{{{}}}".format( 31 | ', '.join(f"{k}: unique_literal_cache_pool['{v}']" for k, v in compiler.cast_map.items())) 32 | if compiler.token_func_src: 33 | token_func_src = (f"token_table = {compiler.token_spec.to_token_table()}\n" 34 | f"{compiler.token_spec.to_name_enum()}\n" 35 | f"{cast_map_dumps}\n" 36 | f"{compiler.token_func_src}") 37 | 38 | else: 39 | token_func_src = (f"token_table = {compiler.token_spec.to_token_table()}\n" 40 | f"{compiler.token_spec.to_name_enum()}\n" 41 | f"cast_map = {cast_map_dumps}\n" 42 | f"token_func = lambda _: " 43 | "Tokenizer.from_raw_strings(_, token_table, " 44 | f"({compiler.token_ignores[0]}, " 45 | f"{compiler.token_ignores[1]})," 46 | f"cast_map=cast_map)") 47 | 48 | literal_parsers = '\n'.join(compiler.literal_parser_definitions) 49 | 50 | combined_parsers = '\n'.join(compiler.combined_parsers) 51 | 52 | compiling = '\n'.join( 53 | map(lambda _: '{}.compile(namespace, recur_searcher)'.format(_), compiler.compile_helper.alone)) 54 | 55 | return '{}\n{}\n{}\n{}\n{}'.format(include, token_func_src, literal_parsers, combined_parsers, compiling) 56 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/Parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Oct 17 10:16:52 2017 5 | 6 | @author: misakawa 7 | """ 8 | from ..ObjectRegex.Node import Ref, AstParser, SeqParser, LiteralNameParser, LiteralNameValueParser 9 | from ..ObjectRegex.Tokenizer import Tokenizer 10 | from ..ObjectRegex.MetaInfo import MetaInfo 11 | 12 | Str = LiteralNameParser('Str') 13 | Name = LiteralNameParser('Name') 14 | Number = LiteralNameParser('Number') 15 | Codes = LiteralNameParser('Codes') 16 | 17 | namespace = globals() 18 | recurSearcher = set() 19 | 20 | TokenIgnore = AstParser( 21 | [('keyword', 'ignore'), 22 | '[', 23 | SeqParser([Name], [Str]), 24 | ']'], 25 | name='TokenIgnore') 26 | 27 | Prefix = AstParser( 28 | [('keyword', 'as'), Name], 29 | name='Prefix') 30 | 31 | Of = AstParser( 32 | [('keyword', 'of'), Name], 33 | name='Of') 34 | 35 | Stmts = AstParser( 36 | [SeqParser([Ref('TokenIgnore')], 37 | [Ref('TokenDef')], 38 | at_most=1), 39 | SeqParser([Ref('Equals')])], 40 | name='Stmts') 41 | 42 | TokenDef = AstParser( 43 | [('keyword', 'deftoken'), SeqParser([Name], [Codes], at_most=1, at_least=1)], 44 | name='TokenDef') 45 | 46 | Equals = AstParser( 47 | [Name, SeqParser(['cast'], at_most=1) ,SeqParser([Ref('Prefix')], [Ref('Of')], at_most=1), ':=', SeqParser([Str]), ';'], 48 | [Name, SeqParser([Ref('Throw')], at_most=1), '::=', Ref('Expr'), ';'], 49 | name='Equals') 50 | 51 | Throw = AstParser( 52 | [('keyword', 'throw'), 53 | '[', 54 | SeqParser([Name], [Str]), 55 | ']' 56 | ], 57 | name='Throw') 58 | 59 | Expr = AstParser( 60 | [Ref('Or'), SeqParser(['|', Ref('Or')])], 61 | name='Expr') 62 | 63 | Or = AstParser( 64 | [SeqParser([Ref('AtomExpr')], at_least=1)], 65 | name=' Or') 66 | 67 | AtomExpr = AstParser( 68 | [Ref('Atom'), SeqParser([Ref('Trailer')])], 69 | name='AtomExpr') 70 | 71 | Atom = AstParser( 72 | [Str], 73 | [Name], 74 | ['[', Ref('Expr'), ']'], 75 | ['(', Ref('Expr'), ')'], 76 | name='Atom') 77 | 78 | Trailer = AstParser( 79 | ['+'], 80 | ['*'], 81 | ['{', SeqParser([Number], at_least=1, at_most=2), '}'], 82 | name='Trailer') 83 | 84 | Stmts.compile(namespace, recurSearcher) 85 | -------------------------------------------------------------------------------- 
/Python/Ruikowa/Bootstrap/Token.py: -------------------------------------------------------------------------------- 1 | import re as re 2 | from ..ObjectRegex.Tokenizer import (Tokenizer, str_matcher, regex_matcher, 3 | char_matcher, unique_literal_cache_pool) 4 | 5 | 6 | def _escape(*str_s): 7 | return '|'.join([re.escape(string) for string in str_s]) 8 | 9 | 10 | class NameEnum: 11 | keyword_as = unique_literal_cache_pool['as'] 12 | keyword_of = unique_literal_cache_pool['of'] 13 | keyword_throw = unique_literal_cache_pool['throw'] 14 | keyword_deftoken = unique_literal_cache_pool['deftoken'] 15 | keyword_ignore = unique_literal_cache_pool['ignore'] 16 | keyword_cast = unique_literal_cache_pool['cast'] 17 | 18 | Of = unique_literal_cache_pool['Of'] 19 | Prefix = unique_literal_cache_pool['Prefix'] 20 | Comments = unique_literal_cache_pool['Comments'] 21 | Str = unique_literal_cache_pool['Str'] 22 | Codes = unique_literal_cache_pool['Codes'] 23 | 24 | Name = unique_literal_cache_pool['Name'] 25 | Number = unique_literal_cache_pool['Number'] 26 | Newline = unique_literal_cache_pool['Newline'] 27 | 28 | TokenIgnore = unique_literal_cache_pool['TokenIgnore'] 29 | Single = unique_literal_cache_pool['Single'] 30 | Eq = unique_literal_cache_pool['Eq'] 31 | TokenRelated = unique_literal_cache_pool['TokenRelated'] 32 | 33 | TokenDef = unique_literal_cache_pool['TokenDef'] 34 | Throw = unique_literal_cache_pool['Throw'] 35 | 36 | 37 | token_table = ( 38 | # match by value 39 | ("auto_const", char_matcher( 40 | ('|', 41 | '{', 42 | '}', 43 | ';', 44 | '[', 45 | ']', 46 | '(', 47 | ')', 48 | '+', 49 | '*', 50 | '.') 51 | )), 52 | 53 | # match by value 54 | ("auto_const", str_matcher( 55 | ("::=", ":=") 56 | )), 57 | 58 | # match by name 59 | ('Comment', regex_matcher(re.compile(r'(#.*)|(((/\*)+?[\w\W]+?(\*/)+))'))), 60 | ("Str", regex_matcher(re.compile(r"[A-Z]'([^\\']+|\\.)*?'|'([^\\']+|\\.)*?'"))), 61 | ("Codes", regex_matcher(re.compile(r'{{[\w\W]+?\}\}'))), 62 | 63 | ("Name", regex_matcher("[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5\.]*")), 64 | ("Number", regex_matcher("\d+")), 65 | 66 | # do not match 67 | ("Space", regex_matcher('\s+|,')), 68 | 69 | ) 70 | 71 | token_table = tuple((unique_literal_cache_pool[k], v) for k, v in token_table) 72 | keyword = unique_literal_cache_pool['keyword'] 73 | cast_map = { 74 | 'as': keyword, 75 | 'throw': keyword, 76 | 'deftoken': keyword, 77 | 'ignore': keyword, 78 | 'for': keyword, 79 | 'of': keyword, 80 | 'cast': keyword 81 | } 82 | 83 | token_func = lambda _: Tokenizer.from_raw_strings(_, 84 | token_table, 85 | to_ignore=({'Space', 'Comment'}, {}), cast_map=cast_map) 86 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thautwarm/EBNFParser/101a92c4f408f9e6ce7b55aacb39cded9394521d/Python/Ruikowa/Bootstrap/__init__.py -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/grammar: -------------------------------------------------------------------------------- 1 | Stmts ::= [TokenIgnore | TokenDef] Equals*; 2 | 3 | TokenIgnore ::= 'ignore' '[' (Name|Str)* ']'; 4 | 5 | TokenDef ::= 'deftoken' (Name | Codes); 6 | 7 | Prefix ::= 'as' Name; 8 | Of ::= 'of' Name; 9 | 10 | 11 | Equals ::= Name ['cast'] [Prefix|Of] ':=' Str + ';'| 12 | Name [Throw] '::=' Expr ';'; 13 | 14 | Throw ::= 'throw' '[' (Name | Str)* ']'; 15 | 16 
| Expr ::= Or ('|' Or)*; 17 | 18 | Or ::= AtomExpr+; 19 | 20 | AtomExpr ::= Atom Trailer*; 21 | 22 | Atom ::= Str | 23 | Name | 24 | '(' Expr ')'| 25 | '[' Expr ']'; 26 | 27 | Trailer ::= '*' | '+' | '{' Number{1 2} '}'; 28 | 29 | /* 30 | 31 | keyword : 32 | ['ignore', 'deftoken', 'as', 'throw', 'of', 'cast'] 33 | */ 34 | 35 | 36 | -------------------------------------------------------------------------------- /Python/Ruikowa/Command.py: -------------------------------------------------------------------------------- 1 | test_lang_templates = ( 2 | """ 3 | # This file is automatically generated by EBNFParser. 4 | import argparse, json 5 | 6 | cmd_parser = argparse.ArgumentParser(description='test language parsers swiftly.') 7 | cmd_parser.add_argument("parser", type=str, 8 | help='What kind of parser do you want to test with?(e.g Stmt, Expr, ...)') 9 | cmd_parser.add_argument("codes", metavar='codes', type=str, 10 | help='input some codes in your own language here.') 11 | cmd_parser.add_argument('-o', help='output. support .json and .ast suffix.', type=str) 12 | cmd_parser.add_argument("--testTk", nargs='?', default=False, const=True) 13 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True, 14 | help='print tokens of grammar file?') 15 | 16 | args = cmd_parser.parse_args() 17 | 18 | if args.debug: 19 | from Ruikowa.Config import Debug 20 | Debug.append(1) 21 | 22 | from Ruikowa.ErrorHandler import ErrorHandler, Colored 23 | from Ruikowa.ObjectRegex.ASTDef import Ast 24 | from Ruikowa.io import grace_open 25 | from {} import * 26 | print(Colored.Green,'=========================ebnfparser test script================================', Colored.Clear) 27 | 28 | print_token = args.testTk 29 | ast: Ast = ErrorHandler(eval(args.parser).match, token_func).from_source_code('', args.codes, print_token=print_token) 30 | print(Colored.Blue, ast, Colored.Clear) 31 | if args.o: 32 | o: str = args.o.lower() 33 | if o.endswith('.json'): 34 | grace_open(o).write(json.dumps(ast.dump_to_json(), indent=2)) 35 | elif o.endswith('.ast'): 36 | grace_open(o).write(ast.dump()) 37 | else: 38 | raise Exception('Unsupported file ext.') 39 | 40 | """) 41 | 42 | 43 | def main(): 44 | import argparse 45 | 46 | cmd_parser = argparse.ArgumentParser(description='using EBNFParser.') 47 | cmd_parser.add_argument("InputFile", metavar='path of input file', type=str, 48 | help='EBNF file which describes your language\'s grammar.') 49 | cmd_parser.add_argument("OutputFile", metavar='path of output file', type=str, 50 | help='generate python file(s) that makes a parser for your language.') 51 | cmd_parser.add_argument('--test', nargs='?', default=False, const=True, 52 | help='make a script to test language parsers quickly?') 53 | cmd_parser.add_argument('--testTk', nargs='?', default=False, const=True, 54 | help='print tokens of grammar file?') 55 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True, 56 | help='print tokens of grammar file?') 57 | 58 | args = cmd_parser.parse_args() 59 | 60 | if args.debug: 61 | from .Config import Debug 62 | Debug.append(1) 63 | 64 | from .Bootstrap.Compile import compile as bootstrap_comp 65 | from .io import grace_open 66 | from .color import Colored 67 | print(Colored.Green) 68 | 69 | import sys, os 70 | 71 | inp, outp = args.InputFile, args.OutputFile 72 | 73 | head_from, _ = os.path.split(sys.argv[0]) 74 | head_to, __ParserFile__ = os.path.split(outp) 75 | 76 | generated_codes = bootstrap_comp(inp, args.testTk) 77 | path = os.path.join(head_to, 
outp) 78 | if path[-3:].lower() != '.py': 79 | path = '{}.py'.format(path) 80 | module = os.path.splitext(os.path.basename(outp))[0] 81 | grace_open('{}'.format(path)).write(generated_codes) 82 | 83 | if args.test: 84 | print('making test script....') 85 | grace_open('{}' 86 | .format(os.path.join(head_to, 'test_lang.py')) 87 | ).write(test_lang_templates.format(module)) 88 | 89 | print(Colored.Clear) 90 | 91 | 92 | if __name__ == '__main__': 93 | main() 94 | -------------------------------------------------------------------------------- /Python/Ruikowa/Config.py: -------------------------------------------------------------------------------- 1 | Debug = [] 2 | -------------------------------------------------------------------------------- /Python/Ruikowa/Core/BaseDef.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 17:46:02 2017 5 | 6 | @author: misakawa 7 | """ 8 | from ..ErrorFamily import * 9 | # ====== Define Generic Type Params ============= 10 | 11 | WarningInfo =""" 12 | You're trying to visit the elems that've been deprecated. 13 | If it occurred when you're using EBNFParser, report it as 14 | a BUG at 15 | `https://github.com/thautwarm/EBNFParser`. Thanks a lot! 16 | """ 17 | 18 | 19 | # ====== 20 | 21 | Undef = None 22 | class Const: 23 | def __new__(self): 24 | raise ObjectUsageError("You're trying to new an instance with a module.") 25 | UnMatched = None 26 | NameFilter = 0 27 | RawFilter = 1 28 | RegexFilter= 2 29 | 30 | 31 | class RecursiveFound(Exception): 32 | def __init__(self, node): 33 | self.node = node 34 | self.possibilities = [] 35 | def add(self, possibility): 36 | self.possibilities.append(possibility) 37 | 38 | def __str__(self): 39 | s = '=====\n' 40 | s+=self.node.name+'\n' 41 | s+='\n'.join(a.name +' | '+str([c.name for c in b]) 42 | for a,b in self.possibilities) 43 | return s 44 | 45 | 46 | class Recur: 47 | def __new__(self, name, count): 48 | return (name, count) 49 | 50 | class Trace: 51 | def __init__(self, 52 | trace = Undef, 53 | length = Undef): 54 | self.length = length if length is not Undef else\ 55 | len(trace) if trace is not Undef else\ 56 | 0 57 | self.content = trace if trace is not Undef else\ 58 | [] 59 | self._Mem = len(self.content) 60 | 61 | 62 | def __iter__(self): 63 | yield from self.content[:self.length] 64 | 65 | 66 | def __getitem__(self, item): 67 | if isinstance(item, int): 68 | if item >= self.length: 69 | warnings.warn(WarningInfo) 70 | return self.content[item] 71 | elif isinstance(item, slice): 72 | if item.stop > self.length: 73 | warnings.warn(WarningInfo) 74 | return self.content[item] 75 | 76 | 77 | 78 | def append(self, elem): 79 | # reuse the memory cache 80 | if self.length==self._Mem: 81 | self.length += 1 82 | self._Mem += 1 83 | self.content.append(elem) 84 | elif self.length < self._Mem: 85 | self.content[self.length] = elem 86 | self.length += 1 87 | 88 | def new(self, constructor): 89 | # just can be used for Type `Trace[Contrainer[T]]` 90 | # reuse the memory cache 91 | if self.length==self._Mem: 92 | self.length += 1 93 | self._Mem += 1 94 | self.content.append(constructor()) 95 | elif self.length < self._Mem: 96 | self.content[self.length].length = 0 97 | self.length += 1 98 | 99 | def pop(self): 100 | self.length -= 1 101 | assert self.length>=0 102 | 103 | def where(self, obj): 104 | for idx, elem in enumerate(self.content[:self.length]): 105 | if elem is obj: 106 | return idx 107 | 
return Undef 108 | 109 | def mem(self): 110 | return self._Mem 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /Python/Ruikowa/Core/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /Python/Ruikowa/ErrorFamily.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 19:28:51 2017 5 | 6 | @author: misakawa 7 | """ 8 | from pprint import pprint 9 | from .color import Colored 10 | 11 | if False: 12 | from .ObjectRegex.MetaInfo import MetaInfo 13 | from typing import Sequence, Optional 14 | from .ObjectRegex.Tokenizer import Tokenizer 15 | 16 | use_py_error = False 17 | use_py_warnings = False 18 | 19 | import warnings 20 | 21 | 22 | class ObjectUsageError(Exception): 23 | pass 24 | 25 | 26 | class CheckConditionError(Exception): 27 | pass 28 | 29 | 30 | class UnsolvedError(Exception): 31 | pass 32 | 33 | 34 | class DSLSyntaxError(SyntaxError): 35 | pass 36 | 37 | 38 | if use_py_warnings: 39 | Warnings = warnings 40 | else: 41 | class Warnings: 42 | @classmethod 43 | def warn(cls, *msg): 44 | print(Colored.LightBlue, 'UserWarning:', *msg) 45 | 46 | if use_py_error: 47 | class Error: 48 | def __init__(self, *args): 49 | print(Colored.Purple, '{}: '.format(self.__class__.__name__), *args) 50 | raise Exception(self.__class__.__name__) 51 | else: 52 | Error = Exception 53 | 54 | 55 | class UnsupportedStringPrefix(Error): 56 | def __init__(self, mode, msg=''): 57 | Error.__init__(self, 58 | '\n' + msg + '\n' + 59 | Colored.LightBlue + "Unsupported string prefix " + Colored.Red + '{}' 60 | .format(mode) + Colored.LightBlue + "." + Colored.Clear) 61 | 62 | 63 | def find_location(filename, where: 'Tokenizer', src_code: str = None): 64 | if src_code: 65 | row = src_code.splitlines()[where.lineno] 66 | else: 67 | row = '' 68 | 69 | return "{}{}{}{} ---- at file {} line {}".format(Colored.Green, row[:where.colno], Colored.Red, row[where.colno:], 70 | filename, where.lineno + 1) + Colored.Clear 71 | 72 | 73 | class UniqueNameConstraintError(Error): 74 | def __init__(self, name, msg=''): 75 | Error.__init__(self, 76 | '\n' + msg + '\n' + 77 | Colored.Blue + "Name " + Colored.Red + '{}' 78 | .format(name) + Colored.Blue + "should be unique." 
+ Colored.Clear) 79 | -------------------------------------------------------------------------------- /Python/Ruikowa/ErrorHandler.py: -------------------------------------------------------------------------------- 1 | if False: 2 | from .ObjectRegex.MetaInfo import MetaInfo 3 | from typing import Sequence, Optional 4 | from .ObjectRegex.Tokenizer import Tokenizer 5 | from .ErrorFamily import * 6 | from pprint import pprint 7 | 8 | 9 | class ErrorHandler: 10 | 11 | def __init__(self, parse_func, token_func=None): 12 | self.parse_func = parse_func 13 | self.token_func = token_func 14 | 15 | def mut_parser_by(self, new_func): 16 | self.parse_func = new_func(self.parse_func) 17 | 18 | def mut_token_by(self, new_func): 19 | self.token_func = new_func(self.token_func) 20 | 21 | def from_file(self, filename: str, meta: 'MetaInfo' = None, partial=False, print_token=False): 22 | with open(filename, 'r', encoding='utf8') as f: 23 | raw_string = f.read() 24 | return self.from_source_code(filename, raw_string, meta, partial, print_token) 25 | 26 | def from_source_code(self, filename: str, src_code: str, meta: 'MetaInfo' = None, partial=False, print_token=False): 27 | tokens: 'Sequence[Tokenizer]' = tuple(self.token_func(src_code)) 28 | if print_token: 29 | pprint(tokens) 30 | return self.from_tokens(filename, src_code, tokens, meta, partial) 31 | 32 | def from_tokens(self, filename: str, src_code: str, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', partial=False): 33 | if meta is None: 34 | from .ObjectRegex.MetaInfo import MetaInfo 35 | meta = MetaInfo(fileName=filename) 36 | 37 | if not meta: 38 | raise CheckConditionError("Meta Information not defined yet!") 39 | 40 | res = self.parse_func(tokens, meta=meta) 41 | if res is None or (not partial and len(tokens) != meta.count): 42 | max_fetched = meta.max_fetched 43 | try: 44 | where = tokens[max_fetched] 45 | except IndexError: 46 | for i in range(max_fetched - 1, meta.count, -1): 47 | try: 48 | where = tokens[i] 49 | break 50 | except: 51 | continue 52 | else: 53 | raise DSLSyntaxError( 54 | f"totally wrong syntax!(first word has been wrong!)") 55 | 56 | row = src_code.splitlines()[where.lineno] 57 | raise DSLSyntaxError( 58 | "{}{}{}{} ---- at file {} line {}{}" 59 | .format(Colored.Green, row[:where.colno - 1], Colored.Red, 60 | row[where.colno - 1:], 61 | filename, where.lineno + 1, Colored.Clear)) 62 | return res 63 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/ASTDef.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 19:23:04 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from .Tokenizer import Tokenizer 10 | from typing import List, Union, Sequence, Iterator, Collection 11 | 12 | if False: 13 | from .MetaInfo import MetaInfo 14 | 15 | INDENT_UNIT = ' ' * 4 16 | 17 | 18 | class Ast(list): 19 | # List[Union[Tokenizer, Ast]] 20 | 21 | def __init__(self, meta: 'MetaInfo', name: str): 22 | list.__init__(self) 23 | self.name = name 24 | self.meta = meta 25 | 26 | def appendleft(self, obj): 27 | self.reverse() 28 | self.append(obj) 29 | self.reverse() 30 | 31 | def __iter__(self) -> 'Iterator[Union[Tokenizer, Ast]]': 32 | return list.__iter__(self) 33 | 34 | def __getitem__(self, item) -> 'Union[Tokenizer, Ast]': 35 | return list.__getitem__(self, item) 36 | 37 | def __str__(self): 38 | return self.dump() 39 | 40 | def dump(self, indent=0): 41 | next_indent = 
indent + 1 42 | return """{INDENT}{NAME}[ 43 | {CONTENT} 44 | {INDENT}]""".format(INDENT=INDENT_UNIT * indent, 45 | NAME=self.name, 46 | CONTENT='\n'.join( 47 | node.dump(next_indent) 48 | if isinstance(node, Ast) else \ 49 | "{NEXT_INDENT}{STR}".format(NEXT_INDENT=INDENT_UNIT * next_indent, STR=node) 50 | 51 | for node in self 52 | )) 53 | 54 | def dump_to_json(self): 55 | return dict(name=self.name, 56 | value=tuple(node.dump_to_json() for node in self)) 57 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/MetaInfo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 18:54:45 2017 5 | 6 | @author: misakawa 7 | """ 8 | from ..Core.BaseDef import * 9 | 10 | 11 | class MetaInfo: 12 | """ 13 | Meta information when parsing. 14 | 15 | `count` is a property of MetaInfo. 16 | It shows that how many tokenized(words) have been parsed, 17 | which could be used for 18 | - Alerting. 19 | - Eliminating left recursions. 20 | 21 | `trace` is a property of MetaInfo. 22 | It shows a trace of recursive BNF Nodes, 23 | which could be used for 24 | - Debugging. 25 | - Eliminating left recursions. 26 | 27 | `rdx` is a property of MetaInfo. 28 | It shows how many lines have beeb parsed now. 29 | which could be used for 30 | - Alerting. 31 | - Debugging. 32 | 33 | `fileName` is also a property of MetaInfo. 34 | It suggests which file the parser works on. 35 | 36 | """ 37 | 38 | def __init__(self, count=0, trace=None, fileName=None): 39 | 40 | self.count = count 41 | if trace: 42 | self.trace = trace 43 | else: 44 | self.trace = Trace() 45 | self.trace.append(Trace()) 46 | # self.history = [] 47 | self.fileName = fileName if fileName else "" 48 | 49 | def new(self): 50 | self.count += 1 51 | self.trace.new(Trace) 52 | 53 | def commit(self): 54 | return self.count, self.trace[self.count].length 55 | 56 | def rollback(self, history): 57 | count, length = history 58 | self.count = count 59 | self.trace.length = count + 1 60 | self.trace[count].length = length 61 | 62 | def clone(self): 63 | """ 64 | Get a copy of 65 | (RowIdx, 66 | NumberOfParsedWords, 67 | FileName) 68 | from current meta information. 69 | """ 70 | return self.count, self.fileName 71 | 72 | def __str__(self): 73 | return """ 74 | -------------------- 75 | COUNT : {COUNT} 76 | TRACE : 77 | {TRACE} 78 | -------------------- 79 | """.format(COUNT=self.count, 80 | TRACE='\n'.join( 81 | ['[' + (','.join([item.name for item in unit])) + ']' for unit in self.trace]) 82 | ) 83 | 84 | @property 85 | def max_fetched(self): 86 | return self.trace.mem() 87 | 88 | 89 | """ 90 | use list as trace 91 | """ 92 | # class MetaInfo: 93 | # def __init__(self, count=0, rdx=0, trace=None, fileName=None): 94 | # 95 | # self.count = count 96 | # if trace: 97 | # self.trace = trace 98 | # else: 99 | # self.trace = [[]] 100 | # self.rdx = rdx 101 | # self.history = [] 102 | # self.fileName = fileName if fileName else "" 103 | # 104 | # def branch(self): 105 | # """ 106 | # Save a record of parsing history in order to trace back. 107 | # """ 108 | # self.history.append((self.count, self.rdx, len(self.trace[self.count]) )) 109 | # def rollback(self): 110 | # """ 111 | # Trace back. 
112 | # """ 113 | # 114 | # try: 115 | # count, rdx, length = self.history.pop() 116 | # except IndexError: 117 | # return None 118 | # 119 | # self.count = count 120 | # self.rdx = rdx 121 | # self.trace[count] = self.trace[count][:length] 122 | # 123 | # def pull(self): 124 | # """ 125 | # Confirm the current parsing results. 126 | # Pop a record in parsing history. 127 | # """ 128 | # try: 129 | # self.history.pop() 130 | # except IndexError: 131 | # raise Exception("pull no thing") 132 | # 133 | # def new(self): 134 | # self.count += 1 135 | # self.trace.append([]) 136 | # 137 | # def clone(self): 138 | # """ 139 | # Get a copy of 140 | # (RowIdx, 141 | # NumberOfParsedWords, 142 | # FileName) 143 | # from current meta information. 144 | # """ 145 | # return (self.rdx, self.count, self.fileName) 146 | # 147 | # def __str__(self): 148 | # return """ 149 | # -------------------- 150 | # COUNT : {COUNT} 151 | # ROW_IDX : {ROW_DIX} 152 | # TRACE : 153 | # {TRACE} 154 | # -------------------- 155 | # """.format(COUNT=self.count, 156 | # ROW_DIX=self.rdx, 157 | # TRACE='\n'.join( 158 | # ['[' + (','.join([item.name for item in unit])) + ']' for unit in self.trace]) 159 | # ) 160 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/Node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 18:53:53 2017 5 | 6 | @author: misakawa 7 | """ 8 | from abc import ABC, abstractmethod 9 | from typing import Union, List, Tuple, Collection 10 | from ..Core.BaseDef import * 11 | from .MetaInfo import MetaInfo 12 | from ..ErrorFamily import * 13 | from .ASTDef import Ast 14 | from .Optimize import optimize 15 | from .Tokenizer import unique_lit_name, unique_lit_value, unique_literal_cache_pool, Tokenizer 16 | from ..Config import Debug 17 | 18 | if Debug: 19 | from ..Tools import function_debugger 20 | 21 | DEBUG_INDENT = 1 22 | debugger = function_debugger('tag', 'content') 23 | 24 | 25 | class Ignore: 26 | Value = 0 27 | Name = 1 28 | 29 | 30 | def debug(msg): 31 | def wrap(func): 32 | def call(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', *args, **kwargs): 33 | global DEBUG_INDENT 34 | if not isinstance(self, AstParser): 35 | now = tokens[meta.count] 36 | if hasattr(self, 'mode'): 37 | profile = f'{self.name}[{self.mode}] matching {now}' 38 | else: 39 | profile = f'{self.name} matching {now}' 40 | else: 41 | profile = self.name 42 | print(Colored.Purple2, 43 | debugger(dict( 44 | tag=f'start {self.__class__.__name__}', 45 | Profile=profile, 46 | Name=self.name, 47 | content=msg, 48 | Meta=meta.count), 49 | indent=DEBUG_INDENT * 4, 50 | inc_indent=2), 51 | Colored.Clear, '\n') 52 | 53 | DEBUG_INDENT += 1 54 | res = func(self, tokens, meta, *args, **kwargs) 55 | DEBUG_INDENT -= 1 56 | 57 | print(Colored.LightBlue, 58 | debugger( 59 | dict( 60 | tag=f'end {self.__class__.__name__}', 61 | Name=self.name, 62 | Profile=profile, 63 | content=msg, 64 | Return=True if res else False, 65 | Meta=meta.count), 66 | indent=DEBUG_INDENT * 4, 67 | inc_indent=1), 68 | Colored.Clear, '\n') 69 | 70 | return res 71 | 72 | return call if Debug else func 73 | 74 | return wrap 75 | 76 | 77 | ParserCollections = 'Union[LiteralNameParser, LiteralNameValueParser, LiteralValueParser, AstParser, SeqParser]' 78 | 79 | 80 | def parser_name_helper( 81 | pattern: 'ParserCollections'): 82 | if pattern.__class__ is LiteralNameValueParser: 83 | return 
f"{pattern.name}['{pattern.mode}']" 84 | elif pattern.__class__ is LiteralValueParser: 85 | return f"'{pattern.mode}'" 86 | else: 87 | return pattern.name 88 | 89 | 90 | class BaseParser(ABC): 91 | """Abstract Class""" 92 | name = Undef 93 | has_recur = Undef 94 | 95 | @abstractmethod 96 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 97 | """Abstract Method""" 98 | raise NotImplemented 99 | 100 | 101 | class LiteralNameParser(BaseParser): 102 | """ 103 | To parse tokenizer with specific name. 104 | for regex exp 105 | """ 106 | 107 | def __init__(self, name): 108 | self.name = name 109 | 110 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 111 | try: 112 | value: 'Tokenizer' = tokens[meta.count] 113 | except IndexError: 114 | return Const.UnMatched 115 | if value.name is self.name: 116 | meta.new() 117 | return value 118 | return Const.UnMatched 119 | 120 | 121 | class LiteralValueParser(BaseParser): 122 | """ 123 | for const char* 124 | """ 125 | 126 | def __init__(self, mode): 127 | self.name = self.mode = mode 128 | 129 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 130 | try: 131 | value: 'Tokenizer' = tokens[meta.count] 132 | except IndexError: 133 | return Const.UnMatched 134 | if value.string is self.mode: 135 | meta.new() 136 | return value 137 | return Const.UnMatched 138 | 139 | 140 | class LiteralNameValueParser(BaseParser): 141 | """ 142 | for const char* and its group name 143 | """ 144 | 145 | def __init__(self, name, mode): 146 | self.name = name 147 | self.mode = mode 148 | 149 | @debug('literal name value') 150 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 151 | try: 152 | value: 'Tokenizer' = tokens[meta.count] 153 | except IndexError: 154 | return Const.UnMatched 155 | if value.name is self.name and value.string is self.mode: 156 | meta.new() 157 | return value 158 | return Const.UnMatched 159 | 160 | 161 | class Ref(BaseParser): 162 | def __init__(self, name): 163 | self.name = unique_literal_cache_pool[name] 164 | 165 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 166 | raise NotImplemented 167 | 168 | 169 | class AstParser(BaseParser): 170 | 171 | def __init__(self, *cases, name=Undef, to_ignore=Undef): 172 | # each in the cache will be processed into a parser. 173 | cases = tuple( 174 | tuple( 175 | LiteralValueParser(each) if isinstance(each, str) else 176 | LiteralNameValueParser(each[0], each[1]) if isinstance(each, tuple) else 177 | each 178 | for each in p) 179 | for p in cases) 180 | self.cache: 'Tuple[Tuple[ParserCollections]]' = optimize(cases) 181 | 182 | # the possible output types for an series of input tokenized words. 183 | self.possibilities = [] 184 | 185 | # whether this parser will refer to itself. 186 | self.has_recur = False 187 | 188 | # the identity of a parser. 189 | 190 | self.name = name if name is not Undef else \ 191 | ' | '.join( 192 | ' '.join( 193 | map(parser_name_helper, case)) for case in cases) 194 | 195 | # is this parser compiled, must be False when initializing. 196 | self.compiled = False 197 | 198 | # if a parser's name is in this set, the result it output will be ignored when parsing. 
199 | self.to_ignore = to_ignore 200 | 201 | def compile(self, namespace: dict, recur_searcher: set): 202 | if self.name in recur_searcher: 203 | self.has_recur = True 204 | self.compiled = True 205 | else: 206 | recur_searcher.add(self.name) 207 | 208 | if self.compiled: 209 | return self 210 | 211 | for es in self.cache: 212 | self.possibilities.append([]) 213 | 214 | for e in es: 215 | 216 | if e.__class__ is LiteralNameParser: 217 | 218 | if e.name not in namespace: 219 | unique_lit_name(e) 220 | namespace[e.name] = e 221 | 222 | else: 223 | e = namespace[e.name] 224 | 225 | self.possibilities[-1].append(e) 226 | 227 | elif e.__class__ is LiteralValueParser: 228 | literal = parser_name_helper(e) 229 | 230 | if literal not in namespace: 231 | unique_lit_value(e) 232 | namespace[literal] = e 233 | 234 | else: 235 | e = namespace[literal] 236 | 237 | self.possibilities[-1].append(e) 238 | 239 | elif e.__class__ is LiteralNameValueParser: 240 | name_literal = parser_name_helper(e) 241 | 242 | if name_literal not in namespace: 243 | unique_lit_value(e) 244 | unique_lit_name(e) 245 | namespace[name_literal] = e 246 | else: 247 | e = namespace[name_literal] 248 | 249 | self.possibilities[-1].append(e) 250 | 251 | elif e.__class__ is Ref: 252 | e = namespace[e.name] 253 | 254 | if isinstance(e, AstParser): 255 | e.compile(namespace, recur_searcher) 256 | 257 | self.possibilities[-1].append(e) 258 | 259 | if not self.has_recur and e.has_recur: 260 | self.has_recur = True 261 | 262 | else: 263 | if e.name not in namespace: 264 | unique_lit_name(e) 265 | namespace[e.name] = e 266 | else: 267 | e = namespace[e.name] 268 | 269 | e.compile(namespace, recur_searcher) 270 | self.possibilities[-1].append(e) 271 | 272 | if not self.has_recur and e.has_recur: 273 | self.has_recur = True 274 | 275 | if hasattr(self, 'cache'): 276 | del self.cache 277 | 278 | if self.name in recur_searcher: 279 | recur_searcher.remove(self.name) 280 | 281 | if not self.compiled: 282 | self.compiled = True 283 | 284 | @debug("match") 285 | def match(self, tokens, meta: 'MetaInfo', recur: 'Recur' = Undef): 286 | if self.has_recur and self in meta.trace[meta.count]: 287 | if isinstance(self, SeqParser) or recur is self: 288 | return Const.UnMatched 289 | 290 | raise RecursiveFound(self) 291 | history = meta.commit() 292 | if self.has_recur: 293 | meta.trace[meta.count].append(self) 294 | 295 | for possibility in self.possibilities: 296 | result = self.pattern_match(tokens, meta, possibility, recur=recur) 297 | if result is Const.UnMatched: 298 | meta.rollback(history) 299 | continue 300 | elif isinstance(result, Ast): 301 | break 302 | elif isinstance(result, RecursiveFound): 303 | meta.rollback(history) 304 | break 305 | else: 306 | return Const.UnMatched 307 | 308 | return result 309 | 310 | def pattern_match(self, tokens, meta, possibility, recur=Undef): 311 | 312 | try: # Not recur 313 | result = Ast(meta.clone(), self.name) 314 | for parser in possibility: 315 | r = parser.match(tokens, meta=meta, recur=recur) 316 | # if `result` is still empty, it might not allow LR now. 317 | if isinstance(r, Tokenizer) or isinstance(r, Ast): 318 | result_merge(result, r, parser, self.to_ignore) 319 | 320 | elif r is Const.UnMatched: 321 | return Const.UnMatched 322 | 323 | elif isinstance(r, RecursiveFound): 324 | raise r 325 | 326 | else: 327 | raise UnsolvedError("Unsolved return type. 
{}".format(r.__class__)) 328 | else: 329 | return result 330 | 331 | except RecursiveFound as RecurInfo: 332 | parser: 'ParserCollections' 333 | RecurInfo.add((self, possibility[possibility.index(parser) + 1:])) 334 | 335 | # RecurInfo has a trace of Beginning Recur Node to Next Recur Node with 336 | # specific possibility. 337 | if RecurInfo.node is not self: 338 | return RecurInfo 339 | 340 | return left_recursion(tokens, meta, possibility, RecurInfo) 341 | 342 | 343 | def result_merge(result, r, parser, to_ignore): 344 | if parser.__class__ is SeqParser or parser.__class__ is AccompaniedAstParser: 345 | 346 | if to_ignore is Undef: 347 | result.extend(r) 348 | else: 349 | result.extend([item for item in r if 350 | ((item.string not in to_ignore[Const.RawFilter] 351 | and item.name not in to_ignore[Const.NameFilter] 352 | ) if item.__class__ is Tokenizer else ( 353 | item.name not in to_ignore[Const.NameFilter]))]) 354 | else: 355 | if to_ignore is Undef: 356 | result.append(r) 357 | else: 358 | if r.__class__ is Tokenizer: 359 | if r.string not in to_ignore[Const.RawFilter] and r.name not in to_ignore[Const.NameFilter]: 360 | result.append(r) 361 | elif r.name not in to_ignore[Const.NameFilter]: 362 | result.append(r) 363 | 364 | 365 | def left_recursion(cases, meta: 'MetaInfo', recur_case, recur_info): 366 | recur = recur_info.node 367 | for case in recur.possibilities: 368 | if case is recur_case: 369 | continue 370 | 371 | very_first = recur.pattern_match(cases, meta, case, recur=recur) 372 | if isinstance(very_first, RecursiveFound) or very_first is Const.UnMatched: 373 | continue 374 | else: 375 | history = meta.commit() 376 | first = very_first 377 | recur_depth_count = 0 378 | while True: 379 | for parser, possibility in recur_info.possibilities: 380 | result = parser.pattern_match(cases, meta, possibility, recur=recur) 381 | if result is Const.UnMatched: 382 | meta.rollback(history) 383 | return Const.UnMatched if recur_depth_count is 0 else very_first 384 | elif isinstance(result, Ast): 385 | result.appendleft(first) 386 | elif isinstance(result, RecursiveFound): 387 | raise UnsolvedError("Error occurs : found a new left recursion when handling an other.") 388 | else: 389 | raise UnsolvedError("Unsolved return from method `patternMatch`.") 390 | first = result 391 | recur_depth_count += 1 392 | very_first = first 393 | else: 394 | # Fail to match any case. 
395 | return Const.UnMatched 396 | 397 | 398 | class AccompaniedAstParser(AstParser): 399 | pass 400 | 401 | 402 | class SeqParser(AstParser): 403 | 404 | def __init__(self, *cases, name=Undef, at_least=0, at_most=Undef): 405 | super(SeqParser, self).__init__(*cases, name=name) 406 | 407 | if at_most is Undef: 408 | if at_least is 0: 409 | self.name = f"({self.name})*" 410 | else: 411 | self.name = f'({self.name}){{{at_least}}}' 412 | else: 413 | self.name = f"({self.name}){{{at_least},{at_most}}}" 414 | 415 | self.at_least = at_least 416 | self.at_most = at_most 417 | 418 | def match(self, tokens, meta: 'MetaInfo', recur=Undef): 419 | 420 | result = Ast(meta.clone(), self.name) 421 | 422 | if meta.count == len(tokens): # boundary cases 423 | if self.at_least is 0: 424 | return result 425 | return Const.UnMatched 426 | 427 | history = meta.commit() 428 | matched_num = 0 429 | if self.at_most is not Undef: 430 | """ (ast){a b} """ 431 | while True: 432 | if matched_num >= self.at_most: 433 | break 434 | try: 435 | r = AstParser.match(self, tokens, meta=meta, recur=recur) 436 | except IndexError: 437 | break 438 | 439 | if r is Const.UnMatched: 440 | break 441 | 442 | elif isinstance(r, RecursiveFound): 443 | raise UnsolvedError("Cannot make left recursions in SeqParser!!!") 444 | 445 | result.extend(r) 446 | matched_num += 1 447 | else: 448 | """ ast{a} | [ast] | ast* """ 449 | while True: 450 | try: 451 | r = AstParser.match(self, tokens, meta=meta, recur=recur) 452 | except IndexError: 453 | break 454 | 455 | if r is Const.UnMatched: 456 | break 457 | 458 | elif isinstance(r, RecursiveFound): 459 | raise UnsolvedError("Cannot make left recursions in SeqParser!!!") 460 | 461 | result.extend(r) 462 | matched_num += 1 463 | 464 | if matched_num < self.at_least: 465 | meta.rollback(history) 466 | return Const.UnMatched 467 | 468 | return result 469 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/Optimize.py: -------------------------------------------------------------------------------- 1 | def analyze(cases): 2 | from .Node import LiteralValueParser, LiteralNameValueParser 3 | if len(cases) is 1 or not all(cases): 4 | return None 5 | 6 | groups = dict() 7 | group_order = [] 8 | 9 | for case in cases: 10 | head = case[0] 11 | if isinstance(head, LiteralValueParser): 12 | group_id = "value:" + head.mode 13 | elif isinstance(head, LiteralNameValueParser): 14 | group_id = f'ref: {head.name} value: {head.mode}' 15 | else: 16 | group_id = "ref:" + head.name 17 | 18 | if group_id not in group_order: 19 | 20 | groups[group_id] = [case] 21 | group_order.append(group_id) 22 | else: 23 | groups[group_id].append(case) 24 | 25 | if len(group_order) is 1: 26 | return None 27 | 28 | return groups, group_order 29 | 30 | 31 | def grammar_remake(groups, group_order): 32 | from .Node import AccompaniedAstParser 33 | return tuple( 34 | ( 35 | (groups[groupId][0][0], 36 | AccompaniedAstParser(*[case[1:] for case in groups[groupId]]) 37 | ) 38 | if len(groups[groupId]) > 1 else groups[groupId][0] 39 | ) 40 | for groupId in group_order) 41 | 42 | 43 | def optimize(cases): 44 | analyzed = analyze(cases) 45 | if analyzed is None: 46 | return cases 47 | groups, group_order = analyzed 48 | return grammar_remake(groups, group_order) 49 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/Tokenizer.py: -------------------------------------------------------------------------------- 1 | try: 2 | from 
typing import Iterable, Tuple, List, Dict, Set 3 | 4 | if False: 5 | from re import __Regex 6 | except ModuleNotFoundError: 7 | pass 8 | 9 | import re 10 | import json 11 | import linq 12 | from collections import defaultdict 13 | from ..ErrorFamily import UniqueNameConstraintError 14 | from ..ErrorHandler import Colored, Warnings as warnings 15 | 16 | 17 | class Mode: 18 | regex = 0 19 | keyword = const = 1 20 | char = 2 21 | 22 | 23 | class TokenSpec: 24 | def __init__(self): 25 | self.enums: 'Dict[str, str]' = {} 26 | # enum name -> const string 27 | 28 | self.tokens: 'List[Tuple[str, int, str]]' = [] 29 | 30 | def to_token_table(self, indent=15): 31 | generated_tokens = set() 32 | _join = f',\n{" "*indent}'.join 33 | if not self.tokens: 34 | return '()' 35 | groups = linq.Flow(self.tokens).Group(lambda name, mode, string: (name, mode if mode is not Mode.regex else string)).Unboxed() 36 | 37 | def make_each(group: 'List[Tuple[str, int, str]]'): 38 | name, mode, string = group.__iter__().__next__() 39 | if mode is Mode.regex: 40 | return '(unique_literal_cache_pool["{name}"], regex_matcher({string}))'.format(name=name, string=string) 41 | 42 | modes = [] 43 | for _, _, string in group: 44 | 45 | tp = (name, string) 46 | if tp not in generated_tokens: 47 | modes.append(string) 48 | generated_tokens.add(tp) 49 | 50 | if not modes: 51 | return None 52 | 53 | match_mode = ', '.join(sorted(modes, reverse=True)) 54 | 55 | if mode is Mode.char: 56 | return '(unique_literal_cache_pool["{}"], char_matcher(({})))'.format(name, match_mode) 57 | 58 | return '(unique_literal_cache_pool["{}"], str_matcher(({})))'.format(name, match_mode) 59 | 60 | token_items = linq.Flow(groups).Map(make_each).Filter(lambda x: x).Then(_join).Unboxed() 61 | return '({},)'.format(token_items) 62 | 63 | def to_name_enum(self): 64 | 65 | if not self.enums: 66 | return "" 67 | indent = f'\n{" "*4}' 68 | _join = indent.join 69 | 70 | name_enums = linq.Flow( 71 | self.enums.items() 72 | ).Map( 73 | lambda name, string: f"{name} = unique_literal_cache_pool[{string}]" 74 | ).Then( 75 | _join 76 | ).Unboxed() 77 | 78 | enum_class_spec = """ 79 | class UNameEnum: 80 | # names 81 | {}{} 82 | """.format(indent, 83 | name_enums) 84 | 85 | return enum_class_spec 86 | 87 | 88 | class Tokenizer: 89 | def __init__(self, name: str, string: str, lineno: int, colno: int): 90 | self.name = name 91 | self.lineno = lineno 92 | self.colno = colno 93 | self.string = string 94 | 95 | def dump_to_json(self): 96 | return dict(name=self.name, string=self.string, lineno=self.lineno, colno=self.colno) 97 | 98 | def dump(self): 99 | return self.__str__() 100 | 101 | def __repr__(self): 102 | return f'[name: {self.name}, string: "{self.string}", lineno: {self.lineno}, colno: {self.colno}]' 103 | 104 | def __str__(self): 105 | 106 | return '[name: {}, string: "{}"]'.format(self.name, self.string) 107 | 108 | @staticmethod 109 | def from_raw_strings(raw_string: str, token_table: 'Iterable', to_ignore=({}, {}), cast_map: dict = None): 110 | if cast_map is None: 111 | cast_map = {} 112 | 113 | if not raw_string: 114 | return () 115 | lineno = 0 116 | colno = 0 117 | pos = 0 118 | n = len(raw_string) 119 | while True: 120 | for name, pat in token_table: 121 | w = pat(raw_string, pos) 122 | if w: 123 | row_inc = w.count('\n') 124 | length = len(w) 125 | 126 | if row_inc: 127 | lineno += row_inc 128 | colno = length - w.rfind('\n') - 1 129 | else: 130 | colno += length 131 | 132 | pos += length 133 | 134 | if name not in to_ignore[0] and w not in 
to_ignore[1]: 135 | if w in cast_map: 136 | name = cast_map[w] 137 | w = unique_literal_cache_pool[w] 138 | yield Tokenizer(name, w, lineno, colno) 139 | else: 140 | yield Tokenizer(unique_literal_cache_pool[name], w, lineno, colno) 141 | 142 | if n == pos: 143 | return 144 | break 145 | 146 | else: 147 | warnings.warn('no token def {}'.format(raw_string[pos].encode())) 148 | if raw_string[pos] is '\n': 149 | colno = 0 150 | lineno += 1 151 | else: 152 | colno += 1 153 | pos += 1 154 | if n == pos: 155 | return 156 | break 157 | 158 | 159 | def char_matcher(mode): 160 | """ 161 | a faster way for characters to generate token strings cache 162 | """ 163 | 164 | def f_raw(inp_str, pos): 165 | return mode if inp_str[pos] is mode else None 166 | 167 | def f_collection(inp_str, pos): 168 | ch = inp_str[pos] 169 | for each in mode: 170 | if ch is each: 171 | return ch 172 | return None 173 | 174 | if isinstance(mode, str): 175 | return f_raw 176 | 177 | if len(mode) is 1: 178 | mode = mode[0] 179 | return f_raw 180 | 181 | return f_collection 182 | 183 | 184 | def str_matcher(mode): 185 | """ 186 | generate token strings' cache 187 | """ 188 | 189 | def f_raw(inp_str, pos): 190 | return unique_literal_cache_pool[mode] if inp_str.startswith(mode, pos) else None 191 | 192 | def f_collection(inp_str, pos): 193 | for each in mode: 194 | if inp_str.startswith(each, pos): 195 | return unique_literal_cache_pool[each] 196 | return None 197 | 198 | if isinstance(mode, str): 199 | return f_raw 200 | 201 | if len(mode) is 1: 202 | mode = mode[0] 203 | return f_raw 204 | 205 | return f_collection 206 | 207 | 208 | def regex_matcher(regex_pat): 209 | """ 210 | generate token names' cache 211 | :param regex_pat: 212 | :return: 213 | """ 214 | if isinstance(regex_pat, str): 215 | regex_pat = re.compile(regex_pat) 216 | 217 | def f(inp_str, pos): 218 | m = regex_pat.match(inp_str, pos) 219 | return m.group() if m else None 220 | 221 | return f 222 | 223 | 224 | class UniqueLiteralCachePool: 225 | def __init__(self, dictionary: dict): 226 | self.content = dictionary 227 | 228 | def __getitem__(self, item): 229 | try: 230 | return self.content[item] 231 | except KeyError: 232 | self.content[item] = item 233 | return item 234 | 235 | 236 | unique_literal_cache_pool = UniqueLiteralCachePool({}) 237 | 238 | 239 | def unique_lit_name(obj): 240 | if obj.name is not unique_literal_cache_pool[obj.name]: 241 | obj.name = unique_literal_cache_pool[obj.name] 242 | 243 | 244 | def unique_lit_value(obj): 245 | if obj.mode is not unique_literal_cache_pool[obj.mode]: 246 | obj.mode = unique_literal_cache_pool[obj.mode] 247 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /Python/Ruikowa/Tools/__init__.py: -------------------------------------------------------------------------------- 1 | import linq 2 | 3 | try: 4 | from cytoolz import curry 5 | except ModuleNotFoundError: 6 | from toolz import curry 7 | 8 | 9 | @curry 10 | def function_debugger(tag: str, content: str, dictionary: dict, indent: int, inc_indent: int): 11 | case_map = {tag: 1, 12 | content: 2} 13 | 14 | indent = " " * indent 15 | inc_indent = f"{indent}" + " " * inc_indent; 16 | 17 | groups = linq.Flow(dictionary.items()).Map(lambda a, b: (a, b)).GroupBy( 18 | lambda a, b: case_map.get(a, 0)).Unboxed() 19 | 20 | others = 
'\n'.join(map(lambda each: f"{inc_indent}<{each[0]}> {each[1]} ", groups[0])) 21 | 22 | content = f"<{groups[2][0][1]}>" 23 | 24 | return (f"{indent}<{groups[1][0][1]}>\n" 25 | f"{others}\n" 26 | f'{inc_indent}{content}\n' 27 | f"{indent}") 28 | -------------------------------------------------------------------------------- /Python/Ruikowa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thautwarm/EBNFParser/101a92c4f408f9e6ce7b55aacb39cded9394521d/Python/Ruikowa/__init__.py -------------------------------------------------------------------------------- /Python/Ruikowa/color.py: -------------------------------------------------------------------------------- 1 | class Colored: 2 | Red = '\033[31m' 3 | Green = '\033[32m' 4 | Yellow = '\033[33m' 5 | Blue = '\033[34m' 6 | Purple = '\033[35m' 7 | LightBlue = '\033[36m' 8 | Clear = '\033[39m' 9 | Purple2 = '\033[95m' -------------------------------------------------------------------------------- /Python/Ruikowa/io.py: -------------------------------------------------------------------------------- 1 | encodings = ('utf8', 'gb18030', 'latin1', 'gbk') 2 | 3 | 4 | class grace_open: 5 | 6 | def __init__(self, filename): 7 | self.filename = filename 8 | 9 | def write(self, string: str): 10 | for encoding in encodings: 11 | try: 12 | with open(self.filename, 'w', encoding=encoding) as f: 13 | f.write(string) 14 | return self 15 | except UnicodeEncodeError: 16 | continue 17 | raise UnicodeEncodeError 18 | 19 | def read(self): 20 | for encoding in encodings: 21 | try: 22 | with open(self.filename, 'r', encoding=encoding) as f: 23 | return f.read() 24 | except UnicodeEncodeError: 25 | continue 26 | raise UnicodeEncodeError 27 | -------------------------------------------------------------------------------- /Python/release-note: -------------------------------------------------------------------------------- 1 | ## what's new in EBNFParser 0.1.2: 2 | 1. 3 | separate parser with tokenizer. 4 | You are allowed to whether to define tokenizer automatically in EBNF files. 5 | You are allowed to define a funtion to be the tokenizer at the first line in a EBNF file 6 | for instance: 7 | using {{ lambda string:list(string) }} # or just list 8 | 9 | And you are allowed to write a tokenizer definition in another file, for instance: 10 | 11 | file: ./xxx.eebnf 12 | using python.token 13 | file: ./python/token 14 | lambda x : string(x) 15 | Take care that you can just write an expression!!! 16 | 2. 17 | the meta information format has changed from 18 | 19 | "meta":{"rowIdx":, "count":, "fileName":} 20 | to 21 | "meta":[, , ] 22 | 23 | Enjoy it:) 24 | 25 | 26 | ## what's new in EBNFParser 0.1.2.2: 27 | 28 | You're now allowed to define parsers named by chinese characters. 29 | 30 | 31 | ## what's new in EBNFParser 0.1.3.1: 32 | 33 | There is a module named `token` which is in CPython STL. 34 | As a result, I changed the name of a specific file generated by Parser Generator from `token.py` to `etoken.py`. 35 | 36 | ## what's new in EBNFParser 0.1.3.4(0.1.4): 37 | 38 | Fixed bugs for Windows users. 39 | Now just use following command 40 | 41 | ``` 42 | parserGenerator 43 | -lang 44 | -comment 45 | -multiline 46 | ``` 47 | to generate a parser. 48 | 49 | ## what's new in EBNFParser 0.2.0: 50 | 51 | I think that I have just found a fantastic method to solve left recursion problem. 
52 | The way to do it is just: 53 | 54 | - mark the left-recursive parser node and store the tail of each epsilon production of this parser as TAIL. 55 | ``` 56 | a ::= a b c | a '=>' d | c 57 | # TAIL = b c | '=>' d 58 | ``` 59 | - when an ast named RESULT has been parsed, just 60 | 61 | * make an new ast also named RESULT', take RESULT as the first elem in RESULT'!!! 62 | * continue parsing by using TAIL. 63 | 64 | I will refactor this method sooner to make my codes more elegant. 65 | 66 | - P.S 67 | Sometimes I think my ideas might come from God's Revelation. 68 | It seems so incredible for me to totally solve a part of Principles by myself. 69 | 70 | ## what's new in EBNFParser 1.0: 71 | 72 | - Totally support any kind of left recursion now. 73 | 74 | - Some optimization on Bootstrap Compiler. 75 | 76 | for instance, the result of following one 77 | `a ::= b c d | b d e | b d f` 78 | can be transformed to the same as 79 | `a ::= b(c d | b d (e|f) )` 80 | 81 | - LiteralParser will get a result which `Type` is `str`, previously it got an `Ast`. 82 | 83 | In terms of the following case: 84 | 85 | ``` test.txt 86 | 87 | B := 'b' 88 | A ::= B 89 | 90 | ``` 91 | 92 | Let test the parser. 93 | 94 | - Misakawa(Old version). 95 | 96 | ``` 97 | parserGenerator ./test.txt ./testParser.py 98 | python testLang.py A "b" 99 | A[B['b'] 100 | ] 101 | ``` 102 | 103 | - Ruikowa 104 | 105 | ``` 106 | ruiko ./test.txt ./testParser.py 107 | python testLang.py A "b" 108 | A[ 109 | "b" 110 | ] 111 | ``` 112 | - Support Python3.4+! 113 | 114 | ## what's new in EBNFParser 1.0.1: 115 | 116 | - fix a bug in `Throw` syntax. 117 | 118 | The following syntax should define an ASTParser which will ignore the characters `'\n' and ','` in parsed results, however it used to ignore "'\\n'" and "','". 119 | a Throw ['\n', ','] ::= ... 120 | 121 | Now this problem has been fixed. 122 | 123 | 124 | ## what's new in EBNFParser 1.0.3: 125 | 126 | - fix a bug in `Throw` syntax: 127 | 128 | There is a bug that ignoring the specific AST in previous versions could be incorrect. Fixed now. 129 | 130 | ## what's new in EBNFParser 1.0.4: 131 | 132 | - make `SyntaxError` caught by `handle_error` be a more specific `DSLSyntaxError`. 133 | 134 | ## what's new in EBNFParser 1.0.5: 135 | 136 | - add a new api `MetaInfo.max_fetched` to get max possible parsed words count, for making interactive tools. 137 | 138 | ## quite a exciting step in EBNFParser 1.1 139 | 140 | - support escape literal now: 141 | 142 | single.Quote := '\''; 143 | 144 | 145 | # EBNFParser 2.0 146 | 147 | Fast, powerful and human-friendly with intelligent and comfortable error alert. 148 | 149 | - all the matchings now could be made by address comparing(use `is` and `not`). 150 | 151 | - better auto-tokenizer which can handle every scene belonging to Context-Free Syntax. 152 | 153 | - use object tokenizer for precise error raising. 154 | 155 | - codes refactored and follow PEP8 strictly. More readable. 156 | 157 | 158 | 159 | 160 | # 2.0.9 161 | 162 | Add custom literal prefixes. 163 | 164 | ``` 165 | keyword as K := 'def' 'let'; 166 | 167 | some ::= K'let' args '=' expr; 168 | ``` 169 | 170 | Take care that you cannot overload `R` prefix. 
171 | 172 | 173 | 174 | 175 | # 2.1.1 176 | 177 | make a convenience for the case that all combined parsers can reach each other.(Fixed parser compiler and bootstrap code-gen) 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /Python/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Oct 6 00:51:38 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from setuptools import setup, find_packages 10 | 11 | with open('./README.rst', encoding='utf-8') as f: 12 | readme = f.read() 13 | 14 | setup(name='EBNFParser', 15 | version='2.1.3', 16 | keywords='parser, parser framework, parser generator, gramamr, ast, tokenizer, EBNF, BNF', 17 | description="very powerful and optional parser framework for python", 18 | long_description=readme, 19 | license='MIT', 20 | url='https://github.com/thautwarm/EBNFParser', 21 | author='thautwarm', 22 | author_email='twshere@outlook.com', 23 | include_package_data=True, 24 | packages=['Ruikowa'], 25 | entry_points={ 26 | 'console_scripts': [ 27 | 'ruiko=Ruikowa.Command:main'] 28 | }, 29 | install_requires=[ 30 | 'Linq==0.3.1' 31 | ], 32 | platforms='any', 33 | classifiers=[ 34 | 'Programming Language :: Python :: 3.6', 35 | 'Programming Language :: Python :: 3.7', 36 | 'Programming Language :: Python :: Implementation :: CPython'], 37 | zip_safe=False 38 | ) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/thautwarm/EBNFParser.svg?branch=boating-new)](https://travis-ci.org/thautwarm/EBNFParser) 2 | [![PyPI version](https://img.shields.io/pypi/v/EBNFParser.svg)](https://pypi.python.org/pypi/EBNFParser) 3 | [![Release Note](https://img.shields.io/badge/note-release-orange.svg)](https://github.com/thautwarm/EBNFParser/blob/boating-new/Python/release-note) 4 | [![MIT License](https://img.shields.io/badge/license-MIT-Green.svg?style=flat)](https://github.com/thautwarm/EBNFParser/blob/boating-new/LICENSE) 5 | 6 | # EBNFParser 7 | Parse Many, Any, Every [![Doc](https://img.shields.io/badge/document-2.1.2-yellow.svg?style=flat)](http://ebnfparser.readthedocs.io/en/boating-new) 8 | ----------------------- 9 | 10 | ``` 11 | LR ::= LR 'a' 'b' | LR 'c' | 'd'; 12 | ``` 13 | 14 | 15 | - [Python Project(Support Python 3.6+)](https://github.com/thautwarm/EBNFParser/tree/boating-new/Python) (v 2.0+) 16 | - [Old Version : Misakawa v0.x](https://github.com/thautwarm/EBNFParser/tree/master/Misakawa.md) 17 | - [Old Version : Ruikowa v1.x](https://github.com/thautwarm/EBNFParser/tree/master/README.md) 18 | 19 | -------------------- 20 | 21 | ## Install 22 | - Python 23 | - pip 24 | 25 | `pip installl -U EBNFParser` 26 | 27 | - setup 28 | 29 | ```shell 30 | git clone https://github.com/thautwarm/EBNFParser 31 | cd EBNFParser/Python 32 | python setup.py install 33 | ``` 34 | 35 | ## Usage 36 | 37 | - Command Line Tools 38 | - `ruiko`. 39 | 40 | ```shell 41 | ruiko ./ ./ 42 | [--testTk] # print tokenized words or not 43 | [--test] # generate test script "test_lang.py" 44 | ``` 45 | Use command `ruiko` to generate parser and token files, and then you can use `test_lang.py` to test your parser. 
46 | 47 | ```shell 48 | python ./test_lang.py Stmt " (+ 1 2) " -o test.json --testTk 49 | ``` 50 | 51 | - Integrated into your own project 52 | 53 | ```python 54 | 55 | from Ruikowa.ObjectRegex.ASTDef import Ast 56 | from Ruikowa.ErrorHandler import ErrorHandler 57 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 58 | from Ruikowa.ObjectRegex.Tokenizer import Tokenizer 59 | 60 | from import , token_table 61 | 62 | 63 | import typing as t 64 | 65 | def token_func(src_code: str) -> t.Iterable[Tokenizer]: 66 | return Tokenizer.from_raw_strings(src_code, token_table, ({}, {})) 67 | 68 | parser = ErrorHandler(.match, token_func) 69 | 70 | def parse(filename: str) -> Ast: 71 | 72 | return parser.from_file(filename) 73 | 74 | 75 | print(parse()) 76 | 77 | ``` 78 | 79 | Need more? See [the documents](http://ebnfparser.readthedocs.io/en/boating-new). 80 | 81 | ## Examples 82 | 83 | Here are some examples to refer: 84 | 85 | EBNFParser 2.0 86 | 87 | - [Rem](https://github.com/thautwarm/Rem) 88 | The Rem programming language. 89 | 90 | 91 | Old version(Before EBNFParser 1.1). 92 | 93 | - [DBG-Lang](https://github.com/thautwarm/dbg-lang) 94 | A DSL for SQL development in Python areas. 95 | 96 | - [Rem(Based EBNFParser1.1)](https://github.com/thautwarm/Rem/tree/backend-ebnfparser1.1) 97 | A full featured modern language to enhance program readability based on CPython. 98 | 99 | - [Lang.Red](https://github.com/thautwarm/lang.red) 100 | An attempt to making ASDL in CPython(unfinished yet) 101 | 102 | Will support F# and Rem. 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /Ruiko/README.rst: -------------------------------------------------------------------------------- 1 | 2 | Ruiko Language 3 | ------------------------------ 4 | 5 | **Ruiko** is EBNF-like language for tokenizing and parsing which could handle context-sensitive cases easily(`closure` could be handled in **Ruiko**). 6 | 7 | What's more, you can even write the compiling actions easily when writing parsers, and the efficiency might be amazing high. 
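For a flavour of the notation, ``bootstrap.ruiko`` in this directory describes Ruiko's own grammar. Its ``Exp`` rule, quoted verbatim below, shows the optional ``by``/``as``/``then`` clauses whose operands are ``Name`` or ``Codes`` tokens — presumably the hook points for the compiling actions mentioned above:

.. code ::

    Exp ::= AtomExp | '<' AtomExp ['by' (Name | Codes)+] ['as' (Name | Codes)] ['then' (Name | Codes)+]'>'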
-------------------------------------------------------------------------------- /Ruiko/ast.cpp: -------------------------------------------------------------------------------- 1 | #include "flowerq/List.hpp" 2 | #include "flowerq/IO.hpp" 3 | #include "flowerq/Macro.hpp" 4 | #include 5 | #define DEBUB 6 | 7 | 8 | class Mixed; 9 | using Ast = flowerq::List; 10 | 11 | #ifdef DEBUB 12 | typedef StringBuff TokenType; 13 | #else 14 | typedef int TokenType; 15 | #endif 16 | 17 | struct Token{ 18 | 19 | public: 20 | int lineno; 21 | int colno; 22 | TokenType name; 23 | StringBuff value; 24 | Token(int lineno, int colno, TokenType name, StringBuff value){ 25 | this->lineno = lineno; 26 | this->colno = colno; 27 | this->name = name; 28 | this->value = value; 29 | } 30 | 31 | StringBuff toString(){ 32 | return flowerq::IO::inspect(this->name) + rstr("[") + flowerq::IO::inspect(this->value) + rstr("]"); 33 | } 34 | Token() = default; 35 | 36 | }; 37 | 38 | class Mixed{ 39 | public: 40 | Token* token_ptr; 41 | 42 | Ast* ast_ptr; 43 | 44 | bool is_primitive(){ 45 | return ast_ptr == nullptr; 46 | } 47 | 48 | StringBuff toString(){ 49 | if (is_primitive()){ 50 | return token_ptr -> toString(); 51 | } 52 | return ast_ptr -> toString(); 53 | } 54 | 55 | Mixed() = default; 56 | 57 | }; 58 | 59 | -------------------------------------------------------------------------------- /Ruiko/bootstrap.ruiko: -------------------------------------------------------------------------------- 1 | use Token.Std.{ 2 | Name, 3 | String, 4 | Codes, 5 | Number 6 | } 7 | 8 | Definition ::= Name '::=' OrExp ['where' Codes] 9 | OrExp ::= AndExp ('|', AndExp) 10 | AndExp ::= NotExp+ 11 | NotExp ::= 'Not' Exp 12 | Exp ::= AtomExp | '<' AtomExp ['by' (Name | Codes)+] ['as' (Name | Codes)] ['then' (Name | Codes)+]'>' 13 | Trailer ::= '{' Number{1 2} '}' | '+' | '*' 14 | AtomExp ::= Atom [Trailer] 15 | Atom ::= Name | Indent | Dedent | String | Codes 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /Ruiko/dev_bnf.cpp: -------------------------------------------------------------------------------- 1 | #include "ast.cpp" 2 | #include "flowerq/IO.hpp" 3 | 4 | int main(){ 5 | using namespace flowerq; 6 | Token tk; 7 | Mixed m; 8 | m.token_ptr = &tk; 9 | tk.name = rstr("definition"); 10 | tk.value = rstr("def"); 11 | IO::puts(tk); 12 | } -------------------------------------------------------------------------------- /Ruiko/flowerq/Composite.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_COMP 3 | #include "Composite.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_COMP 7 | #define FLOWERQ_COMP 8 | #include 9 | 10 | namespace flowerq{ 11 | 12 | 13 | template 14 | std::function and_then(std::function f1, std::function f2){ 15 | return [=](T input){ 16 | f2(f1(input)); 17 | }; 18 | } 19 | 20 | // template 21 | // auto and_then(std::function f1, std::function f2){ 22 | // return [=](T input){ 23 | // return f2(f1(input)); 24 | // }; 25 | // } 26 | 27 | 28 | } 29 | 30 | #endif -------------------------------------------------------------------------------- /Ruiko/flowerq/IO.File.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | TODO: cahcing 4 | */ 5 | struct Writer 6 | { 7 | public: 8 | ofstream stream; 9 | void write(const Char *buf) 10 | { 11 | stream << buf; 12 | } 13 | void close() 14 | { 15 | stream.close(); 16 
| } 17 | Writer(const char *filename) 18 | : stream(filename){}; 19 | }; 20 | 21 | struct Reader 22 | { 23 | public: 24 | ifstream stream; 25 | 26 | StringBuff read(const Char split) 27 | { 28 | StringBuff s; 29 | Char c; 30 | while (stream.get(c) && (c != split) && !stream.eof()) 31 | { 32 | s.push_back(c); 33 | } 34 | return s; 35 | } 36 | 37 | StringBuff read() 38 | { 39 | StringBuff s; 40 | Char c; 41 | while (stream.get(c) && !stream.eof()) 42 | { 43 | s.push_back(c); 44 | } 45 | return s; 46 | } 47 | void close() 48 | { 49 | stream.close(); 50 | } 51 | Reader(const char *filename) 52 | : stream(filename) 53 | { 54 | } 55 | }; 56 | template 57 | R open(const char *filename) 58 | { 59 | } 60 | 61 | template <> 62 | Writer open(const char *filename) 63 | { 64 | return Writer(filename); 65 | } 66 | 67 | template <> 68 | Reader open(const char *filename) 69 | { 70 | return Reader(filename); 71 | } 72 | -------------------------------------------------------------------------------- /Ruiko/flowerq/IO.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_IO 3 | #include "IO.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_IO 7 | #define FLOWERQ_IO 8 | 9 | #include "Match.hpp" 10 | #include "Macro.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | namespace flowerq{ 16 | namespace IO{ 17 | 18 | StringBuff 19 | inspect(int e) { 20 | return to_string(e); 21 | } 22 | 23 | StringBuff 24 | inspect(Char e) { 25 | return to_string(e); 26 | } 27 | 28 | 29 | StringBuff 30 | inspect(const Char *buf) { 31 | if (buf == NULL){ 32 | //TODO: not sure how to handle the null value. 33 | return rstr(""); 34 | } 35 | StringBuff s(buf); 36 | return s; 37 | } 38 | 39 | StringBuff 40 | inspect(Char *buf) { 41 | if (buf == NULL){ 42 | //TODO: not sure how to handle the null value. 43 | return rstr(""); 44 | } 45 | StringBuff s(buf); 46 | return s; 47 | } 48 | 49 | StringBuff 50 | inspect(float e) { 51 | return to_string(e); 52 | } 53 | 54 | StringBuff 55 | inspect(double e) { 56 | return to_string(e); 57 | } 58 | 59 | StringBuff 60 | inspect(std::string e) { 61 | #ifdef UNICODE 62 | StringBuff ws; 63 | ws.assign(e.begin(), e.end()); 64 | return ws; 65 | #else 66 | return e; 67 | #endif 68 | } 69 | 70 | StringBuff 71 | inspect(std::wstring e){ 72 | #ifndef UNICODE 73 | StringBuff s; 74 | s.assign(e.begin(), e.end()); 75 | return s; 76 | #else 77 | return e; 78 | #endif 79 | } 80 | 81 | 82 | StringBuff 83 | inspect(bool e) { 84 | return e ? 
rstr("true") : rstr("false"); 85 | 86 | } 87 | 88 | template 89 | StringBuff inspect(T t); 90 | 91 | 92 | template 93 | StringBuff tuple_inspect(std::tuple tp){ 94 | return inspect(std::get<0> (tp)); 95 | } 96 | 97 | template 98 | StringBuff tuple_inspect(std::tuple tp){ 99 | return inspect(std::get<0> (tp)) + rstr(", ") + inspect(std::get<1> (tp)); 100 | } 101 | 102 | template 103 | StringBuff tuple_inspect(std::tuple tp){ 104 | return inspect(std::get<0> (tp)) + rstr(", ") + tuple_inspect(dependency::tail(tp)); 105 | } 106 | 107 | template 108 | StringBuff inspect(std::tuple tp) { 109 | return rstr("(") + tuple_inspect(tp) + rstr(",)"); 110 | } 111 | 112 | template 113 | StringBuff inspect(std::tuple tp) { 114 | return rstr("(") + tuple_inspect(tp) + rstr(")"); 115 | } 116 | 117 | template 118 | StringBuff inspect(T t) { 119 | return t.toString(); 120 | } 121 | 122 | template 123 | void puts(T t) { 124 | StringBuff res = inspect(t); 125 | cout << res << '\t'; 126 | } 127 | 128 | 129 | template 130 | void putstrln(T t) { 131 | StringBuff res = inspect(t); 132 | cout << res << std::endl; 133 | } 134 | 135 | void putstrln() { 136 | printf("\n"); 137 | } 138 | 139 | #include "IO.File.hpp" 140 | 141 | } 142 | } 143 | #endif -------------------------------------------------------------------------------- /Ruiko/flowerq/List.BaseMethods.hpp: -------------------------------------------------------------------------------- 1 | void forEach(std::function action) { 2 | Node *list_ptr = this->head_ptr->Next; 3 | while (list_ptr != nullptr) { 4 | action(list_ptr->value); 5 | list_ptr = list_ptr->Next; 6 | } 7 | } 8 | 9 | template 10 | List map(std::function fn){ 11 | 12 | const int n = length(); 13 | List new_list; 14 | Node* src_list_ptr = new_list.head_ptr = Node::_new_head(n); 15 | if (n == 0){ 16 | src_list_ptr -> Next = nullptr; 17 | return new_list; 18 | } 19 | this->forEach([&](T e){ 20 | src_list_ptr->Next = new Node(fn(e)); 21 | src_list_ptr = src_list_ptr -> Next; 22 | }); 23 | src_list_ptr -> Next = nullptr; 24 | return new_list; 25 | } 26 | 27 | List filter(std::function predicate){ 28 | const int n = length(); 29 | List new_list; 30 | Node* src_list_ptr = new_list.head_ptr = Node::_new_head(); 31 | if (n == 0){ 32 | src_list_ptr -> Next = nullptr; 33 | return new_list; 34 | } 35 | int length = 0; 36 | this->forEach([&](T e){ 37 | if (predicate(e)){ 38 | ++ length; 39 | src_list_ptr->Next = new Node(e); 40 | src_list_ptr = src_list_ptr -> Next; 41 | } 42 | }); 43 | src_list_ptr -> Next = nullptr; 44 | new_list.head_ptr->size = length; 45 | return new_list; 46 | } 47 | 48 | template 49 | G reduce(std::function fold_fn, G start_elem) { 50 | this->forEach([&](int e) { 51 | start_elem = fold_fn(start_elem, e); 52 | }); 53 | return start_elem; 54 | } 55 | 56 | template 57 | List> zip(List traversal) { 58 | return flowerq::zip(*this, traversal); 59 | } -------------------------------------------------------------------------------- /Ruiko/flowerq/List.Constructor.hpp: -------------------------------------------------------------------------------- 1 | template 2 | static List list::create() { 3 | List new_list = List(); 4 | new_list.head_ptr = Node::_new_head(0); 5 | new_list.head_ptr->Next = nullptr; 6 | return new_list; 7 | } 8 | 9 | template 10 | static List list::create(A value) { 11 | List new_list = List(); 12 | new_list.head_ptr = Node::_new_head(1); 13 | new_list.head_ptr->Next = Node::_new_ptr(value); 14 | return new_list; 15 | } 16 | 17 | template 18 | static List list::create(A 
value, VARARGS... varargs){ 19 | List new_list = List(); 20 | int count = 0; 21 | auto src_ptr = Node::_new_ptr(count, value, varargs...); 22 | new_list.head_ptr = Node::_new_head(count, src_ptr); 23 | return new_list; 24 | } 25 | 26 | template 27 | static List list::cons(A value, List list){ 28 | List new_list; 29 | auto node = Node::_new_ptr(value); 30 | node -> Next = list.head_ptr->Next; 31 | new_list.head_ptr = Node::_new_head(list.length() + 1, node); 32 | return new_list; 33 | } -------------------------------------------------------------------------------- /Ruiko/flowerq/List.Node.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | */ 4 | 5 | template 6 | struct Node{ 7 | 8 | public: 9 | union{ 10 | T value; 11 | int size; 12 | }; 13 | Node* Next = nullptr; 14 | 15 | 16 | Node(){} 17 | Node(T v) { 18 | value = v; 19 | }; 20 | 21 | template 22 | static Node *_new_ptr(int &count, T value, Args... varargs) { 23 | auto list_ptr = new Node(value); 24 | list_ptr->Next = _new_ptr(++count, varargs...); 25 | return list_ptr; 26 | } 27 | 28 | static Node *_new_ptr(int &count, T value) { 29 | ++count; 30 | auto list_ptr = new Node(value); 31 | list_ptr->Next = nullptr; 32 | return list_ptr; 33 | } 34 | 35 | static Node *_new_ptr(T value) { 36 | auto list_ptr = new Node(value); 37 | list_ptr->Next = nullptr; 38 | return list_ptr; 39 | } 40 | 41 | static Node *_new_head(int size, Node* next) { 42 | auto head_ptr = new Node; 43 | head_ptr -> size = size; 44 | head_ptr -> Next = next; 45 | return head_ptr; 46 | } 47 | 48 | static Node *_new_head(int size) { 49 | auto head_ptr = new Node; 50 | head_ptr -> size = size; 51 | return head_ptr; 52 | } 53 | 54 | static Node *_new_head() { 55 | auto head_ptr = new Node; 56 | return head_ptr; 57 | } 58 | 59 | }; 60 | 61 | template 62 | static void del(Node &list) { 63 | del(list.Next); 64 | list.Next = nullptr; 65 | } 66 | 67 | template 68 | static void del(Node *list_ptr) { 69 | if (list_ptr == nullptr) 70 | return; 71 | Node *next_ptr = list_ptr->Next; 72 | delete list_ptr; 73 | del(next_ptr); 74 | } 75 | -------------------------------------------------------------------------------- /Ruiko/flowerq/List.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_LIST 3 | #include "List.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_LIST 7 | #define FLOWERQ_LIST 8 | 9 | 10 | #include "Macro.hpp" 11 | #include "IO.hpp" 12 | #include "Match.hpp" 13 | #include 14 | 15 | 16 | namespace flowerq{ 17 | 18 | // struct Node definition 19 | #include "List.Node.hpp" 20 | 21 | 22 | #pragma region declare 23 | template 24 | class List; 25 | 26 | template 27 | static void del(List* list_ptr); 28 | 29 | template 30 | static void del(List &list); 31 | 32 | 33 | template 34 | static List> zip(List list1, List list2); 35 | 36 | namespace list{ 37 | template 38 | static List create(); 39 | 40 | template 41 | static List create(A value); 42 | 43 | template 44 | static List create(A value, VARARGS... varargs); 45 | 46 | template 47 | static List cons(A value, List list); 48 | } 49 | #pragma endregion 50 | 51 | template 52 | class List{ 53 | 54 | protected: 55 | Node* head_ptr; // the head ptr does not contain values but the length of list. 
56 | public: 57 | 58 | int length(){ 59 | return head_ptr->size; 60 | } 61 | 62 | template 63 | typename std::enable_if::value, StringBuff>::type 64 | toString(){ 65 | 66 | const int n = length(); 67 | if (n == 0){ 68 | return rstr("List<0>[]"); 69 | } 70 | 71 | T head; 72 | List tail; 73 | auto tp = destruct(); 74 | pattern::match(tp, head, tail); 75 | 76 | StringBuff res = rstr("List<") + to_string(n) + rstr(">[") + IO::inspect(head); 77 | 78 | tail.forEach([=, &res](T e) { 79 | res += rstr(", ") + IO::inspect(e); 80 | }); 81 | 82 | return res + rstr("]"); 83 | 84 | } 85 | 86 | template 87 | typename std::enable_if::value, StringBuff>::type 88 | toString(){ 89 | Char *buffer = new Char[length()]; 90 | int idx = 0; 91 | forEach([&](Char& ch){ 92 | buffer[idx++] = ch; 93 | }); 94 | return buffer; 95 | } 96 | 97 | T at(int idx){ 98 | Node *list_ptr = this->head_ptr->Next; 99 | int i = 0; 100 | while(i++ < idx){ 101 | if (list_ptr == nullptr){ 102 | const char* err_info = "Runtime IndexError: List ended before found the index."; 103 | printf("%s\n", err_info); 104 | const auto err = std::runtime_error(err_info); 105 | throw err; 106 | } 107 | list_ptr = list_ptr->Next; 108 | } 109 | return list_ptr->value; 110 | } 111 | 112 | T head(){ 113 | return head_ptr->Next->value; 114 | } 115 | 116 | List tail() { 117 | List new_list; 118 | const int n = length(); 119 | if (n == 0){ 120 | new_list.head_ptr = Node::_new_head(0); 121 | return new_list; 122 | } 123 | new_list.head_ptr = Node::_new_head(n - 1, this->head_ptr->Next->Next); 124 | return new_list; 125 | } 126 | 127 | std::tuple> destruct() { 128 | return std::make_tuple(head(), tail()); 129 | }; 130 | 131 | #include "List.BaseMethods.hpp" 132 | 133 | 134 | 135 | 136 | template 137 | friend List list::create(); 138 | 139 | template 140 | friend List list::create(A value); 141 | 142 | template 143 | friend List list::create(A value, VARARGS... varargs); 144 | 145 | template 146 | friend List list::cons(A value, List list); 147 | 148 | template 149 | friend void del(List* list_ptr); 150 | 151 | template 152 | friend void del(List &list); 153 | 154 | template 155 | friend List> zip(List list1, List list2); 156 | }; 157 | 158 | 159 | template 160 | List> zip(List list1, List list2){ 161 | const int len = std::min(list1.length(), list2.length()); 162 | List> new_list; 163 | auto h = new_list.head_ptr = Node>::_new_head(len); 164 | 165 | Node *h1 = list1.head_ptr->Next; 166 | Node *h2 = list2.head_ptr->Next; 167 | for(int i=0; i < len; ++i){ 168 | h -> Next = new Node>(std::make_tuple(h1->value, h2->value)); 169 | h = h->Next; 170 | } 171 | h -> Next = nullptr; 172 | return new_list; 173 | } 174 | // define ways to construct list. 175 | #include "List.Constructor.hpp" 176 | 177 | using Str = List; 178 | // not sure whether to use haskell like string. 179 | // if so, string concat could be slow a lot. 
180 | 181 | template 182 | void del(List* list_ptr){ 183 | del(list_ptr->head_ptr); 184 | delete list_ptr; 185 | } 186 | 187 | template 188 | void del(List &list){ 189 | del(list.head_ptr); 190 | } 191 | 192 | } 193 | #endif 194 | -------------------------------------------------------------------------------- /Ruiko/flowerq/Macro.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_MACRO 3 | #include "Macro.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_MACRO 7 | #define FLOWERQ_MACRO 8 | #include 9 | #include 10 | #include 11 | #include 12 | #ifdef UNICODE 13 | 14 | #define rstr(src) L ## src 15 | using Char = wchar_t; 16 | using ofstream = std::wofstream; 17 | using ifstream = std::wifstream; 18 | using fstream = std::wfstream; 19 | static std::wostream& cout = std::wcout; 20 | static auto str_len = std::wcslen; 21 | typedef std::wstring StringBuff; 22 | template 23 | static StringBuff to_string(T t){ 24 | return std::to_wstring(t); 25 | } 26 | // #define __FLOWER_MACRO_GETBUFFLEN__ std::wcslen 27 | // #define __FLOWER_MACRO_TO_BUFF__ std::to_wstring 28 | #else 29 | #define rstr(src) src 30 | using Char = char; 31 | using ofstream = std::ofstream; 32 | using ifstream = std::ifstream; 33 | using fstream = std::fstream; 34 | static std::ostream& cout = std::cout; 35 | static auto str_len = std::strlen; 36 | typedef std::string StringBuff; 37 | template 38 | static StringBuff to_string(T t){ 39 | return std::to_string(t); 40 | } 41 | #endif 42 | #endif -------------------------------------------------------------------------------- /Ruiko/flowerq/Match.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_MATCH 3 | #include "Match.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_MATCH 7 | #define FLOWERQ_MATCH 8 | #include 9 | #include 10 | 11 | namespace flowerq { 12 | namespace dependency { 13 | 14 | /// reference: 15 | /// url : https://stackoverflow.com/questions/10626856/how-to-split-a-tuple 16 | /// author: André Bergner 17 | 18 | 19 | 20 | template 21 | auto tail_impl(std::index_sequence, std::tuple t) { 22 | return std::make_tuple(std::get(t)...); 23 | } 24 | 25 | template 26 | auto tail(std::tuple t) { 27 | return tail_impl(std::make_index_sequence(), t); 28 | } 29 | 30 | template 31 | auto tail2_impl(std::index_sequence, std::tuple t) { 32 | return std::make_tuple(std::get(t)...); 33 | } 34 | 35 | template 36 | auto tail2(std::tuple t){ 37 | return tail_impl(std::make_index_sequence(), t); 38 | } 39 | } 40 | 41 | namespace pattern { 42 | 43 | template 44 | void match(std::tuple tp, T &t, G &g) { 45 | t = std::get<0>(tp); 46 | g = std::get<1>(tp); 47 | } 48 | 49 | template 50 | void match(std::tuple tp, T &t, VARARGS &... 
args) { 51 | t = std::get<0>(tp); 52 | match(dependency::tail(tp), args...); 53 | } 54 | 55 | template 56 | void match(std::tuple tp, T &t) { 57 | t = std::get<0>(tp); 58 | } 59 | } 60 | } 61 | #endif 62 | -------------------------------------------------------------------------------- /Ruiko/main.cpp: -------------------------------------------------------------------------------- 1 | // #define UNICODE 2 | #include 3 | #include "flowerq/List.hpp" 4 | #include "flowerq/Composite.hpp" 5 | #include "flowerq/IO.hpp" 6 | 7 | int main() { 8 | #ifdef UNICODE 9 | setlocale(LC_ALL,""); 10 | #endif 11 | using namespace flowerq; 12 | 13 | // IO::putstrln(list::create(1, 2, 3, 4, 5)); 14 | 15 | auto lst = list::create(1, 2, 3, 4, 5); 16 | 17 | // IO::putstrln(list::create(list::create(1, 2, 3, 5), list::create(2, 3, 4))); 18 | // IO::putstrln(std::make_tuple(lst)); 19 | IO::putstrln(lst.destruct()); 20 | 21 | auto new_lst = list::cons(2, lst); 22 | 23 | IO::putstrln(new_lst.map([=](int e){return e+1;}).filter([=](int e){return e%2==0;})); 24 | 25 | 26 | auto lst2 = list::create(); 27 | 28 | 29 | IO::putstrln(new_lst); 30 | 31 | IO::putstrln(new_lst.tail()); 32 | 33 | 34 | IO::putstrln(new_lst.reduce([=](int a, int b){return a+b;}, 0)); 35 | 36 | IO::putstrln(lst); 37 | IO::putstrln(lst.at(2)); 38 | List string_ = list::create(rstr('1'), rstr('2'), rstr('3'), rstr('4')); 39 | IO::putstrln(string_.length()); 40 | IO::putstrln("string here:"); 41 | IO::putstrln(string_); 42 | 43 | // 垃圾推导 44 | list::create(0, 1, 2).map([&](auto e){ return lst.at(e);}).forEach(IO::puts); 45 | 46 | 47 | // 陈独秀同学你先下来 48 | std::function f1 = [&](auto e){ return lst.at(e);}; 49 | std::function f2 = IO::puts; 50 | and_then(f1, f2)(2); 51 | 52 | auto writer = IO::open("test.txt"); 53 | writer.write(rstr("a ::= b [c [d [e f{2, 3}]]]")); 54 | writer.close(); 55 | 56 | auto reader = IO::open("test.txt"); 57 | auto s = reader.read(); 58 | IO::puts(s); 59 | 60 | 61 | auto xxx = lst; 62 | IO::putstrln(lst); 63 | lst = list::create(-1, -1, -1); 64 | IO::putstrln(xxx); 65 | IO::putstrln(lst); 66 | IO::putstrln(lst.zip(lst)); 67 | } -------------------------------------------------------------------------------- /Ruiko/test.txt: -------------------------------------------------------------------------------- 1 | a ::= b [c [d [e f{2, 3}]]] -------------------------------------------------------------------------------- /Ruiko/xml.ruiko: -------------------------------------------------------------------------------- 1 | use Token.Std.{ 2 | Name 3 | }; 4 | 5 | Tag ::= '<' Name '>' 6 | EndTag ::= '' 7 | Block ::= 8 | Block* 9 | # use context-sensitive syntax 10 | | Not (Tag | EndTag) # use negative matching 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/RuikoEBNF.rst: -------------------------------------------------------------------------------- 1 | Ruiko EBNF 2 | ===================== 3 | 4 | Grammar 5 | ----------- 6 | 7 | .. code :: 8 | 9 | ignore [token1, token2, ...] 10 | # optional, discard some tokenizers with specific names. 11 | # it only affects when you're using EBNFParser automatical tokenizing function. 12 | 13 | deftoken directory1.directory2...directoryn.filename 14 | # your custom token function. cannot be applied when you're using auto token. 
15 | 16 | 17 | token1 := ...; 18 | token2 := ...; 19 | 20 | token3 cast := ...; 21 | # define a cast map 22 | 23 | token4 cast as K := ...; 24 | # def cast map and custom prefix 25 | 26 | token5 as K := ...; 27 | # only def custom prefix 28 | 29 | 30 | token6 := ...; 31 | 32 | token7 of token5 := ...; 33 | # add more patterns to token5 34 | 35 | parser1 ::= token3 token5+ | [token6] token4* (parser2 parser3){3, 10}; 36 | # define a combined parser 37 | /* 38 | `|` means `or`, 39 | `[]` means `optional`, 40 | `()` means `make a new pattern by several patterns`, 41 | `pattern+` means one or more, 42 | `pattern*` means zero or more, 43 | `pattern{a, b}` means matching this pattern more than `a` times and less than b; 44 | `pattern{a}` means matching this pattern more than `a` times. 45 | */ 46 | 47 | parser2 throw [parser3 ';'] = parser3 parser1 ';'; 48 | /* 49 | the result from `parser2` will not contains 50 | a term(`Tokenizer` or `Ast`) with name=parser3 or string=";" 51 | */ 52 | 53 | More accurately, see the `bootstrap grammar 54 | `_ here. 55 | 56 | 57 | Regex Prefix 58 | ------------------------- 59 | 60 | Regex prefix in ruiko EBNF would add a regex pattern to :code:`token_table`, 61 | which might be used for generating an automatical tokenizing function(unless you use your custom tokenizing function). 62 | 63 | When you want to use Regex prefix, just type :code:`R''`. 64 | 65 | - url.ruiko 66 | 67 | .. code :: 68 | 69 | url := R'https.*?\.(com|cn|org|net)'; 70 | other := R'.'; 71 | parserToTest throw [other] ::= (url | other)+; 72 | 73 | test it 74 | .. code :: 75 | 76 | ruiko url.ruiko url --test 77 | python test_lang.py parserToTest "https://github.comasdas https://123.net" 78 | =========================ebnfparser test script================================ 79 | parserToTest[ 80 | [name: url, string: "https://github.com"] 81 | [name: url, string: "https://123.net"] 82 | ] 83 | 84 | You should take care that there is only regex matching in tokenizing process, 85 | and when literal parsers and combined parsers are parsing tokenizers, they are matching 86 | whether the name is what they expect(in fact, what parsers are comparing by is not the **name**, it's the **memory address**, 87 | so EBNFParser is very quick in this process). 88 | 89 | 90 | Cast Map 91 | -------------------------- 92 | 93 | 94 | .. code :: 95 | 96 | SomeToken cast as S := 'abc'; 97 | Alpha := R'[a-z]+'; 98 | F ::= S'abc' | Alpha; 99 | 100 | 101 | The ruiko codes above defines a tokenize named :code:`SomeToken` with a prefix :code:`S`. 102 | 103 | 104 | When the input source is splitted into a sequence of tokenizers , however, even the literal parser 105 | :code:`Alpha` is supposed to match all string matched by regex pattern :code:`"[a-z]+"`, it cannot match a tokenizer 106 | with attribute :code:`string="abc"` generated by EBNFParser automatical tokenizing, 107 | that's because all the :code:`"all"` has been casted into a unique string in a buffer pool, 108 | and **all of them have the same name** :code:`SomeToken`, **not** :code:`Alpha`. 109 | 110 | Here is a string with value :code:`"abc"` located at an unique memory address, 111 | and every literal parser defined by :code:`"abc"` just matched it only. 112 | 113 | Just as what I told you at Section :code:`Regex Prefix` , 114 | The literal parser defined as :code:`Alpha := R'[a-z]+'` just matches the tokenizer whose name is :code:`Alpha`. 
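To make the mechanism concrete, here is a minimal hand-written sketch. The rule names :code:`SomeToken` and :code:`Alpha` are simply the ones from the grammar above, and the token table is built by hand rather than by the generator; only :code:`Tokenizer.from_raw_strings` and the matcher helpers come from EBNFParser itself. The regex is what actually scans :code:`"abc"`, but the cast map renames the resulting tokenizer to :code:`SomeToken`, so a literal parser named :code:`Alpha` will not accept it.

.. code ::

    # hand-written sketch of auto-tokenizing with a cast map (names are illustrative)
    from Ruikowa.ObjectRegex.Tokenizer import (
        Tokenizer, unique_literal_cache_pool, regex_matcher)

    token_table = (
        (unique_literal_cache_pool["space"], regex_matcher(r'\s+')),
        (unique_literal_cache_pool["Alpha"], regex_matcher('[a-z]+')),
    )
    # every "abc" caught by the Alpha regex is renamed to SomeToken
    cast_map = {'abc': unique_literal_cache_pool['SomeToken']}

    tokens = list(Tokenizer.from_raw_strings(
        "abc xyz", token_table,
        to_ignore=({"space"}, {}), cast_map=cast_map))

    for tk in tokens:
        print(tk)
    # [name: SomeToken, string: "abc"]
    # [name: Alpha, string: "xyz"]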
115 | 116 | 117 | Custom Prefix 118 | -------------------------- 119 | 120 | If you're using custom tokenizing, several :code:`Ruikowa.ObjectRegex.Tokenizer` objects 121 | with the same attribute :code:`string="abc"` (and have the same memory address) 122 | could have different names. 123 | 124 | To distinguish from each other, you can do as the following: 125 | 126 | - Grammar 127 | 128 | .. code :: 129 | 130 | SomeToken as S := 'abc'; 131 | Alpha := R'[a-z]+'; 132 | F ::= S'abc' | Alpha; 133 | G ::= 'abc'; 134 | H ::= G | F ; 135 | 136 | .. code :: 137 | 138 | [name: SomeToken, string: "abc"] 139 | ... 140 | 141 | 142 | If you are using combined parser :code:`G` to match above tokenizers, you'll fail, 143 | because in the grammar :code:`G` is defined as :code:`G::='abc'` , it means :code:`G` only accepts 144 | the a tokenizer who has an attribute :code:`name="auto_const"` and another attribute :code:`string="abc"` 145 | (and it's from the unique buff pool, not a string created by regex matching). 146 | -------------------------------------------------------------------------------- /docs/codes/just.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["auto_const"], str_matcher(('just'))),) 7 | 8 | class UNameEnum: 9 | # names 10 | 11 | Just = unique_literal_cache_pool['Just'] 12 | 13 | cast_map = {} 14 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({}, {}),cast_map=cast_map) 15 | 16 | Just = AstParser([SeqParser(['just'], at_least=1,at_most=Undef)], 17 | name="Just", 18 | to_ignore=({}, {})) 19 | Just.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/just.ruiko: -------------------------------------------------------------------------------- 1 | Just ::= 'just'+; -------------------------------------------------------------------------------- /docs/codes/lisp.ruiko: -------------------------------------------------------------------------------- 1 | 2 | ignore [space] 3 | 4 | space := R'\s'; 5 | 6 | Atom := R'[^\(\)\s\`]+'; # use Regex 7 | 8 | Expr ::= Atom 9 | | Quote 10 | | '(' Expr* ')'; 11 | 12 | 13 | Quote ::= '`' Expr ; 14 | 15 | Stmts ::= Expr*; -------------------------------------------------------------------------------- /docs/codes/lisp_parser.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 
2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["space"], regex_matcher('\s')), 7 | (unique_literal_cache_pool["Atom"], regex_matcher('[^\(\)\s\`]+')), 8 | (unique_literal_cache_pool["auto_const"], char_matcher(('`', ')', '('))),) 9 | 10 | class UNameEnum: 11 | # names 12 | 13 | space = unique_literal_cache_pool['space'] 14 | Atom = unique_literal_cache_pool['Atom'] 15 | Expr = unique_literal_cache_pool['Expr'] 16 | Quote = unique_literal_cache_pool['Quote'] 17 | Stmts = unique_literal_cache_pool['Stmts'] 18 | 19 | cast_map = {} 20 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({"space"}, {}),cast_map=cast_map) 21 | space = LiteralNameParser('space') 22 | Atom = LiteralNameParser('Atom') 23 | Expr = AstParser([Ref('Atom')], 24 | [Ref('Quote')], 25 | ['(', SeqParser([Ref('Expr')], at_least=0,at_most=Undef), ')'], 26 | name="Expr", 27 | to_ignore=({}, {})) 28 | Quote = AstParser(['`', Ref('Expr')], 29 | name="Quote", 30 | to_ignore=({}, {})) 31 | Stmts = AstParser([SeqParser([Ref('Expr')], at_least=0,at_most=Undef)], 32 | name="Stmts", 33 | to_ignore=({}, {})) 34 | Stmts.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/parsing_CastMap.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["space"], regex_matcher('\s+')), 7 | (unique_literal_cache_pool["identifier"], regex_matcher('[a-zA-Z_]{1}[a-zA-Z_0-9]*')), 8 | (unique_literal_cache_pool["keyword"], str_matcher(('public', 'for', 'def'))),) 9 | 10 | class UNameEnum: 11 | # names 12 | 13 | space = unique_literal_cache_pool['space'] 14 | identifier = unique_literal_cache_pool['identifier'] 15 | keyword_def = unique_literal_cache_pool['def'] 16 | keyword_for = unique_literal_cache_pool['for'] 17 | keyword_public = unique_literal_cache_pool['public'] 18 | keyword = unique_literal_cache_pool['keyword'] 19 | parserToTest = unique_literal_cache_pool['parserToTest'] 20 | 21 | cast_map = {'def': unique_literal_cache_pool['keyword'], 'for': unique_literal_cache_pool['keyword'], 'public': unique_literal_cache_pool['keyword']} 22 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({"space"}, {}),cast_map=cast_map) 23 | space = LiteralNameParser('space') 24 | identifier = LiteralNameParser('identifier') 25 | keyword = LiteralNameParser('keyword') 26 | parserToTest = AstParser([SeqParser([Ref('identifier')], [Ref('keyword')], at_least=1,at_most=Undef)], 27 | name="parserToTest", 28 | to_ignore=({}, {})) 29 | parserToTest.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/parsing_CastMap.ruiko: -------------------------------------------------------------------------------- 1 | 2 | 3 | ignore [space] 4 | space := R'\s+'; 5 | # ignore the whitespace 
characters. 6 | 7 | 8 | identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*'; 9 | keyword cast := 'def' 'for' 'public'; 10 | 11 | 12 | parserToTest ::= (identifier | keyword)+; -------------------------------------------------------------------------------- /docs/codes/parsing_tokenizer.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["MyTokenType"], str_matcher(('abc', '233'))),) 7 | 8 | class UNameEnum: 9 | # names 10 | 11 | MyTokenType_abc = unique_literal_cache_pool['abc'] 12 | MyTokenType = unique_literal_cache_pool['MyTokenType'] 13 | parserToTest = unique_literal_cache_pool['parserToTest'] 14 | 15 | cast_map = {} 16 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({}, {}),cast_map=cast_map) 17 | MyTokenType = LiteralNameParser('MyTokenType') 18 | parserToTest = AstParser([SeqParser([Ref('MyTokenType')], at_least=1,at_most=Undef)], 19 | name="parserToTest", 20 | to_ignore=({}, {})) 21 | parserToTest.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/parsing_tokenizer.ruiko: -------------------------------------------------------------------------------- 1 | MyTokenType := 'abc' '233'; 2 | parserToTest ::= MyTokenType+; -------------------------------------------------------------------------------- /docs/codes/proj.py: -------------------------------------------------------------------------------- 1 | from Ruikowa.ObjectRegex.ASTDef import Ast 2 | from Ruikowa.ErrorHandler import ErrorHandler 3 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 4 | from Ruikowa.ObjectRegex.Tokenizer import Tokenizer 5 | 6 | from lisp_parser import Stmts, token_table 7 | 8 | import typing as t 9 | 10 | def token_func(src_code: str) -> t.Iterable[Tokenizer]: 11 | return Tokenizer.from_raw_strings(src_code, token_table, ({"space"}, {})) 12 | 13 | parser = ErrorHandler(Stmts.match, token_func) 14 | 15 | def parse(filename: str) -> Ast: 16 | 17 | return parser.from_file(filename) 18 | 19 | 20 | print(parse("test.lisp")) -------------------------------------------------------------------------------- /docs/codes/test.lisp: -------------------------------------------------------------------------------- 1 | (define f (x y z) 2 | (+ x 3 | (+ y z))) -------------------------------------------------------------------------------- /docs/codes/test_lang.py: -------------------------------------------------------------------------------- 1 | 2 | # This file is automatically generated by EBNFParser. 3 | import argparse, json 4 | 5 | cmd_parser = argparse.ArgumentParser(description='test language parsers swiftly.') 6 | cmd_parser.add_argument("parser", type=str, 7 | help='What kind of parser do you want to test with?(e.g Stmt, Expr, ...)') 8 | cmd_parser.add_argument("codes", metavar='codes', type=str, 9 | help='input some codes in your own language here.') 10 | cmd_parser.add_argument('-o', help='output. 
support .json and .ast suffix.', type=str) 11 | cmd_parser.add_argument("--testTk", nargs='?', default=False, const=True) 12 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True, 13 | help='print tokens of grammar file?') 14 | 15 | args = cmd_parser.parse_args() 16 | 17 | if args.debug: 18 | from Ruikowa.Config import Debug 19 | Debug.append(1) 20 | 21 | from Ruikowa.ErrorHandler import ErrorHandler, Colored 22 | from Ruikowa.ObjectRegex.ASTDef import Ast 23 | from Ruikowa.io import grace_open 24 | from just import * 25 | print(Colored.Green,'=========================ebnfparser test script================================', Colored.Clear) 26 | 27 | print_token = args.testTk 28 | ast: Ast = ErrorHandler(eval(args.parser).match, token_func).from_source_code('', args.codes, print_token=print_token) 29 | print(Colored.Blue, ast, Colored.Clear) 30 | if args.o: 31 | o: str = args.o.lower() 32 | if o.endswith('.json'): 33 | grace_open(o).write(json.dumps(ast.dump_to_json(), indent=2)) 34 | elif o.endswith('.ast'): 35 | grace_open(o).write(ast.dump()) 36 | else: 37 | raise Exception('Unsupported file ext.') 38 | 39 | -------------------------------------------------------------------------------- /docs/codes/url.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["url"], regex_matcher('https.*?\.(com|cn|org|net)')), 7 | (unique_literal_cache_pool["other"], regex_matcher('.')),) 8 | 9 | class UNameEnum: 10 | # names 11 | 12 | url = unique_literal_cache_pool['url'] 13 | other = unique_literal_cache_pool['other'] 14 | parserToTest = unique_literal_cache_pool['parserToTest'] 15 | 16 | cast_map = {} 17 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({}, {}),cast_map=cast_map) 18 | url = LiteralNameParser('url') 19 | other = LiteralNameParser('other') 20 | parserToTest = AstParser([SeqParser([Ref('url')], [Ref('other')], at_least=1,at_most=Undef)], 21 | name="parserToTest", 22 | to_ignore=({"other"}, {})) 23 | parserToTest.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/url.ruiko: -------------------------------------------------------------------------------- 1 | url := R'https.*?\.(com|cn|org|net)'; 2 | other := R'.'; 3 | parserToTest throw [other] ::= (url | other)+; -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # EBNFParser documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Apr 4 17:20:13 2018. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 
15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.todo', 37 | 'sphinx.ext.mathjax', 38 | 'sphinx.ext.viewcode', 39 | 'sphinx.ext.githubpages'] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix(es) of source filenames. 45 | # You can specify multiple suffix as a list of string: 46 | # 47 | # source_suffix = ['.rst', '.md'] 48 | source_suffix = '.rst' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = 'EBNFParser' 55 | copyright = '2018, thautwarm' 56 | author = 'thautwarm' 57 | 58 | # The version info for the project you're documenting, acts as replacement for 59 | # |version| and |release|, also used in various other places throughout the 60 | # built documents. 61 | # 62 | # The short X.Y version. 63 | version = '2.0' 64 | # The full version, including alpha/beta/rc tags. 65 | release = '2.0' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | # 70 | # This is also used if you do content translation via gettext catalogs. 71 | # Usually you set "language" from the command line for these cases. 72 | language = None 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This patterns also effect to html_static_path and html_extra_path 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 78 | 79 | # The name of the Pygments (syntax highlighting) style to use. 80 | pygments_style = 'sphinx' 81 | 82 | # If true, `todo` and `todoList` produce output, else they produce nothing. 83 | todo_include_todos = True 84 | 85 | 86 | # -- Options for HTML output ---------------------------------------------- 87 | 88 | # The theme to use for HTML and HTML Help pages. See the documentation for 89 | # a list of builtin themes. 90 | # 91 | html_theme = 'alabaster' 92 | 93 | # Theme options are theme-specific and customize the look and feel of a theme 94 | # further. For a list of options available for each theme, see the 95 | # documentation. 96 | # 97 | # html_theme_options = {} 98 | 99 | # Add any paths that contain custom static files (such as style sheets) here, 100 | # relative to this directory. They are copied after the builtin static files, 101 | # so a file named "default.css" will overwrite the builtin "default.css". 102 | html_static_path = ['_static'] 103 | 104 | # Custom sidebar templates, must be a dictionary that maps document names 105 | # to template names. 
106 | #
107 | # This is required for the alabaster theme
108 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
109 | html_sidebars = {
110 |     '**': [
111 |         'relations.html',  # needs 'show_related': True theme option to display
112 |         'searchbox.html',
113 |     ]
114 | }
115 | 
116 | 
117 | # -- Options for HTMLHelp output ------------------------------------------
118 | 
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'EBNFParserdoc'
121 | 
122 | 
123 | # -- Options for LaTeX output ---------------------------------------------
124 | 
125 | latex_elements = {
126 |     # The paper size ('letterpaper' or 'a4paper').
127 |     #
128 |     # 'papersize': 'letterpaper',
129 | 
130 |     # The font size ('10pt', '11pt' or '12pt').
131 |     #
132 |     # 'pointsize': '10pt',
133 | 
134 |     # Additional stuff for the LaTeX preamble.
135 |     #
136 |     # 'preamble': '',
137 | 
138 |     # Latex figure (float) alignment
139 |     #
140 |     # 'figure_align': 'htbp',
141 | }
142 | 
143 | # Grouping the document tree into LaTeX files. List of tuples
144 | # (source start file, target name, title,
145 | #  author, documentclass [howto, manual, or own class]).
146 | latex_documents = [
147 |     (master_doc, 'EBNFParser.tex', 'EBNFParser Documentation',
148 |      'thautwarm', 'manual'),
149 | ]
150 | 
151 | 
152 | # -- Options for manual page output ---------------------------------------
153 | 
154 | # One entry per manual page. List of tuples
155 | # (source start file, name, description, authors, manual section).
156 | man_pages = [
157 |     (master_doc, 'ebnfparser', 'EBNFParser Documentation',
158 |      [author], 1)
159 | ]
160 | 
161 | 
162 | # -- Options for Texinfo output -------------------------------------------
163 | 
164 | # Grouping the document tree into Texinfo files. List of tuples
165 | # (source start file, target name, title, author,
166 | #  dir menu entry, description, category)
167 | texinfo_documents = [
168 |     (master_doc, 'EBNFParser', 'EBNFParser Documentation',
169 |      author, 'EBNFParser', 'One line description of project.',
170 |      'Miscellaneous'),
171 | ]
172 | 
173 | 
174 | source_suffix = ['.rst', '.md', '.MD']
175 | html_theme = 'sphinx_rtd_theme'
176 | 
177 | from recommonmark.parser import CommonMarkParser
178 | source_parsers = {
179 |     '.md': CommonMarkParser,
180 |     '.MD': CommonMarkParser,
181 | }
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. EBNFParser documentation master file, created by
2 |    sphinx-quickstart on Wed Apr 4 17:20:13 2018.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | Welcome to EBNFParser's documentation!
7 | ======================================
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Notes
12 | 
13 |    quickstart
14 |    parsing
15 |    RuikoEBNF
16 | 
17 | 
18 | 
--------------------------------------------------------------------------------
/docs/parsing.rst:
--------------------------------------------------------------------------------
1 | Parsing in EBNFParser
2 | =======================
3 | 
4 | 
5 | EBNFParser is a parser generator framework that parses raw strings into structured nested lists (ASTs).
6 | 
7 | Parsing in EBNFParser has the following steps:
8 | 
9 | Tokenizing
10 | ---------------
11 | 
12 | Tokenizing is the very first step: it splits the input string into a sequence of :code:`Ruikowa.ObjectRegex.Tokenizer` objects.
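As a minimal sketch of this step (using the generated module :code:`docs/codes/parsing_tokenizer.py` that ships with this repository; the printed values are only indicative), the whole step is one call to the generated :code:`token_func`:

.. code :: python

    # `token_func` is defined in the generated parser module and wraps
    # `Tokenizer.from_raw_strings(...)` together with the module's `token_table`.
    from parsing_tokenizer import token_func

    for tk in token_func("abc233233"):
        # each `tk` is a Ruikowa.ObjectRegex.Tokenizer instance
        print(tk.name, tk.string)   # e.g. MyTokenType abc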
13 | 
14 | A :code:`Ruikowa.ObjectRegex.Tokenizer` has the following **readonly** attributes:
15 | 
16 | - name : str
17 |     type of the tokenizer.
18 | - string : str
19 |     string content (from the input raw string) of the tokenizer.
20 | - colno : int
21 |     column number in the current file.
22 | - lineno : int
23 |     line (row) number in the current file.
24 | 
25 | Example:
26 | 
27 | - parsing_tokenizing.ruiko
28 | 
29 | .. code :: shell
30 | 
31 |     MyTokenType := 'abc' '233';
32 |     # The above syntax defines a literal parser to parse strings like "abc" or "233".
33 |     # "abc" and "233" will be added into `token_table` to generate the automatic tokenizing function.
34 | 
35 |     parserToTest ::= MyTokenType+;
36 |     # The above syntax defines a combined parser with `MyTokenType`.
37 | 
38 |     # A combined parser is composed of several literal parsers and other combined parsers,
39 |     # and can handle very complicated sequences of `Ruikowa.ObjectRegex.Tokenizer` objects.
40 | 
41 | - compile it
42 | 
43 | .. code :: shell
44 | 
45 |     ruiko parsing_tokenizing.ruiko parsing_tokenizing.py --test
46 | 
47 | - test it
48 | 
49 | .. code :: shell
50 | 
51 |     python test_lang.py parserToTest "abc233233"
52 |     =========================ebnfparser test script================================
53 |     parserToTest[
54 |         [name: MyTokenType, string: "abc"]
55 |         [name: MyTokenType, string: "233"]
56 |         [name: MyTokenType, string: "233"]
57 |     ]
58 | 
59 | Take care that if you use an anonymous literal pattern when defining a combined parser,
60 | like the following:
61 | 
62 | .. code ::
63 | 
64 |     Just ::= 'just'+;
65 | 
66 | then the name of all the anonymous tokenizers is just :code:`"auto_const"`:
67 | 
68 | .. code ::
69 | 
70 |     ruiko just.ruiko just --test
71 |     python test_lang.py Just "justjustjust"
72 |     =========================ebnfparser test script================================
73 |     Just[
74 |         [name: auto_const, string: "just"]
75 |         [name: auto_const, string: "just"]
76 |         [name: auto_const, string: "just"]
77 |     ]
78 | 
79 | 
80 | CastMap (Optional)
81 | ------------------------
82 | 
83 | Sometimes we need to handle special cases; a vivid instance is :code:`keyword`.
84 | 
85 | The string content of a :code:`keyword` could also be matched
86 | by :code:`identifier` (most programming languages have identifiers),
87 | just as in the following case:
88 | 
89 | - parsing_CastMap.ruiko
90 | 
91 | .. code ::
92 | 
93 | 
94 |     ignore [space]
95 |     space := R'\s+';
96 |     # ignore the whitespace characters.
97 | 
98 | 
99 |     identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*';
100 |     keyword := 'def' 'for' 'public';
101 | 
102 |     parserToTest ::= (identifier | keyword)+;
103 | 
104 | There is no doubt that :code:`identifier` will cover the cases of :code:`keyword`:
105 | 
106 | .. code :: shell
107 | 
108 |     ruiko parsing_CastMap.ruiko parsing_CastMap.py --test
109 |     python test.py parserToTest "def for public"
110 |     =========================ebnfparser test script================================
111 |     parserToTest[
112 |         [name: identifier, string: "def"]
113 |         [name: identifier, string: "for"]
114 |         [name: identifier, string: "public"]
115 |     ]
116 | 
117 | 
118 | Take care that all of the Tokenizers are named **identifier**, not **keyword**!
119 | As a result, keywords could be used in some illegal places, just like:
120 | 
121 | .. code ::
122 | 
123 |     for = 1
124 |     for for <- [for] do
125 |         for
126 | 
127 | The above example might not trouble you, but of course there could be something more severe.
128 | 
129 | I'd like to give the solution adopted by EBNFParser's auto-token mechanism.
130 | 
131 | Modify parsing_CastMap.ruiko:
132 | 
133 | .. code ::
134 | 
135 |     identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*';
136 |     keyword cast := 'def' 'for' 'public';
137 | 
138 | Here we define a :code:`cast map` that maps the strings tokenized by :code:`identifier` (like
139 | :code:`"def"`, :code:`"for"` and :code:`"public"`) to a **const string**, and
140 | outputs a :code:`Ruikowa.ObjectRegex.Tokenizer` whose name is the **const string** :code:`"keyword"`.
141 | 
142 | .. code :: shell
143 | 
144 |     ruiko parsing_CastMap.ruiko parsing_CastMap.py --test
145 |     python test.py parserToTest "def for public other"
146 |     =========================ebnfparser test script================================
147 |     parserToTest[
148 |         [name: keyword, string: "def"]
149 |         [name: keyword, string: "for"]
150 |         [name: keyword, string: "public"]
151 |         [name: identifier, string: "other"]
152 |     ]
153 | 
154 | 
155 | Perfect!
156 | 
157 | 
158 | ReStructure Tokenizers
159 | -----------------------------
160 | 
161 | This is what the word "parsing" accurately refers to.
162 | 
163 | Maybe you've heard of sequence operations like
164 | :code:`flatMap` (Scala-flatMap_), :code:`collect` (FSharp-collect_) and :code:`selectMany` (Linq-SelectMany_);
165 | that's great, because parsing is their inverse!
166 | 
167 | .. code ::
168 | 
169 |     raw words :
170 | 
171 |         ["def", "f", "(", "x", ")", "=", "x"]
172 | 
173 |     after parsing there is an AST:
174 | 
175 |         FunctionDef[
176 |             "f"
177 |             # "def" is thrown away because it is useless to the semantics, but you can
178 |             # preserve it, at the cost of some noise. The same below.
179 |             ArgList[
180 |                 "x"
181 |             ],
182 | 
183 |             Expression[
184 |                 "x"
185 |             ]
186 |         ]
187 | 
188 | The structures of the parse result just match what you defined with EBNF_.
189 | 
190 | Here is an example that generates the above AST by using an EBNF dialect, :code:`ruiko`,
191 | which is proposed by EBNFParser to extend primary EBNF.
192 | 
193 | .. code :: ebnf
194 | 
195 |     keyword cast as K := 'def';
196 |     identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*';
197 |     FunctionDef throw ['def'] ::= K'def' identifier '(' ArgList ')' '=' Expression;
198 |     Expression ::= ... # omit
199 |     ArgList ::= ... # omit
200 | 
201 | 
202 | What's more, EBNFParser supports unlimited **left recursion**.
203 | 
204 | .. _Scala-flatMap: https://www.scala-lang.org/api/current/?search=flatMap
205 | 
206 | .. _FSharp-collect: https://msdn.microsoft.com/en-us/visualfsharpdocs/conceptual/list.collect['t,'u]-function-[fsharp]
207 | 
208 | .. _Linq-SelectMany: https://msdn.microsoft.com/en-us/library/bb534336(v=vs.110).aspx
209 | 
210 | .. _EBNF: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form
211 | 
212 | 
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | Quick Start
2 | ================
3 | 
4 | 
5 | 
6 | Installing
7 | --------------------------------
8 | 
9 | EBNFParser currently supports Python 3.6+ only.
10 | 
11 | You can install it from **PyPI**:
12 | 
13 | .. code :: shell
14 | 
15 |     pip install -U EBNFParser
16 | 
17 | 
18 | 
19 | Hello World
20 | --------------------------------
21 | 
22 | As our first attempt, we can try to parse Lisp syntax into an AST (Abstract Syntax Tree).
23 | 
24 | .. code :: lisp
25 | 
26 |     (define add3 (x y z)
27 |         (add x
28 |             (add y z)))
29 | 
30 | 
31 | Here is a source code example:
32 | 
33 | - lisp.ruiko
34 | 
35 | .. code ::
36 | 
37 |     ignore [space]   # ignore the tokens with these names.
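    # note: `:=` defines a literal (tokenizer) rule, while `::=` defines a
    # combined parser that produces an AST node (see docs/parsing.rst).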
38 | 
39 |     space := R'\s';
40 | 
41 |     Atom := R'[^\(\)\s\`]+'; # use Regex
42 | 
43 |     Expr  ::= Atom
44 |             | Quote
45 |             | '(' Expr* ')';
46 | 
47 | 
48 |     Quote ::= '`' Expr ;
49 | 
50 |     Stmts ::= Expr*;
51 | 
52 | 
53 | 
54 | Then let EBNFParser generate a parser and a test script from it automatically.
55 | 
56 | Finally, test it.
57 | 
58 | .. code ::
59 | 
60 |     ruiko lisp.ruiko lisp_parser.py --test
61 |     python test_lang.py Stmts "(define f (x y z) (add (add x y) z))"
62 |     =========================ebnfparser test script================================
63 |     Stmts[
64 |         Expr[
65 |             [name: auto_const, string: "("]
66 |             Expr[
67 |                 [name: Atom, string: "define"]
68 |     ...(omit)
69 | 
70 | 
71 | 
72 | Integrate EBNFParser Into Your Own Project
73 | ---------------------------------------------
74 | 
75 | For example, suppose we have generated the Lisp parser file above as a module :code:`MyProject.Lisp.parser`.
76 | 
77 | .. code :: python
78 | 
79 |     from Ruikowa.ObjectRegex.ASTDef import Ast
80 |     from Ruikowa.ErrorHandler import ErrorHandler
81 |     from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
82 |     from Ruikowa.ObjectRegex.Tokenizer import Tokenizer
83 | 
84 |     from lisp_parser import Stmts, token_table
85 | 
86 |     import typing as t
87 | 
88 |     def token_func(src_code: str) -> t.Iterable[Tokenizer]:
89 |         return Tokenizer.from_raw_strings(
90 |                     src_code, token_table, ({"space"}, {}))
91 | 
92 |     parser = ErrorHandler(Stmts.match, token_func)
93 | 
94 |     def parse(filename: str) -> Ast:
95 | 
96 |         return parser.from_file(filename)
97 | 
98 |     # just create a file `test.lisp` and write some Lisp code in it.
99 |     print(parse("./test.lisp"))
100 | 
101 | 
102 | 
103 | A :code:`Ruikowa.ObjectRegex.Ast` is a nested list of Tokenizers, for instance:
104 | 
105 | .. code ::
106 | 
107 |     AstName[
108 |         AstName[
109 |             Tokenizer1
110 |             Tokenizer2
111 |             AstName[
112 |                 ...
113 |             ]
114 |         ]
115 |         Tokenizer3
116 |     ]
117 | 
118 | You can use :code:`obj.name` to get the name of an instance of :code:`Ast` or :code:`Tokenizer`.
119 | 
120 | 
121 | 
122 | 
123 | 
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | # install
2 | bash INSTALL.sh
3 | 
4 | # ruiko
5 | bash testRuikowa.sh
--------------------------------------------------------------------------------
/testRuikowa.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | if test -n "$1"
3 | then
4 |     bash INSTALL.sh
5 | 
6 | fi
7 | cd tests/Ruikowa/Lang
8 | 
9 | bash Lisp/testLisp.sh
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/grammar:
--------------------------------------------------------------------------------
1 | ignore [N]
2 | 
3 | someConst cast as K := 'as' 'we' 'can';
4 | 
5 | N := R'\n', R'\t', ' ';
6 | 
7 | Atom := R'[^\(\)\s\`]+'; # use Regex
8 | 
9 | Expr  ::= Atom
10 |         | Quote
11 |         | '(' Expr* ')' (K'as' K'we' K'can');
12 | 
13 | 
14 | Quote ::= '`' Expr ;
15 | Stmt  ::= Expr*;
16 | 
17 | 
18 | 
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/pparser.py:
--------------------------------------------------------------------------------
1 | # This file is automatically generated by EBNFParser.
2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["someConst"], str_matcher(('we', 'can', 'as'))), 7 | (unique_literal_cache_pool["N"], regex_matcher('\n')), 8 | (unique_literal_cache_pool["N"], regex_matcher('\t')), 9 | (unique_literal_cache_pool["N"], char_matcher((' '))), 10 | (unique_literal_cache_pool["Atom"], regex_matcher('[^\(\)\s\`]+')), 11 | (unique_literal_cache_pool["auto_const"], char_matcher(('`', ')', '('))),) 12 | 13 | class UNameEnum: 14 | # names 15 | 16 | someConst_as = unique_literal_cache_pool['as'] 17 | someConst_we = unique_literal_cache_pool['we'] 18 | someConst_can = unique_literal_cache_pool['can'] 19 | someConst = unique_literal_cache_pool['someConst'] 20 | N = unique_literal_cache_pool['N'] 21 | Atom = unique_literal_cache_pool['Atom'] 22 | Expr = unique_literal_cache_pool['Expr'] 23 | Quote = unique_literal_cache_pool['Quote'] 24 | Stmt = unique_literal_cache_pool['Stmt'] 25 | 26 | cast_map = {'as': unique_literal_cache_pool['someConst'], 'we': unique_literal_cache_pool['someConst'], 'can': unique_literal_cache_pool['someConst']} 27 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({"N"}, {}),cast_map=cast_map) 28 | someConst = LiteralNameParser('someConst') 29 | N = LiteralNameParser('N') 30 | Atom = LiteralNameParser('Atom') 31 | Expr = AstParser([Ref('Atom')], 32 | [Ref('Quote')], 33 | ['(', SeqParser([Ref('Expr')], at_least=0,at_most=Undef), ')', SeqParser([('someConst', 'as'), ('someConst', 'we'), ('someConst', 'can')], at_least=1,at_most=1)], 34 | name="Expr", 35 | to_ignore=({}, {})) 36 | Quote = AstParser(['`', Ref('Expr')], 37 | name="Quote", 38 | to_ignore=({}, {})) 39 | Stmt = AstParser([SeqParser([Ref('Expr')], at_least=0,at_most=Undef)], 40 | name="Stmt", 41 | to_ignore=({}, {})) 42 | Stmt.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /tests/Ruikowa/Lang/Lisp/test.ast: -------------------------------------------------------------------------------- 1 | Stmt[ 2 | Expr[ 3 | [name: :char, string: "("] 4 | Expr[ 5 | [name: Atom, string: "+"] 6 | ] 7 | Expr[ 8 | [name: Atom, string: "1"] 9 | ] 10 | Expr[ 11 | [name: Atom, string: "2"] 12 | ] 13 | [name: :char, string: ")"] 14 | ] 15 | ] -------------------------------------------------------------------------------- /tests/Ruikowa/Lang/Lisp/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Stmt", 3 | "value": [ 4 | { 5 | "name": "Expr", 6 | "value": [ 7 | { 8 | "name": "auto_const", 9 | "string": "(", 10 | "lineno": 1, 11 | "colno": 5 12 | }, 13 | { 14 | "name": "Expr", 15 | "value": [ 16 | { 17 | "name": "Atom", 18 | "string": "+", 19 | "lineno": 1, 20 | "colno": 6 21 | } 22 | ] 23 | }, 24 | { 25 | "name": "Expr", 26 | "value": [ 27 | { 28 | "name": "Atom", 29 | "string": "1", 30 | "lineno": 1, 31 | "colno": 8 32 | } 33 | ] 34 | }, 35 | { 36 | "name": "Expr", 37 | "value": [ 38 | { 39 | "name": "Atom", 40 | "string": "2", 41 | "lineno": 1, 42 | "colno": 10 43 | } 44 | ] 45 | }, 46 | { 47 | "name": "auto_const", 48 | "string": ")", 49 | "lineno": 1, 50 | "colno": 11 51 | }, 52 | { 53 | "name": "someConst", 54 | "string": "as", 55 | "lineno": 1, 56 | "colno": 14 57 | }, 
58 |         {
59 |           "name": "someConst",
60 |           "string": "we",
61 |           "lineno": 1,
62 |           "colno": 17
63 |         },
64 |         {
65 |           "name": "someConst",
66 |           "string": "can",
67 |           "lineno": 1,
68 |           "colno": 21
69 |         }
70 |       ]
71 |     }
72 |   ]
73 | }
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/testLisp.sh:
--------------------------------------------------------------------------------
1 | cd Lisp
2 | 
3 | ruiko grammar pparser.py
4 | 
5 | python test_lang.py Stmt "
6 | (+ 1 2) as we can
7 | " -o test.json --testTk
8 | 
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/test_lang.py:
--------------------------------------------------------------------------------
1 | 
2 | # This file is automatically generated by EBNFParser.
3 | import argparse, json
4 | 
5 | cmd_parser = argparse.ArgumentParser(description='test language parsers swiftly.')
6 | cmd_parser.add_argument("parser", type=str,
7 |                         help='What kind of parser do you want to test with?(e.g Stmt, Expr, ...)')
8 | cmd_parser.add_argument("codes", metavar='codes', type=str,
9 |                         help='input some codes in your own language here.')
10 | cmd_parser.add_argument('-o', help='output. support .json and .ast suffix.', type=str)
11 | cmd_parser.add_argument("--testTk", nargs='?', default=False, const=True)
12 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True,
13 |                         help='print tokens of grammar file?')
14 | 
15 | args = cmd_parser.parse_args()
16 | 
17 | if args.debug:
18 |     from Ruikowa.Config import Debug
19 |     Debug.append(1)
20 | 
21 | from Ruikowa.ErrorHandler import ErrorHandler, Colored
22 | from Ruikowa.ObjectRegex.ASTDef import Ast
23 | from Ruikowa.io import grace_open
24 | from pparser import *
25 | print(Colored.Green,'=========================ebnfparser test script================================', Colored.Clear)
26 | 
27 | print_token = args.testTk
28 | ast: Ast = ErrorHandler(eval(args.parser).match, token_func).from_source_code('', args.codes, print_token=print_token)
29 | print(Colored.Blue, ast, Colored.Clear)
30 | if args.o:
31 |     o: str = args.o.lower()
32 |     if o.endswith('.json'):
33 |         grace_open(o).write(json.dumps(ast.dump_to_json(), indent=2))
34 |     elif o.endswith('.ast'):
35 |         grace_open(o).write(ast.dump())
36 |     else:
37 |         raise Exception('Unsupported file ext.')
38 | 
39 | 
--------------------------------------------------------------------------------
/tests/Ruikowa/test.py:
--------------------------------------------------------------------------------
1 | 
2 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser
3 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
4 | from Ruikowa.Core.BaseDef import Trace
5 | inputs = ['a', '\n', 'abc']
6 | charParser1 = CharParser('a')
7 | charParser2 = CharParser('\n')
8 | litParser = LiteralParser.RawFormDealer(rawStr='abc', name = 'ABC')
9 | meta = MetaInfo()
10 | assert charParser1.match(inputs, meta) == 'a'
11 | assert litParser.match(inputs, meta) is None
12 | assert charParser2.match(inputs, meta) == '\n'
13 | assert litParser.match(inputs, meta) == 'abc'
14 | 
15 | a = LiteralParser('a', name = 'a')
16 | c = LiteralParser('c', name = 'c')
17 | d = LiteralParser('d', name = 'd')
18 | ASeq = AstParser([Ref('U'), d],[a], name = 'ASeq')
19 | U = AstParser([Ref('ASeq'), c], name = 'U')
20 | namespace = globals()
21 | seset = set()
22 | ASeq.compile(namespace, seset)
23 | x = MetaInfo()
24 | print(ASeq.match(['a', 'c','d','c','d','k'], x))
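# The assertions and the ASeq/U grammar above exercise literal matching and
# indirect left recursion (ASeq -> U -> ASeq); the block below rebuilds ASeq
# with direct left recursion and dumps the resulting AST to JSON.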
25 | 26 | 27 | a = LiteralParser('a', name = 'a') 28 | c = LiteralParser('c', name = 'c') 29 | d = LiteralParser('d', name = 'd') 30 | ASeq = AstParser([Ref('ASeq'), d],[a], name = 'ASeq') 31 | #U = AstParser([Ref('ASeq'), c], name = 'U') 32 | namespace = globals() 33 | seset = set() 34 | ASeq.compile(namespace, seset) 35 | x = MetaInfo() 36 | print(ASeq.match(['a', 'd','d','d','d','d','g'], x).dump_to_json()) 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/Ruikowa/testBootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Oct 17 20:07:44 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from Ruikowa.Bootstrap.Parser import * 10 | from Ruikowa.ObjectRegex.MetaInfo import * 11 | from Ruikowa.Bootstrap.Ast import ast_for_stmts 12 | words = token.findall(r"""Token {{ 13 | def token(input_str): 14 | return list(input_str) 15 | }} 16 | Stmt Throw ['\n'] ::= (NEWLINE* Equals* NEWLINE*)* 17 | Expr ::= Or ('|' Or)* 18 | Or ::= AtomExpr+ 19 | AtomExpr::= Atom [Trailer] 20 | Atom ::= Str | Name | '[' Expr ']' | '(' Expr ')' 21 | Equals ::= Name LitDef Str | Name Def Expr 22 | Trailer::= '*' | '+' | '{' Number{1 2} '}' 23 | Def := '::=' 24 | LitDef := ':=' 25 | Str := R'"[\w|\W]*?"' 26 | Name := R'[a-zA-Z_][a-zA-Z0-9]*' 27 | Number := R'\d+' 28 | NEWLINE:= '\n'""") 29 | meta = MetaInfo() 30 | res = Stmt.match(words, meta) 31 | print(res) 32 | -------------------------------------------------------------------------------- /tests/Ruikowa/testCycleLeftRecur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Oct 17 20:03:23 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser 10 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 11 | from Ruikowa.Core.BaseDef import Trace 12 | a = LiteralParser('a', name = 'a') 13 | c = LiteralParser('c', name = 'c') 14 | d = LiteralParser('d', name = 'd') 15 | ASeq = AstParser([Ref('U'), d],[a], name = 'ASeq') 16 | U = AstParser([Ref('ASeq'), c], name = 'U') 17 | namespace = globals() 18 | seset = set() 19 | ASeq.compile(namespace, seset) 20 | x = MetaInfo() 21 | print('test result => ') 22 | print(ASeq.match(['a', 'c','d','c','d','k'], x)) -------------------------------------------------------------------------------- /tests/Ruikowa/testCycleLeftRecur3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Oct 19 18:38:03 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from Ruikowa.ObjectRegex.Node import Ref, AstParser, SeqParser, LiteralParser, CharParser, MetaInfo 10 | import re 11 | token = re.compile("t|\)|\(").findall 12 | namespace = globals() 13 | recurSearcher = set() 14 | type = LiteralParser('t', name = 'type') 15 | prefix = AstParser([Ref('prefix'), 16 | LiteralParser('(', name='LP'), 17 | SeqParser([Ref('prefix')]), 18 | LiteralParser(')', name='RP')], 19 | [Ref('type')], name = 'prefix') 20 | 21 | prefix.compile(namespace, recurSearcher) -------------------------------------------------------------------------------- /tests/Ruikowa/testCycleLeftRecurAndDumpToJSON.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Oct 17 20:06:10 2017
5 | 
6 | @author: misakawa
7 | """
8 | 
9 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser
10 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
11 | from Ruikowa.Core.BaseDef import Trace
12 | 
13 | a = LiteralParser('a', name = 'a')
14 | c = LiteralParser('c', name = 'c')
15 | d = LiteralParser('d', name = 'd')
16 | ASeq = AstParser([Ref('ASeq'), d],[a], name = 'ASeq')
17 | 
18 | 
19 | #U = AstParser([Ref('ASeq'), c], name = 'U')
20 | namespace = globals()
21 | seset = set()
22 | ASeq.compile(namespace, seset)
23 | x = MetaInfo()
24 | print(ASeq.match(['a', 'd','d','d','d','d'], x).dump_to_json())
--------------------------------------------------------------------------------
/tests/Ruikowa/testLiteralParser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Oct 17 20:03:08 2017
5 | 
6 | @author: misakawa
7 | """
8 | 
9 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser
10 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
11 | from Ruikowa.Core.BaseDef import Trace
12 | inputs = ['a', '\n', 'abc']
13 | charParser1 = CharParser('a')
14 | charParser2 = CharParser('\n')
15 | litParser = LiteralParser.RawFormDealer(rawStr='abc', name = 'ABC')
16 | meta = MetaInfo()
17 | assert charParser1.match(inputs, meta) == 'a'
18 | assert litParser.match(inputs, meta) is None
19 | assert charParser2.match(inputs, meta) == '\n'
20 | assert litParser.match(inputs, meta) == 'abc'
--------------------------------------------------------------------------------