├── .gitattributes ├── .gitignore ├── .travis.yml ├── INSTALL.sh ├── LICENSE ├── Python ├── .gitignore ├── MANIFEST.in ├── README.rst ├── Ruikowa │ ├── Bootstrap │ │ ├── Ast.py │ │ ├── Compile.py │ │ ├── Parser.py │ │ ├── Token.py │ │ ├── __init__.py │ │ └── grammar │ ├── Command.py │ ├── Config.py │ ├── Core │ │ ├── BaseDef.py │ │ └── __init__.py │ ├── ErrorFamily.py │ ├── ErrorHandler.py │ ├── ObjectRegex │ │ ├── ASTDef.py │ │ ├── MetaInfo.py │ │ ├── Node.py │ │ ├── Optimize.py │ │ ├── Tokenizer.py │ │ └── __init__.py │ ├── Tools │ │ └── __init__.py │ ├── __init__.py │ ├── color.py │ └── io.py ├── release-note └── setup.py ├── README.md ├── Ruiko ├── README.rst ├── ast.cpp ├── bootstrap.ruiko ├── dev_bnf.cpp ├── flowerq │ ├── Composite.hpp │ ├── IO.File.hpp │ ├── IO.hpp │ ├── List.BaseMethods.hpp │ ├── List.Constructor.hpp │ ├── List.Node.hpp │ ├── List.hpp │ ├── Macro.hpp │ └── Match.hpp ├── main.cpp ├── test.txt └── xml.ruiko ├── docs ├── RuikoEBNF.rst ├── codes │ ├── just.py │ ├── just.ruiko │ ├── lisp.ruiko │ ├── lisp_parser.py │ ├── parsing_CastMap.py │ ├── parsing_CastMap.ruiko │ ├── parsing_tokenizer.py │ ├── parsing_tokenizer.ruiko │ ├── proj.py │ ├── test.lisp │ ├── test_lang.py │ ├── url.py │ └── url.ruiko ├── conf.py ├── index.rst ├── parsing.rst └── quickstart.rst ├── test.sh ├── testRuikowa.sh └── tests └── Ruikowa ├── Lang └── Lisp │ ├── grammar │ ├── pparser.py │ ├── test.ast │ ├── test.json │ ├── testLisp.sh │ └── test_lang.py ├── test.py ├── testBootstrap.py ├── testCycleLeftRecur.py ├── testCycleLeftRecur3.py ├── testCycleLeftRecurAndDumpToJSON.py └── testLiteralParser.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py linguist-language=python 2 | *.cs linguist-language=csharp 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .spyproject/ 3 | Attempts-which-failed/ 4 | .vscode/ 5 | *.idea/ 6 | Ruiko/cmake-build-debug/ 7 | CSharp/ 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | .idea/ 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.6-dev" 5 | - "3.7-dev" 6 | script: 7 | - pip install Linq 8 | - bash ./testRuikowa.sh installAndTest 9 | 10 | 11 | -------------------------------------------------------------------------------- /INSTALL.sh: -------------------------------------------------------------------------------- 1 | cd ./Python 2 | python setup.py install 3 | rm -r ./build 4 | rm -r ./EBNFParser.egg-info 5 | rm -r ./dist -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Python/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .spyproject/ 3 | Attempts-which-failed/ 4 | .vscode/ 5 | .idea/ 6 | EBNFParser.egg-info/ 7 | build/ 8 | dist/ 9 | -------------------------------------------------------------------------------- /Python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include Misakawa *.py 2 | include Misakawa 3 | recursive-include Ruikowa *.py 4 | include Ruikowa 5 | -------------------------------------------------------------------------------- /Python/README.rst: -------------------------------------------------------------------------------- 1 | |Build Status| |PyPI version| |Release Note| |MIT License| 2 | 3 | EBNFParser 4 | ========== 5 | 6 | Parse Many, Any, Every |Doc| 7 | ---------------------------- 8 | 9 | :: 10 | 11 | LR ::= LR 'a' 'b' | LR 'c' | 'd'; 12 | 13 | - `Python Project(Support Python 14 | 3.6+) `__ 15 | (v 2.0+) 16 | 17 | - `Old Version : Misakawa 18 | v0.x `__ 19 | - `Old Version : Ruikowa 20 | v1.x `__ 21 | 22 | -------------- 23 | 24 | Install 25 | ------- 26 | 27 | - Python 28 | 29 | - pip 30 | 31 | ``pip installl -U EBNFParser`` 32 | 33 | - setup 34 | 35 | .. code:: shell 36 | 37 | git clone https://github.com/thautwarm/EBNFParser 38 | cd EBNFParser/Python 39 | python setup.py install 40 | 41 | Usage 42 | ----- 43 | 44 | - Command Line Tools 45 | 46 | - ``ruiko``. 47 | 48 | .. code:: shell 49 | 50 | ruiko ./ ./ 51 | [--testTk] # print tokenized words or not 52 | [--test] # generate test script "test_lang.py" 53 | 54 | Use command ``ruiko`` to generate parser and token files, and then 55 | you can use ``test_lang.py`` to test your parser. 56 | 57 | .. code:: shell 58 | 59 | python ./test_lang.py Stmt " (+ 1 2) " -o test.json --testTk 60 | 61 | - Integrated into your own project 62 | 63 | .. code:: python 64 | 65 | 66 | from Ruikowa.ObjectRegex.ASTDef import Ast 67 | from Ruikowa.ErrorHandler import ErrorHandler 68 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 69 | from Ruikowa.ObjectRegex.Tokenizer import Tokenizer 70 | 71 | from import , token_table 72 | 73 | 74 | import typing as t 75 | 76 | def token_func(src_code: str) -> t.Iterable[Tokenizer]: 77 | return Tokenizer.from_raw_strings(src_code, token_table, ({}, {})) 78 | 79 | parser = ErrorHandler(.match, token_func) 80 | 81 | def parse(filename: str) -> Ast: 82 | 83 | return parser.from_file(filename) 84 | 85 | 86 | print(parse()) 87 | 88 | Need more? See `the 89 | documents `__. 90 | 91 | Examples 92 | -------- 93 | 94 | Here are some examples to refer: 95 | 96 | EBNFParser 2.0 97 | 98 | - `Rem `__ 99 | The Rem programming language. 100 | 101 | Old version(Before EBNFParser 1.1). 102 | 103 | - | `DBG-Lang `__ 104 | | A DSL for SQL development in Python areas. 105 | 106 | - | `Rem(Based 107 | EBNFParser1.1) `__ 108 | | A full featured modern language to enhance program readability 109 | based on CPython. 110 | 111 | - | `Lang.Red `__ 112 | | An attempt to making ASDL in CPython(unfinished yet) 113 | 114 | Will support F# and Rem. 115 | 116 | .. |Build Status| image:: https://travis-ci.org/thautwarm/EBNFParser.svg?branch=boating-new 117 | :target: https://travis-ci.org/thautwarm/EBNFParser 118 | .. |PyPI version| image:: https://img.shields.io/pypi/v/EBNFParser.svg 119 | :target: https://pypi.python.org/pypi/EBNFParser 120 | .. 
|Release Note| image:: https://img.shields.io/badge/note-release-orange.svg 121 | :target: https://github.com/thautwarm/EBNFParser/blob/boating-new/Python/release-note 122 | .. |MIT License| image:: https://img.shields.io/badge/license-MIT-Green.svg?style=flat 123 | :target: https://github.com/thautwarm/EBNFParser/blob/boating-new/LICENSE 124 | .. |Doc| image:: https://img.shields.io/badge/document-2.1.2-yellow.svg?style=flat 125 | :target: http://ebnfparser.readthedocs.io/en/boating-new 126 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/Ast.py: -------------------------------------------------------------------------------- 1 | import os 2 | import linq 3 | from collections import namedtuple 4 | from typing import List, Tuple 5 | from .Token import NameEnum 6 | from ..Core.BaseDef import * 7 | from ..ErrorFamily import UnsupportedStringPrefix, find_location 8 | from ..ObjectRegex.Node import Ast 9 | from ..ObjectRegex.Tokenizer import Mode, TokenSpec, Tokenizer 10 | from ..color import Colored 11 | from ..io import grace_open 12 | 13 | SeqParserParams = namedtuple('DA', ['at_least', 'at_most']) 14 | CompilingNodes = namedtuple('CN', ['reachable', 'alone']) 15 | 16 | T = 'Union[Ast, List[Union[Ast, Tokenizer]]]' 17 | 18 | 19 | def get_string_and_mode(prefix_string: str) -> 'Tuple[Optional[str], str]': 20 | if prefix_string[0] is not '\'': 21 | return prefix_string[0], prefix_string[1:] 22 | else: 23 | return None, prefix_string 24 | 25 | 26 | def surround_with_double_quotes(string): 27 | return '"{}"'.format(string) 28 | 29 | 30 | class Compiler: 31 | # TODO: refactor and clear redundant items. 32 | def __init__(self, filename: str = None, src_code: str = None): 33 | self.src = src_code 34 | self.filename = filename 35 | 36 | self.token_func_src = None 37 | self.token_spec = TokenSpec() 38 | self.token_ignores = ('{}', '{}') # define what to ignore when tokenizing. 39 | self.prefix_mapping = {} 40 | self.cast_map = {} 41 | self.c_macro = {} 42 | 43 | self.generated_token_names = set() 44 | 45 | self.literal_parser_definitions = [] 46 | self.combined_parsers = [] 47 | 48 | self.compile_helper = CompilingNodes(set(), set()) 49 | self._current_indent = None 50 | self._current__combined_parser_name = None 51 | self._current_events = None 52 | self._current_anonymous_count = 0 53 | 54 | def ast_for_stmts(self, stmts: T) -> None: 55 | """ 56 | Stmts ::= TokenDef{0, 1} Equals*; 57 | """ 58 | if not stmts: 59 | raise ValueError('no ast found!') 60 | head, *equals = stmts 61 | 62 | if head.name is NameEnum.TokenDef: 63 | self.ast_for_token_def(head) 64 | elif head.name is NameEnum.TokenIgnore: 65 | self.ast_for_token_ignore(head) 66 | else: 67 | self.ast_for_equals(head) 68 | 69 | for each in equals: 70 | self.ast_for_equals(each) 71 | 72 | # if every combined parser can reach any other combined, 73 | # just take any of them and compile it! 
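        # Illustration with a hypothetical grammar (the rule names are made up, not
        # taken from this repository): given
        #     Expr ::= Term ... ;      Term ::= ... Expr ... ;
        # each rule is referenced by the other, so nothing is left in `alone` and the
        # fallback below promotes the most recently defined combined parser to be the
        # compilation root; Compile.py later emits one
        # `<name>.compile(namespace, recur_searcher)` line per name kept in `alone`.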
74 | if not self.compile_helper.alone and self._current__combined_parser_name: 75 | self.compile_helper.alone.add(self._current__combined_parser_name) 76 | 77 | def ast_for_token_ignore(self, token_ignore: T): 78 | _, _, *items, _ = token_ignore 79 | grouped = linq.Flow(items).GroupBy(lambda x: x.name is NameEnum.Str).Unboxed() 80 | lit_ignore = "{{{}}}".format(', '.join(map(lambda _: _.string, grouped[True]))) 81 | name_ignore = "{{{}}}".format(', '.join(map(lambda _: '"' + _.string + '"', grouped[False]))) 82 | self.token_ignores = (name_ignore, lit_ignore) 83 | 84 | def ast_for_token_def(self, token_def: T): 85 | content = token_def[1] 86 | if content.name is NameEnum.Name: 87 | path = os.path.join(* 88 | map(lambda _: '..' if _ == 'parent' else _, 89 | content.string.split('.'))) 90 | self.token_func_src = grace_open(path).read() 91 | return 92 | else: 93 | self.token_func_src = content.string[2:-2] 94 | 95 | def ast_for_combined_parser_def(self, equals: T): 96 | if equals[1].name is NameEnum.Throw: 97 | name, throw, _, expr, _ = equals 98 | throw: 'T' = self.ast_for_throw(throw) 99 | grouped = linq.Flow(throw).GroupBy(lambda x: x.name is NameEnum.Str).Unboxed() 100 | else: 101 | name, _, expr, _ = equals 102 | grouped = {True: (), False: ()} 103 | 104 | name = self._current__combined_parser_name = name.string 105 | self.token_spec.enums.__setitem__(name, f"'{name}'") 106 | 107 | if name not in self.compile_helper.reachable: 108 | self.compile_helper.alone.add(name) 109 | 110 | indent = ' ' + " " * len(name) 111 | self.combined_parsers.append( 112 | '{name} = AstParser({possibilities},\n' 113 | '{indent}name="{name}",\n' 114 | '{indent}to_ignore=({name_ignore}, {lit_ignore}))' 115 | ''.format( 116 | indent=indent, 117 | name=name, 118 | possibilities=(',\n{}'.format(indent)).join(self.ast_for_expr(expr)), 119 | lit_ignore="{{{}}}".format(', '.join(map(lambda _: _.string, grouped[True]))), 120 | name_ignore="{{{}}}".format(', '.join(map(lambda _: '"' + _.string + '"', grouped[False]))) 121 | )) 122 | 123 | def ast_for_literal_parser_def(self, equals: T): 124 | str_tks: 'List[Tokenizer]' 125 | defining_cast_map = False 126 | if equals[-2].name is NameEnum.Str: 127 | 128 | if equals[1].string is NameEnum.keyword_cast: 129 | defining_cast_map = True 130 | h, _, *t = equals 131 | equals = [h, *t] 132 | 133 | if equals[1].name is NameEnum.Prefix: 134 | name, prefix, _, *str_tks, _ = equals 135 | prefix: 'Ast' 136 | prefix_string = prefix[1].string 137 | if len(prefix_string) > 1: 138 | raise UnsupportedStringPrefix(prefix_string, 139 | " the length of prefix name should be 1 only." + 140 | find_location(self.filename, prefix[1], self.src)) 141 | self.prefix_mapping[prefix_string] = name.string 142 | 143 | elif equals[1].name is NameEnum.Of: 144 | 145 | ref_name, of, _, *str_tks, _ = equals 146 | name = of[1] 147 | self.c_macro[ref_name.string] = name.string 148 | 149 | else: 150 | name, _, *str_tks, _ = equals 151 | 152 | name = name.string 153 | if defining_cast_map: 154 | # define cast map 155 | for str_tk in str_tks: 156 | mode, string = get_string_and_mode(str_tk.string) 157 | if mode: 158 | raise UnsupportedStringPrefix(mode, 159 | 'do not support setting prefix when defining custom prefix.' 
+ 160 | find_location(self.filename, str_tk, self.src)) 161 | self.cast_map[string] = name 162 | 163 | # define how to tokenize 164 | for str_tk in str_tks: 165 | mode, string = get_string_and_mode(str_tk.string) 166 | if mode is 'R': 167 | mode = Mode.regex 168 | elif len(string) is 3: 169 | mode = Mode.char 170 | else: 171 | mode = Mode.const 172 | self.token_spec.tokens.append((name, mode, string)) 173 | if string[1:-1].isidentifier(): 174 | self.token_spec.enums.__setitem__(f'{name}_{string[1:-1]}', string) 175 | 176 | if name not in self.generated_token_names: 177 | self.literal_parser_definitions.append("{} = LiteralNameParser('{}')".format(name, name)) 178 | self.generated_token_names.add(name) 179 | self.token_spec.enums.__setitem__(name, f"'{name}'") 180 | 181 | def ast_for_equals(self, equals: T): 182 | if equals[-2].name is NameEnum.Str: 183 | self.ast_for_literal_parser_def(equals) 184 | return 185 | self.ast_for_combined_parser_def(equals) 186 | 187 | @classmethod 188 | def ast_for_throw(cls, throw: T): 189 | _, _, *items, _ = throw 190 | return items 191 | 192 | def ast_for_expr(self, expr: T): 193 | return (self.ast_for_or(each) for each in expr[::2]) 194 | 195 | def ast_for_or(self, or_expr: T): 196 | 197 | return '[{}]'.format(', '.join(self.ast_for_atom_expr(each) for each in or_expr)) 198 | 199 | def handle_atom_with_trailer(self, atom: T): 200 | maybe_tk, default_attrs = self.ast_for_atom(atom) 201 | default_attrs: 'SeqParserParams' 202 | if maybe_tk.__class__ is Tokenizer: 203 | if maybe_tk.name is NameEnum.Name: 204 | 205 | name = self.c_macro.get(maybe_tk.string, maybe_tk.string) 206 | 207 | if name in self.compile_helper.alone: 208 | self.compile_helper.alone.remove(name) 209 | 210 | if name not in self.compile_helper.reachable: 211 | self.compile_helper.reachable.add(name) 212 | 213 | return "Ref('{}')".format(name) 214 | 215 | else: 216 | mode, string = get_string_and_mode(maybe_tk.string) 217 | if not mode: 218 | for k, mode, v in self.token_spec.tokens: 219 | # check if need to create a new token pattern 220 | if v is string and k == 'auto_const': 221 | break 222 | 223 | else: 224 | if len(string) is 3: 225 | self.token_spec.tokens.append(('auto_const', Mode.char, string)) 226 | else: 227 | self.token_spec.tokens.append(('auto_const', Mode.const, string)) 228 | return string 229 | 230 | if mode is 'R': 231 | for k, mode, v in self.token_spec.tokens: 232 | if mode is Mode.regex and v == string: 233 | 234 | if k in self.compile_helper.alone: 235 | self.compile_helper.alone.remove(k) 236 | 237 | if k not in self.compile_helper.reachable: 238 | self.compile_helper.reachable.add(k) 239 | 240 | return "Ref('{}')".format(k) 241 | 242 | name: str = 'anonymous_{}'.format(self._current_anonymous_count) 243 | self._current_anonymous_count += 1 244 | warnings.warn( 245 | Colored.LightBlue + 246 | '\nFor efficiency of the parser, ' 247 | 'we do not do regex matching when parsing(only in tokenizing we use regex), ' 248 | 'you are now creating a anonymous regex literal parser ' 249 | '{}<{}>{} when defining combined parser{}\n' 250 | .format(Colored.Red, name, Colored.LightBlue, Colored.Clear)) 251 | 252 | self.token_spec.tokens.append((name, Mode.regex, string)) 253 | self.token_spec.enums.__setitem__(name, f"'{name}'") 254 | self.literal_parser_definitions.append("{} = LiteralNameParser('{}')".format(name, name)) 255 | 256 | if name in self.compile_helper.alone: 257 | self.compile_helper.alone.remove(name) 258 | 259 | if name not in self.compile_helper.reachable: 260 | 
self.compile_helper.reachable.add(name) 261 | 262 | return "Ref('{}')".format(name) 263 | 264 | elif mode is 'L': 265 | return f"L({string})" 266 | 267 | elif mode not in self.prefix_mapping: 268 | raise UnsupportedStringPrefix(mode, "Prefix not defined." 269 | + find_location(self.filename, maybe_tk, self.src)) 270 | 271 | else: 272 | name = self.prefix_mapping[mode] 273 | self.cast_map[string] = name 274 | return f"('{name}', {string})" 275 | 276 | return dict(possibilities=', '.join(maybe_tk), 277 | at_least=default_attrs.at_least, 278 | at_most=default_attrs.at_most) 279 | 280 | def ast_for_atom_expr(self, atom_expr: T): 281 | if len(atom_expr) is 1: 282 | res = self.handle_atom_with_trailer(atom_expr[0]) 283 | if res.__class__ is dict: 284 | return ('SeqParser({possibilities}, ' 285 | 'at_least={at_least},' 286 | 'at_most={at_most})'.format(**res)) 287 | return res 288 | 289 | atom, trailer = atom_expr 290 | res = self.handle_atom_with_trailer(atom) 291 | attrs = self.ast_for_trailer(trailer) 292 | 293 | if res.__class__ is dict: 294 | res.update(at_least=attrs.at_least, at_most=attrs.at_most) 295 | 296 | return ('SeqParser({possibilities}, ' 297 | 'at_least={at_least},' 298 | 'at_most={at_most})'.format(**res)) 299 | 300 | return ('SeqParser({possibilities}, ' 301 | 'at_least={at_least},' 302 | 'at_most={at_most})'.format(possibilities=res if res[0] is '[' else f'[{res}]', 303 | at_most=attrs.at_most, 304 | at_least=attrs.at_least)) 305 | 306 | def ast_for_atom(self, atom: 'Ast'): 307 | if atom[0].string is '(': 308 | return self.ast_for_expr(atom[1]), SeqParserParams(1, 1) 309 | elif atom[0].string is '[': 310 | return self.ast_for_expr(atom[1]), SeqParserParams(0, 1) 311 | 312 | return atom[0], None 313 | 314 | @classmethod 315 | def ast_for_trailer(cls, trailer): 316 | if len(trailer) is 1: 317 | trailer: 'Tokenizer' = trailer[0] 318 | return SeqParserParams(0, 'Undef') if trailer.string is '*' else SeqParserParams(1, 'Undef') 319 | else: 320 | _, *numbers, _ = trailer 321 | numbers: 'List[Tokenizer]' 322 | if len(numbers) is 2: 323 | a, b = numbers 324 | return SeqParserParams(a.string, a.string) 325 | else: 326 | return SeqParserParams(numbers[0].string, 'Undef') 327 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/Compile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Oct 19 18:00:43 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from .Ast import Compiler 10 | from .Parser import Stmts 11 | from ..ObjectRegex.Node import MetaInfo 12 | from ..ErrorHandler import ErrorHandler 13 | from .Token import token_func 14 | from ..io import grace_open 15 | 16 | include = ( 17 | "# This file is automatically generated by EBNFParser.\n" 18 | "from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer\n" 19 | "from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef\n" 20 | "namespace = globals()\n" 21 | "recur_searcher = set()") 22 | 23 | 24 | def compile(src_path, print_token=False): 25 | parser = ErrorHandler(Stmts.match, token_func) 26 | src_code = grace_open(src_path).read() 27 | stmts = parser.from_file(src_path, MetaInfo(fileName=src_path), print_token=print_token) 28 | compiler = Compiler(filename=src_path, src_code=src_code) 29 | compiler.ast_for_stmts(stmts) 30 | cast_map_dumps = 
"{{{}}}".format( 31 | ', '.join(f"{k}: unique_literal_cache_pool['{v}']" for k, v in compiler.cast_map.items())) 32 | if compiler.token_func_src: 33 | token_func_src = (f"token_table = {compiler.token_spec.to_token_table()}\n" 34 | f"{compiler.token_spec.to_name_enum()}\n" 35 | f"{cast_map_dumps}\n" 36 | f"{compiler.token_func_src}") 37 | 38 | else: 39 | token_func_src = (f"token_table = {compiler.token_spec.to_token_table()}\n" 40 | f"{compiler.token_spec.to_name_enum()}\n" 41 | f"cast_map = {cast_map_dumps}\n" 42 | f"token_func = lambda _: " 43 | "Tokenizer.from_raw_strings(_, token_table, " 44 | f"({compiler.token_ignores[0]}, " 45 | f"{compiler.token_ignores[1]})," 46 | f"cast_map=cast_map)") 47 | 48 | literal_parsers = '\n'.join(compiler.literal_parser_definitions) 49 | 50 | combined_parsers = '\n'.join(compiler.combined_parsers) 51 | 52 | compiling = '\n'.join( 53 | map(lambda _: '{}.compile(namespace, recur_searcher)'.format(_), compiler.compile_helper.alone)) 54 | 55 | return '{}\n{}\n{}\n{}\n{}'.format(include, token_func_src, literal_parsers, combined_parsers, compiling) 56 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/Parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Oct 17 10:16:52 2017 5 | 6 | @author: misakawa 7 | """ 8 | from ..ObjectRegex.Node import Ref, AstParser, SeqParser, LiteralNameParser, LiteralNameValueParser 9 | from ..ObjectRegex.Tokenizer import Tokenizer 10 | from ..ObjectRegex.MetaInfo import MetaInfo 11 | 12 | Str = LiteralNameParser('Str') 13 | Name = LiteralNameParser('Name') 14 | Number = LiteralNameParser('Number') 15 | Codes = LiteralNameParser('Codes') 16 | 17 | namespace = globals() 18 | recurSearcher = set() 19 | 20 | TokenIgnore = AstParser( 21 | [('keyword', 'ignore'), 22 | '[', 23 | SeqParser([Name], [Str]), 24 | ']'], 25 | name='TokenIgnore') 26 | 27 | Prefix = AstParser( 28 | [('keyword', 'as'), Name], 29 | name='Prefix') 30 | 31 | Of = AstParser( 32 | [('keyword', 'of'), Name], 33 | name='Of') 34 | 35 | Stmts = AstParser( 36 | [SeqParser([Ref('TokenIgnore')], 37 | [Ref('TokenDef')], 38 | at_most=1), 39 | SeqParser([Ref('Equals')])], 40 | name='Stmts') 41 | 42 | TokenDef = AstParser( 43 | [('keyword', 'deftoken'), SeqParser([Name], [Codes], at_most=1, at_least=1)], 44 | name='TokenDef') 45 | 46 | Equals = AstParser( 47 | [Name, SeqParser(['cast'], at_most=1) ,SeqParser([Ref('Prefix')], [Ref('Of')], at_most=1), ':=', SeqParser([Str]), ';'], 48 | [Name, SeqParser([Ref('Throw')], at_most=1), '::=', Ref('Expr'), ';'], 49 | name='Equals') 50 | 51 | Throw = AstParser( 52 | [('keyword', 'throw'), 53 | '[', 54 | SeqParser([Name], [Str]), 55 | ']' 56 | ], 57 | name='Throw') 58 | 59 | Expr = AstParser( 60 | [Ref('Or'), SeqParser(['|', Ref('Or')])], 61 | name='Expr') 62 | 63 | Or = AstParser( 64 | [SeqParser([Ref('AtomExpr')], at_least=1)], 65 | name=' Or') 66 | 67 | AtomExpr = AstParser( 68 | [Ref('Atom'), SeqParser([Ref('Trailer')])], 69 | name='AtomExpr') 70 | 71 | Atom = AstParser( 72 | [Str], 73 | [Name], 74 | ['[', Ref('Expr'), ']'], 75 | ['(', Ref('Expr'), ')'], 76 | name='Atom') 77 | 78 | Trailer = AstParser( 79 | ['+'], 80 | ['*'], 81 | ['{', SeqParser([Number], at_least=1, at_most=2), '}'], 82 | name='Trailer') 83 | 84 | Stmts.compile(namespace, recurSearcher) 85 | -------------------------------------------------------------------------------- 
/Python/Ruikowa/Bootstrap/Token.py: -------------------------------------------------------------------------------- 1 | import re as re 2 | from ..ObjectRegex.Tokenizer import (Tokenizer, str_matcher, regex_matcher, 3 | char_matcher, unique_literal_cache_pool) 4 | 5 | 6 | def _escape(*str_s): 7 | return '|'.join([re.escape(string) for string in str_s]) 8 | 9 | 10 | class NameEnum: 11 | keyword_as = unique_literal_cache_pool['as'] 12 | keyword_of = unique_literal_cache_pool['of'] 13 | keyword_throw = unique_literal_cache_pool['throw'] 14 | keyword_deftoken = unique_literal_cache_pool['deftoken'] 15 | keyword_ignore = unique_literal_cache_pool['ignore'] 16 | keyword_cast = unique_literal_cache_pool['cast'] 17 | 18 | Of = unique_literal_cache_pool['Of'] 19 | Prefix = unique_literal_cache_pool['Prefix'] 20 | Comments = unique_literal_cache_pool['Comments'] 21 | Str = unique_literal_cache_pool['Str'] 22 | Codes = unique_literal_cache_pool['Codes'] 23 | 24 | Name = unique_literal_cache_pool['Name'] 25 | Number = unique_literal_cache_pool['Number'] 26 | Newline = unique_literal_cache_pool['Newline'] 27 | 28 | TokenIgnore = unique_literal_cache_pool['TokenIgnore'] 29 | Single = unique_literal_cache_pool['Single'] 30 | Eq = unique_literal_cache_pool['Eq'] 31 | TokenRelated = unique_literal_cache_pool['TokenRelated'] 32 | 33 | TokenDef = unique_literal_cache_pool['TokenDef'] 34 | Throw = unique_literal_cache_pool['Throw'] 35 | 36 | 37 | token_table = ( 38 | # match by value 39 | ("auto_const", char_matcher( 40 | ('|', 41 | '{', 42 | '}', 43 | ';', 44 | '[', 45 | ']', 46 | '(', 47 | ')', 48 | '+', 49 | '*', 50 | '.') 51 | )), 52 | 53 | # match by value 54 | ("auto_const", str_matcher( 55 | ("::=", ":=") 56 | )), 57 | 58 | # match by name 59 | ('Comment', regex_matcher(re.compile(r'(#.*)|(((/\*)+?[\w\W]+?(\*/)+))'))), 60 | ("Str", regex_matcher(re.compile(r"[A-Z]'([^\\']+|\\.)*?'|'([^\\']+|\\.)*?'"))), 61 | ("Codes", regex_matcher(re.compile(r'{{[\w\W]+?\}\}'))), 62 | 63 | ("Name", regex_matcher("[a-zA-Z_\u4e00-\u9fa5][a-zA-Z0-9_\u4e00-\u9fa5\.]*")), 64 | ("Number", regex_matcher("\d+")), 65 | 66 | # do not match 67 | ("Space", regex_matcher('\s+|,')), 68 | 69 | ) 70 | 71 | token_table = tuple((unique_literal_cache_pool[k], v) for k, v in token_table) 72 | keyword = unique_literal_cache_pool['keyword'] 73 | cast_map = { 74 | 'as': keyword, 75 | 'throw': keyword, 76 | 'deftoken': keyword, 77 | 'ignore': keyword, 78 | 'for': keyword, 79 | 'of': keyword, 80 | 'cast': keyword 81 | } 82 | 83 | token_func = lambda _: Tokenizer.from_raw_strings(_, 84 | token_table, 85 | to_ignore=({'Space', 'Comment'}, {}), cast_map=cast_map) 86 | -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thautwarm/EBNFParser/101a92c4f408f9e6ce7b55aacb39cded9394521d/Python/Ruikowa/Bootstrap/__init__.py -------------------------------------------------------------------------------- /Python/Ruikowa/Bootstrap/grammar: -------------------------------------------------------------------------------- 1 | Stmts ::= [TokenIgnore | TokenDef] Equals*; 2 | 3 | TokenIgnore ::= 'ignore' '[' (Name|Str)* ']'; 4 | 5 | TokenDef ::= 'deftoken' (Name | Codes); 6 | 7 | Prefix ::= 'as' Name; 8 | Of ::= 'of' Name; 9 | 10 | 11 | Equals ::= Name ['cast'] [Prefix|Of] ':=' Str + ';'| 12 | Name [Throw] '::=' Expr ';'; 13 | 14 | Throw ::= 'throw' '[' (Name | Str)* ']'; 15 | 16 
| Expr ::= Or ('|' Or)*; 17 | 18 | Or ::= AtomExpr+; 19 | 20 | AtomExpr ::= Atom Trailer*; 21 | 22 | Atom ::= Str | 23 | Name | 24 | '(' Expr ')'| 25 | '[' Expr ']'; 26 | 27 | Trailer ::= '*' | '+' | '{' Number{1 2} '}'; 28 | 29 | /* 30 | 31 | keyword : 32 | ['ignore', 'deftoken', 'as', 'throw', 'of', 'cast'] 33 | */ 34 | 35 | 36 | -------------------------------------------------------------------------------- /Python/Ruikowa/Command.py: -------------------------------------------------------------------------------- 1 | test_lang_templates = ( 2 | """ 3 | # This file is automatically generated by EBNFParser. 4 | import argparse, json 5 | 6 | cmd_parser = argparse.ArgumentParser(description='test language parsers swiftly.') 7 | cmd_parser.add_argument("parser", type=str, 8 | help='What kind of parser do you want to test with?(e.g Stmt, Expr, ...)') 9 | cmd_parser.add_argument("codes", metavar='codes', type=str, 10 | help='input some codes in your own language here.') 11 | cmd_parser.add_argument('-o', help='output. support .json and .ast suffix.', type=str) 12 | cmd_parser.add_argument("--testTk", nargs='?', default=False, const=True) 13 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True, 14 | help='print tokens of grammar file?') 15 | 16 | args = cmd_parser.parse_args() 17 | 18 | if args.debug: 19 | from Ruikowa.Config import Debug 20 | Debug.append(1) 21 | 22 | from Ruikowa.ErrorHandler import ErrorHandler, Colored 23 | from Ruikowa.ObjectRegex.ASTDef import Ast 24 | from Ruikowa.io import grace_open 25 | from {} import * 26 | print(Colored.Green,'=========================ebnfparser test script================================', Colored.Clear) 27 | 28 | print_token = args.testTk 29 | ast: Ast = ErrorHandler(eval(args.parser).match, token_func).from_source_code('', args.codes, print_token=print_token) 30 | print(Colored.Blue, ast, Colored.Clear) 31 | if args.o: 32 | o: str = args.o.lower() 33 | if o.endswith('.json'): 34 | grace_open(o).write(json.dumps(ast.dump_to_json(), indent=2)) 35 | elif o.endswith('.ast'): 36 | grace_open(o).write(ast.dump()) 37 | else: 38 | raise Exception('Unsupported file ext.') 39 | 40 | """) 41 | 42 | 43 | def main(): 44 | import argparse 45 | 46 | cmd_parser = argparse.ArgumentParser(description='using EBNFParser.') 47 | cmd_parser.add_argument("InputFile", metavar='path of input file', type=str, 48 | help='EBNF file which describes your language\'s grammar.') 49 | cmd_parser.add_argument("OutputFile", metavar='path of output file', type=str, 50 | help='generate python file(s) that makes a parser for your language.') 51 | cmd_parser.add_argument('--test', nargs='?', default=False, const=True, 52 | help='make a script to test language parsers quickly?') 53 | cmd_parser.add_argument('--testTk', nargs='?', default=False, const=True, 54 | help='print tokens of grammar file?') 55 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True, 56 | help='print tokens of grammar file?') 57 | 58 | args = cmd_parser.parse_args() 59 | 60 | if args.debug: 61 | from .Config import Debug 62 | Debug.append(1) 63 | 64 | from .Bootstrap.Compile import compile as bootstrap_comp 65 | from .io import grace_open 66 | from .color import Colored 67 | print(Colored.Green) 68 | 69 | import sys, os 70 | 71 | inp, outp = args.InputFile, args.OutputFile 72 | 73 | head_from, _ = os.path.split(sys.argv[0]) 74 | head_to, __ParserFile__ = os.path.split(outp) 75 | 76 | generated_codes = bootstrap_comp(inp, args.testTk) 77 | path = os.path.join(head_to, 
outp) 78 | if path[-3:].lower() != '.py': 79 | path = '{}.py'.format(path) 80 | module = os.path.splitext(os.path.basename(outp))[0] 81 | grace_open('{}'.format(path)).write(generated_codes) 82 | 83 | if args.test: 84 | print('making test script....') 85 | grace_open('{}' 86 | .format(os.path.join(head_to, 'test_lang.py')) 87 | ).write(test_lang_templates.format(module)) 88 | 89 | print(Colored.Clear) 90 | 91 | 92 | if __name__ == '__main__': 93 | main() 94 | -------------------------------------------------------------------------------- /Python/Ruikowa/Config.py: -------------------------------------------------------------------------------- 1 | Debug = [] 2 | -------------------------------------------------------------------------------- /Python/Ruikowa/Core/BaseDef.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 17:46:02 2017 5 | 6 | @author: misakawa 7 | """ 8 | from ..ErrorFamily import * 9 | # ====== Define Generic Type Params ============= 10 | 11 | WarningInfo =""" 12 | You're trying to visit the elems that've been deprecated. 13 | If it occurred when you're using EBNFParser, report it as 14 | a BUG at 15 | `https://github.com/thautwarm/EBNFParser`. Thanks a lot! 16 | """ 17 | 18 | 19 | # ====== 20 | 21 | Undef = None 22 | class Const: 23 | def __new__(self): 24 | raise ObjectUsageError("You're trying to new an instance with a module.") 25 | UnMatched = None 26 | NameFilter = 0 27 | RawFilter = 1 28 | RegexFilter= 2 29 | 30 | 31 | class RecursiveFound(Exception): 32 | def __init__(self, node): 33 | self.node = node 34 | self.possibilities = [] 35 | def add(self, possibility): 36 | self.possibilities.append(possibility) 37 | 38 | def __str__(self): 39 | s = '=====\n' 40 | s+=self.node.name+'\n' 41 | s+='\n'.join(a.name +' | '+str([c.name for c in b]) 42 | for a,b in self.possibilities) 43 | return s 44 | 45 | 46 | class Recur: 47 | def __new__(self, name, count): 48 | return (name, count) 49 | 50 | class Trace: 51 | def __init__(self, 52 | trace = Undef, 53 | length = Undef): 54 | self.length = length if length is not Undef else\ 55 | len(trace) if trace is not Undef else\ 56 | 0 57 | self.content = trace if trace is not Undef else\ 58 | [] 59 | self._Mem = len(self.content) 60 | 61 | 62 | def __iter__(self): 63 | yield from self.content[:self.length] 64 | 65 | 66 | def __getitem__(self, item): 67 | if isinstance(item, int): 68 | if item >= self.length: 69 | warnings.warn(WarningInfo) 70 | return self.content[item] 71 | elif isinstance(item, slice): 72 | if item.stop > self.length: 73 | warnings.warn(WarningInfo) 74 | return self.content[item] 75 | 76 | 77 | 78 | def append(self, elem): 79 | # reuse the memory cache 80 | if self.length==self._Mem: 81 | self.length += 1 82 | self._Mem += 1 83 | self.content.append(elem) 84 | elif self.length < self._Mem: 85 | self.content[self.length] = elem 86 | self.length += 1 87 | 88 | def new(self, constructor): 89 | # just can be used for Type `Trace[Contrainer[T]]` 90 | # reuse the memory cache 91 | if self.length==self._Mem: 92 | self.length += 1 93 | self._Mem += 1 94 | self.content.append(constructor()) 95 | elif self.length < self._Mem: 96 | self.content[self.length].length = 0 97 | self.length += 1 98 | 99 | def pop(self): 100 | self.length -= 1 101 | assert self.length>=0 102 | 103 | def where(self, obj): 104 | for idx, elem in enumerate(self.content[:self.length]): 105 | if elem is obj: 106 | return idx 107 | 
return Undef 108 | 109 | def mem(self): 110 | return self._Mem 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /Python/Ruikowa/Core/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /Python/Ruikowa/ErrorFamily.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 19:28:51 2017 5 | 6 | @author: misakawa 7 | """ 8 | from pprint import pprint 9 | from .color import Colored 10 | 11 | if False: 12 | from .ObjectRegex.MetaInfo import MetaInfo 13 | from typing import Sequence, Optional 14 | from .ObjectRegex.Tokenizer import Tokenizer 15 | 16 | use_py_error = False 17 | use_py_warnings = False 18 | 19 | import warnings 20 | 21 | 22 | class ObjectUsageError(Exception): 23 | pass 24 | 25 | 26 | class CheckConditionError(Exception): 27 | pass 28 | 29 | 30 | class UnsolvedError(Exception): 31 | pass 32 | 33 | 34 | class DSLSyntaxError(SyntaxError): 35 | pass 36 | 37 | 38 | if use_py_warnings: 39 | Warnings = warnings 40 | else: 41 | class Warnings: 42 | @classmethod 43 | def warn(cls, *msg): 44 | print(Colored.LightBlue, 'UserWarning:', *msg) 45 | 46 | if use_py_error: 47 | class Error: 48 | def __init__(self, *args): 49 | print(Colored.Purple, '{}: '.format(self.__class__.__name__), *args) 50 | raise Exception(self.__class__.__name__) 51 | else: 52 | Error = Exception 53 | 54 | 55 | class UnsupportedStringPrefix(Error): 56 | def __init__(self, mode, msg=''): 57 | Error.__init__(self, 58 | '\n' + msg + '\n' + 59 | Colored.LightBlue + "Unsupported string prefix " + Colored.Red + '{}' 60 | .format(mode) + Colored.LightBlue + "." + Colored.Clear) 61 | 62 | 63 | def find_location(filename, where: 'Tokenizer', src_code: str = None): 64 | if src_code: 65 | row = src_code.splitlines()[where.lineno] 66 | else: 67 | row = '' 68 | 69 | return "{}{}{}{} ---- at file {} line {}".format(Colored.Green, row[:where.colno], Colored.Red, row[where.colno:], 70 | filename, where.lineno + 1) + Colored.Clear 71 | 72 | 73 | class UniqueNameConstraintError(Error): 74 | def __init__(self, name, msg=''): 75 | Error.__init__(self, 76 | '\n' + msg + '\n' + 77 | Colored.Blue + "Name " + Colored.Red + '{}' 78 | .format(name) + Colored.Blue + "should be unique." 
+ Colored.Clear) 79 | -------------------------------------------------------------------------------- /Python/Ruikowa/ErrorHandler.py: -------------------------------------------------------------------------------- 1 | if False: 2 | from .ObjectRegex.MetaInfo import MetaInfo 3 | from typing import Sequence, Optional 4 | from .ObjectRegex.Tokenizer import Tokenizer 5 | from .ErrorFamily import * 6 | from pprint import pprint 7 | 8 | 9 | class ErrorHandler: 10 | 11 | def __init__(self, parse_func, token_func=None): 12 | self.parse_func = parse_func 13 | self.token_func = token_func 14 | 15 | def mut_parser_by(self, new_func): 16 | self.parse_func = new_func(self.parse_func) 17 | 18 | def mut_token_by(self, new_func): 19 | self.token_func = new_func(self.token_func) 20 | 21 | def from_file(self, filename: str, meta: 'MetaInfo' = None, partial=False, print_token=False): 22 | with open(filename, 'r', encoding='utf8') as f: 23 | raw_string = f.read() 24 | return self.from_source_code(filename, raw_string, meta, partial, print_token) 25 | 26 | def from_source_code(self, filename: str, src_code: str, meta: 'MetaInfo' = None, partial=False, print_token=False): 27 | tokens: 'Sequence[Tokenizer]' = tuple(self.token_func(src_code)) 28 | if print_token: 29 | pprint(tokens) 30 | return self.from_tokens(filename, src_code, tokens, meta, partial) 31 | 32 | def from_tokens(self, filename: str, src_code: str, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', partial=False): 33 | if meta is None: 34 | from .ObjectRegex.MetaInfo import MetaInfo 35 | meta = MetaInfo(fileName=filename) 36 | 37 | if not meta: 38 | raise CheckConditionError("Meta Information not defined yet!") 39 | 40 | res = self.parse_func(tokens, meta=meta) 41 | if res is None or (not partial and len(tokens) != meta.count): 42 | max_fetched = meta.max_fetched 43 | try: 44 | where = tokens[max_fetched] 45 | except IndexError: 46 | for i in range(max_fetched - 1, meta.count, -1): 47 | try: 48 | where = tokens[i] 49 | break 50 | except: 51 | continue 52 | else: 53 | raise DSLSyntaxError( 54 | f"totally wrong syntax!(first word has been wrong!)") 55 | 56 | row = src_code.splitlines()[where.lineno] 57 | raise DSLSyntaxError( 58 | "{}{}{}{} ---- at file {} line {}{}" 59 | .format(Colored.Green, row[:where.colno - 1], Colored.Red, 60 | row[where.colno - 1:], 61 | filename, where.lineno + 1, Colored.Clear)) 62 | return res 63 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/ASTDef.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 19:23:04 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from .Tokenizer import Tokenizer 10 | from typing import List, Union, Sequence, Iterator, Collection 11 | 12 | if False: 13 | from .MetaInfo import MetaInfo 14 | 15 | INDENT_UNIT = ' ' * 4 16 | 17 | 18 | class Ast(list): 19 | # List[Union[Tokenizer, Ast]] 20 | 21 | def __init__(self, meta: 'MetaInfo', name: str): 22 | list.__init__(self) 23 | self.name = name 24 | self.meta = meta 25 | 26 | def appendleft(self, obj): 27 | self.reverse() 28 | self.append(obj) 29 | self.reverse() 30 | 31 | def __iter__(self) -> 'Iterator[Union[Tokenizer, Ast]]': 32 | return list.__iter__(self) 33 | 34 | def __getitem__(self, item) -> 'Union[Tokenizer, Ast]': 35 | return list.__getitem__(self, item) 36 | 37 | def __str__(self): 38 | return self.dump() 39 | 40 | def dump(self, indent=0): 41 | next_indent = 
indent + 1 42 | return """{INDENT}{NAME}[ 43 | {CONTENT} 44 | {INDENT}]""".format(INDENT=INDENT_UNIT * indent, 45 | NAME=self.name, 46 | CONTENT='\n'.join( 47 | node.dump(next_indent) 48 | if isinstance(node, Ast) else \ 49 | "{NEXT_INDENT}{STR}".format(NEXT_INDENT=INDENT_UNIT * next_indent, STR=node) 50 | 51 | for node in self 52 | )) 53 | 54 | def dump_to_json(self): 55 | return dict(name=self.name, 56 | value=tuple(node.dump_to_json() for node in self)) 57 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/MetaInfo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 18:54:45 2017 5 | 6 | @author: misakawa 7 | """ 8 | from ..Core.BaseDef import * 9 | 10 | 11 | class MetaInfo: 12 | """ 13 | Meta information when parsing. 14 | 15 | `count` is a property of MetaInfo. 16 | It shows that how many tokenized(words) have been parsed, 17 | which could be used for 18 | - Alerting. 19 | - Eliminating left recursions. 20 | 21 | `trace` is a property of MetaInfo. 22 | It shows a trace of recursive BNF Nodes, 23 | which could be used for 24 | - Debugging. 25 | - Eliminating left recursions. 26 | 27 | `rdx` is a property of MetaInfo. 28 | It shows how many lines have beeb parsed now. 29 | which could be used for 30 | - Alerting. 31 | - Debugging. 32 | 33 | `fileName` is also a property of MetaInfo. 34 | It suggests which file the parser works on. 35 | 36 | """ 37 | 38 | def __init__(self, count=0, trace=None, fileName=None): 39 | 40 | self.count = count 41 | if trace: 42 | self.trace = trace 43 | else: 44 | self.trace = Trace() 45 | self.trace.append(Trace()) 46 | # self.history = [] 47 | self.fileName = fileName if fileName else "" 48 | 49 | def new(self): 50 | self.count += 1 51 | self.trace.new(Trace) 52 | 53 | def commit(self): 54 | return self.count, self.trace[self.count].length 55 | 56 | def rollback(self, history): 57 | count, length = history 58 | self.count = count 59 | self.trace.length = count + 1 60 | self.trace[count].length = length 61 | 62 | def clone(self): 63 | """ 64 | Get a copy of 65 | (RowIdx, 66 | NumberOfParsedWords, 67 | FileName) 68 | from current meta information. 69 | """ 70 | return self.count, self.fileName 71 | 72 | def __str__(self): 73 | return """ 74 | -------------------- 75 | COUNT : {COUNT} 76 | TRACE : 77 | {TRACE} 78 | -------------------- 79 | """.format(COUNT=self.count, 80 | TRACE='\n'.join( 81 | ['[' + (','.join([item.name for item in unit])) + ']' for unit in self.trace]) 82 | ) 83 | 84 | @property 85 | def max_fetched(self): 86 | return self.trace.mem() 87 | 88 | 89 | """ 90 | use list as trace 91 | """ 92 | # class MetaInfo: 93 | # def __init__(self, count=0, rdx=0, trace=None, fileName=None): 94 | # 95 | # self.count = count 96 | # if trace: 97 | # self.trace = trace 98 | # else: 99 | # self.trace = [[]] 100 | # self.rdx = rdx 101 | # self.history = [] 102 | # self.fileName = fileName if fileName else "" 103 | # 104 | # def branch(self): 105 | # """ 106 | # Save a record of parsing history in order to trace back. 107 | # """ 108 | # self.history.append((self.count, self.rdx, len(self.trace[self.count]) )) 109 | # def rollback(self): 110 | # """ 111 | # Trace back. 
112 | # """ 113 | # 114 | # try: 115 | # count, rdx, length = self.history.pop() 116 | # except IndexError: 117 | # return None 118 | # 119 | # self.count = count 120 | # self.rdx = rdx 121 | # self.trace[count] = self.trace[count][:length] 122 | # 123 | # def pull(self): 124 | # """ 125 | # Confirm the current parsing results. 126 | # Pop a record in parsing history. 127 | # """ 128 | # try: 129 | # self.history.pop() 130 | # except IndexError: 131 | # raise Exception("pull no thing") 132 | # 133 | # def new(self): 134 | # self.count += 1 135 | # self.trace.append([]) 136 | # 137 | # def clone(self): 138 | # """ 139 | # Get a copy of 140 | # (RowIdx, 141 | # NumberOfParsedWords, 142 | # FileName) 143 | # from current meta information. 144 | # """ 145 | # return (self.rdx, self.count, self.fileName) 146 | # 147 | # def __str__(self): 148 | # return """ 149 | # -------------------- 150 | # COUNT : {COUNT} 151 | # ROW_IDX : {ROW_DIX} 152 | # TRACE : 153 | # {TRACE} 154 | # -------------------- 155 | # """.format(COUNT=self.count, 156 | # ROW_DIX=self.rdx, 157 | # TRACE='\n'.join( 158 | # ['[' + (','.join([item.name for item in unit])) + ']' for unit in self.trace]) 159 | # ) 160 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/Node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 14 18:53:53 2017 5 | 6 | @author: misakawa 7 | """ 8 | from abc import ABC, abstractmethod 9 | from typing import Union, List, Tuple, Collection 10 | from ..Core.BaseDef import * 11 | from .MetaInfo import MetaInfo 12 | from ..ErrorFamily import * 13 | from .ASTDef import Ast 14 | from .Optimize import optimize 15 | from .Tokenizer import unique_lit_name, unique_lit_value, unique_literal_cache_pool, Tokenizer 16 | from ..Config import Debug 17 | 18 | if Debug: 19 | from ..Tools import function_debugger 20 | 21 | DEBUG_INDENT = 1 22 | debugger = function_debugger('tag', 'content') 23 | 24 | 25 | class Ignore: 26 | Value = 0 27 | Name = 1 28 | 29 | 30 | def debug(msg): 31 | def wrap(func): 32 | def call(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', *args, **kwargs): 33 | global DEBUG_INDENT 34 | if not isinstance(self, AstParser): 35 | now = tokens[meta.count] 36 | if hasattr(self, 'mode'): 37 | profile = f'{self.name}[{self.mode}] matching {now}' 38 | else: 39 | profile = f'{self.name} matching {now}' 40 | else: 41 | profile = self.name 42 | print(Colored.Purple2, 43 | debugger(dict( 44 | tag=f'start {self.__class__.__name__}', 45 | Profile=profile, 46 | Name=self.name, 47 | content=msg, 48 | Meta=meta.count), 49 | indent=DEBUG_INDENT * 4, 50 | inc_indent=2), 51 | Colored.Clear, '\n') 52 | 53 | DEBUG_INDENT += 1 54 | res = func(self, tokens, meta, *args, **kwargs) 55 | DEBUG_INDENT -= 1 56 | 57 | print(Colored.LightBlue, 58 | debugger( 59 | dict( 60 | tag=f'end {self.__class__.__name__}', 61 | Name=self.name, 62 | Profile=profile, 63 | content=msg, 64 | Return=True if res else False, 65 | Meta=meta.count), 66 | indent=DEBUG_INDENT * 4, 67 | inc_indent=1), 68 | Colored.Clear, '\n') 69 | 70 | return res 71 | 72 | return call if Debug else func 73 | 74 | return wrap 75 | 76 | 77 | ParserCollections = 'Union[LiteralNameParser, LiteralNameValueParser, LiteralValueParser, AstParser, SeqParser]' 78 | 79 | 80 | def parser_name_helper( 81 | pattern: 'ParserCollections'): 82 | if pattern.__class__ is LiteralNameValueParser: 83 | return 
f"{pattern.name}['{pattern.mode}']" 84 | elif pattern.__class__ is LiteralValueParser: 85 | return f"'{pattern.mode}'" 86 | else: 87 | return pattern.name 88 | 89 | 90 | class BaseParser(ABC): 91 | """Abstract Class""" 92 | name = Undef 93 | has_recur = Undef 94 | 95 | @abstractmethod 96 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 97 | """Abstract Method""" 98 | raise NotImplemented 99 | 100 | 101 | class LiteralNameParser(BaseParser): 102 | """ 103 | To parse tokenizer with specific name. 104 | for regex exp 105 | """ 106 | 107 | def __init__(self, name): 108 | self.name = name 109 | 110 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 111 | try: 112 | value: 'Tokenizer' = tokens[meta.count] 113 | except IndexError: 114 | return Const.UnMatched 115 | if value.name is self.name: 116 | meta.new() 117 | return value 118 | return Const.UnMatched 119 | 120 | 121 | class LiteralValueParser(BaseParser): 122 | """ 123 | for const char* 124 | """ 125 | 126 | def __init__(self, mode): 127 | self.name = self.mode = mode 128 | 129 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 130 | try: 131 | value: 'Tokenizer' = tokens[meta.count] 132 | except IndexError: 133 | return Const.UnMatched 134 | if value.string is self.mode: 135 | meta.new() 136 | return value 137 | return Const.UnMatched 138 | 139 | 140 | class LiteralNameValueParser(BaseParser): 141 | """ 142 | for const char* and its group name 143 | """ 144 | 145 | def __init__(self, name, mode): 146 | self.name = name 147 | self.mode = mode 148 | 149 | @debug('literal name value') 150 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 151 | try: 152 | value: 'Tokenizer' = tokens[meta.count] 153 | except IndexError: 154 | return Const.UnMatched 155 | if value.name is self.name and value.string is self.mode: 156 | meta.new() 157 | return value 158 | return Const.UnMatched 159 | 160 | 161 | class Ref(BaseParser): 162 | def __init__(self, name): 163 | self.name = unique_literal_cache_pool[name] 164 | 165 | def match(self, tokens: 'Sequence[Tokenizer]', meta: 'MetaInfo', recur: 'Recur' = Undef): 166 | raise NotImplemented 167 | 168 | 169 | class AstParser(BaseParser): 170 | 171 | def __init__(self, *cases, name=Undef, to_ignore=Undef): 172 | # each in the cache will be processed into a parser. 173 | cases = tuple( 174 | tuple( 175 | LiteralValueParser(each) if isinstance(each, str) else 176 | LiteralNameValueParser(each[0], each[1]) if isinstance(each, tuple) else 177 | each 178 | for each in p) 179 | for p in cases) 180 | self.cache: 'Tuple[Tuple[ParserCollections]]' = optimize(cases) 181 | 182 | # the possible output types for an series of input tokenized words. 183 | self.possibilities = [] 184 | 185 | # whether this parser will refer to itself. 186 | self.has_recur = False 187 | 188 | # the identity of a parser. 189 | 190 | self.name = name if name is not Undef else \ 191 | ' | '.join( 192 | ' '.join( 193 | map(parser_name_helper, case)) for case in cases) 194 | 195 | # is this parser compiled, must be False when initializing. 196 | self.compiled = False 197 | 198 | # if a parser's name is in this set, the result it output will be ignored when parsing. 
199 | self.to_ignore = to_ignore 200 | 201 | def compile(self, namespace: dict, recur_searcher: set): 202 | if self.name in recur_searcher: 203 | self.has_recur = True 204 | self.compiled = True 205 | else: 206 | recur_searcher.add(self.name) 207 | 208 | if self.compiled: 209 | return self 210 | 211 | for es in self.cache: 212 | self.possibilities.append([]) 213 | 214 | for e in es: 215 | 216 | if e.__class__ is LiteralNameParser: 217 | 218 | if e.name not in namespace: 219 | unique_lit_name(e) 220 | namespace[e.name] = e 221 | 222 | else: 223 | e = namespace[e.name] 224 | 225 | self.possibilities[-1].append(e) 226 | 227 | elif e.__class__ is LiteralValueParser: 228 | literal = parser_name_helper(e) 229 | 230 | if literal not in namespace: 231 | unique_lit_value(e) 232 | namespace[literal] = e 233 | 234 | else: 235 | e = namespace[literal] 236 | 237 | self.possibilities[-1].append(e) 238 | 239 | elif e.__class__ is LiteralNameValueParser: 240 | name_literal = parser_name_helper(e) 241 | 242 | if name_literal not in namespace: 243 | unique_lit_value(e) 244 | unique_lit_name(e) 245 | namespace[name_literal] = e 246 | else: 247 | e = namespace[name_literal] 248 | 249 | self.possibilities[-1].append(e) 250 | 251 | elif e.__class__ is Ref: 252 | e = namespace[e.name] 253 | 254 | if isinstance(e, AstParser): 255 | e.compile(namespace, recur_searcher) 256 | 257 | self.possibilities[-1].append(e) 258 | 259 | if not self.has_recur and e.has_recur: 260 | self.has_recur = True 261 | 262 | else: 263 | if e.name not in namespace: 264 | unique_lit_name(e) 265 | namespace[e.name] = e 266 | else: 267 | e = namespace[e.name] 268 | 269 | e.compile(namespace, recur_searcher) 270 | self.possibilities[-1].append(e) 271 | 272 | if not self.has_recur and e.has_recur: 273 | self.has_recur = True 274 | 275 | if hasattr(self, 'cache'): 276 | del self.cache 277 | 278 | if self.name in recur_searcher: 279 | recur_searcher.remove(self.name) 280 | 281 | if not self.compiled: 282 | self.compiled = True 283 | 284 | @debug("match") 285 | def match(self, tokens, meta: 'MetaInfo', recur: 'Recur' = Undef): 286 | if self.has_recur and self in meta.trace[meta.count]: 287 | if isinstance(self, SeqParser) or recur is self: 288 | return Const.UnMatched 289 | 290 | raise RecursiveFound(self) 291 | history = meta.commit() 292 | if self.has_recur: 293 | meta.trace[meta.count].append(self) 294 | 295 | for possibility in self.possibilities: 296 | result = self.pattern_match(tokens, meta, possibility, recur=recur) 297 | if result is Const.UnMatched: 298 | meta.rollback(history) 299 | continue 300 | elif isinstance(result, Ast): 301 | break 302 | elif isinstance(result, RecursiveFound): 303 | meta.rollback(history) 304 | break 305 | else: 306 | return Const.UnMatched 307 | 308 | return result 309 | 310 | def pattern_match(self, tokens, meta, possibility, recur=Undef): 311 | 312 | try: # Not recur 313 | result = Ast(meta.clone(), self.name) 314 | for parser in possibility: 315 | r = parser.match(tokens, meta=meta, recur=recur) 316 | # if `result` is still empty, it might not allow LR now. 317 | if isinstance(r, Tokenizer) or isinstance(r, Ast): 318 | result_merge(result, r, parser, self.to_ignore) 319 | 320 | elif r is Const.UnMatched: 321 | return Const.UnMatched 322 | 323 | elif isinstance(r, RecursiveFound): 324 | raise r 325 | 326 | else: 327 | raise UnsolvedError("Unsolved return type. 
{}".format(r.__class__)) 328 | else: 329 | return result 330 | 331 | except RecursiveFound as RecurInfo: 332 | parser: 'ParserCollections' 333 | RecurInfo.add((self, possibility[possibility.index(parser) + 1:])) 334 | 335 | # RecurInfo has a trace of Beginning Recur Node to Next Recur Node with 336 | # specific possibility. 337 | if RecurInfo.node is not self: 338 | return RecurInfo 339 | 340 | return left_recursion(tokens, meta, possibility, RecurInfo) 341 | 342 | 343 | def result_merge(result, r, parser, to_ignore): 344 | if parser.__class__ is SeqParser or parser.__class__ is AccompaniedAstParser: 345 | 346 | if to_ignore is Undef: 347 | result.extend(r) 348 | else: 349 | result.extend([item for item in r if 350 | ((item.string not in to_ignore[Const.RawFilter] 351 | and item.name not in to_ignore[Const.NameFilter] 352 | ) if item.__class__ is Tokenizer else ( 353 | item.name not in to_ignore[Const.NameFilter]))]) 354 | else: 355 | if to_ignore is Undef: 356 | result.append(r) 357 | else: 358 | if r.__class__ is Tokenizer: 359 | if r.string not in to_ignore[Const.RawFilter] and r.name not in to_ignore[Const.NameFilter]: 360 | result.append(r) 361 | elif r.name not in to_ignore[Const.NameFilter]: 362 | result.append(r) 363 | 364 | 365 | def left_recursion(cases, meta: 'MetaInfo', recur_case, recur_info): 366 | recur = recur_info.node 367 | for case in recur.possibilities: 368 | if case is recur_case: 369 | continue 370 | 371 | very_first = recur.pattern_match(cases, meta, case, recur=recur) 372 | if isinstance(very_first, RecursiveFound) or very_first is Const.UnMatched: 373 | continue 374 | else: 375 | history = meta.commit() 376 | first = very_first 377 | recur_depth_count = 0 378 | while True: 379 | for parser, possibility in recur_info.possibilities: 380 | result = parser.pattern_match(cases, meta, possibility, recur=recur) 381 | if result is Const.UnMatched: 382 | meta.rollback(history) 383 | return Const.UnMatched if recur_depth_count is 0 else very_first 384 | elif isinstance(result, Ast): 385 | result.appendleft(first) 386 | elif isinstance(result, RecursiveFound): 387 | raise UnsolvedError("Error occurs : found a new left recursion when handling an other.") 388 | else: 389 | raise UnsolvedError("Unsolved return from method `patternMatch`.") 390 | first = result 391 | recur_depth_count += 1 392 | very_first = first 393 | else: 394 | # Fail to match any case. 
395 | return Const.UnMatched 396 | 397 | 398 | class AccompaniedAstParser(AstParser): 399 | pass 400 | 401 | 402 | class SeqParser(AstParser): 403 | 404 | def __init__(self, *cases, name=Undef, at_least=0, at_most=Undef): 405 | super(SeqParser, self).__init__(*cases, name=name) 406 | 407 | if at_most is Undef: 408 | if at_least is 0: 409 | self.name = f"({self.name})*" 410 | else: 411 | self.name = f'({self.name}){{{at_least}}}' 412 | else: 413 | self.name = f"({self.name}){{{at_least},{at_most}}}" 414 | 415 | self.at_least = at_least 416 | self.at_most = at_most 417 | 418 | def match(self, tokens, meta: 'MetaInfo', recur=Undef): 419 | 420 | result = Ast(meta.clone(), self.name) 421 | 422 | if meta.count == len(tokens): # boundary cases 423 | if self.at_least is 0: 424 | return result 425 | return Const.UnMatched 426 | 427 | history = meta.commit() 428 | matched_num = 0 429 | if self.at_most is not Undef: 430 | """ (ast){a b} """ 431 | while True: 432 | if matched_num >= self.at_most: 433 | break 434 | try: 435 | r = AstParser.match(self, tokens, meta=meta, recur=recur) 436 | except IndexError: 437 | break 438 | 439 | if r is Const.UnMatched: 440 | break 441 | 442 | elif isinstance(r, RecursiveFound): 443 | raise UnsolvedError("Cannot make left recursions in SeqParser!!!") 444 | 445 | result.extend(r) 446 | matched_num += 1 447 | else: 448 | """ ast{a} | [ast] | ast* """ 449 | while True: 450 | try: 451 | r = AstParser.match(self, tokens, meta=meta, recur=recur) 452 | except IndexError: 453 | break 454 | 455 | if r is Const.UnMatched: 456 | break 457 | 458 | elif isinstance(r, RecursiveFound): 459 | raise UnsolvedError("Cannot make left recursions in SeqParser!!!") 460 | 461 | result.extend(r) 462 | matched_num += 1 463 | 464 | if matched_num < self.at_least: 465 | meta.rollback(history) 466 | return Const.UnMatched 467 | 468 | return result 469 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/Optimize.py: -------------------------------------------------------------------------------- 1 | def analyze(cases): 2 | from .Node import LiteralValueParser, LiteralNameValueParser 3 | if len(cases) is 1 or not all(cases): 4 | return None 5 | 6 | groups = dict() 7 | group_order = [] 8 | 9 | for case in cases: 10 | head = case[0] 11 | if isinstance(head, LiteralValueParser): 12 | group_id = "value:" + head.mode 13 | elif isinstance(head, LiteralNameValueParser): 14 | group_id = f'ref: {head.name} value: {head.mode}' 15 | else: 16 | group_id = "ref:" + head.name 17 | 18 | if group_id not in group_order: 19 | 20 | groups[group_id] = [case] 21 | group_order.append(group_id) 22 | else: 23 | groups[group_id].append(case) 24 | 25 | if len(group_order) is 1: 26 | return None 27 | 28 | return groups, group_order 29 | 30 | 31 | def grammar_remake(groups, group_order): 32 | from .Node import AccompaniedAstParser 33 | return tuple( 34 | ( 35 | (groups[groupId][0][0], 36 | AccompaniedAstParser(*[case[1:] for case in groups[groupId]]) 37 | ) 38 | if len(groups[groupId]) > 1 else groups[groupId][0] 39 | ) 40 | for groupId in group_order) 41 | 42 | 43 | def optimize(cases): 44 | analyzed = analyze(cases) 45 | if analyzed is None: 46 | return cases 47 | groups, group_order = analyzed 48 | return grammar_remake(groups, group_order) 49 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/Tokenizer.py: -------------------------------------------------------------------------------- 1 | try: 2 | from 
typing import Iterable, Tuple, List, Dict, Set 3 | 4 | if False: 5 | from re import __Regex 6 | except ModuleNotFoundError: 7 | pass 8 | 9 | import re 10 | import json 11 | import linq 12 | from collections import defaultdict 13 | from ..ErrorFamily import UniqueNameConstraintError 14 | from ..ErrorHandler import Colored, Warnings as warnings 15 | 16 | 17 | class Mode: 18 | regex = 0 19 | keyword = const = 1 20 | char = 2 21 | 22 | 23 | class TokenSpec: 24 | def __init__(self): 25 | self.enums: 'Dict[str, str]' = {} 26 | # enum name -> const string 27 | 28 | self.tokens: 'List[Tuple[str, int, str]]' = [] 29 | 30 | def to_token_table(self, indent=15): 31 | generated_tokens = set() 32 | _join = f',\n{" "*indent}'.join 33 | if not self.tokens: 34 | return '()' 35 | groups = linq.Flow(self.tokens).Group(lambda name, mode, string: (name, mode if mode is not Mode.regex else string)).Unboxed() 36 | 37 | def make_each(group: 'List[Tuple[str, int, str]]'): 38 | name, mode, string = group.__iter__().__next__() 39 | if mode is Mode.regex: 40 | return '(unique_literal_cache_pool["{name}"], regex_matcher({string}))'.format(name=name, string=string) 41 | 42 | modes = [] 43 | for _, _, string in group: 44 | 45 | tp = (name, string) 46 | if tp not in generated_tokens: 47 | modes.append(string) 48 | generated_tokens.add(tp) 49 | 50 | if not modes: 51 | return None 52 | 53 | match_mode = ', '.join(sorted(modes, reverse=True)) 54 | 55 | if mode is Mode.char: 56 | return '(unique_literal_cache_pool["{}"], char_matcher(({})))'.format(name, match_mode) 57 | 58 | return '(unique_literal_cache_pool["{}"], str_matcher(({})))'.format(name, match_mode) 59 | 60 | token_items = linq.Flow(groups).Map(make_each).Filter(lambda x: x).Then(_join).Unboxed() 61 | return '({},)'.format(token_items) 62 | 63 | def to_name_enum(self): 64 | 65 | if not self.enums: 66 | return "" 67 | indent = f'\n{" "*4}' 68 | _join = indent.join 69 | 70 | name_enums = linq.Flow( 71 | self.enums.items() 72 | ).Map( 73 | lambda name, string: f"{name} = unique_literal_cache_pool[{string}]" 74 | ).Then( 75 | _join 76 | ).Unboxed() 77 | 78 | enum_class_spec = """ 79 | class UNameEnum: 80 | # names 81 | {}{} 82 | """.format(indent, 83 | name_enums) 84 | 85 | return enum_class_spec 86 | 87 | 88 | class Tokenizer: 89 | def __init__(self, name: str, string: str, lineno: int, colno: int): 90 | self.name = name 91 | self.lineno = lineno 92 | self.colno = colno 93 | self.string = string 94 | 95 | def dump_to_json(self): 96 | return dict(name=self.name, string=self.string, lineno=self.lineno, colno=self.colno) 97 | 98 | def dump(self): 99 | return self.__str__() 100 | 101 | def __repr__(self): 102 | return f'[name: {self.name}, string: "{self.string}", lineno: {self.lineno}, colno: {self.colno}]' 103 | 104 | def __str__(self): 105 | 106 | return '[name: {}, string: "{}"]'.format(self.name, self.string) 107 | 108 | @staticmethod 109 | def from_raw_strings(raw_string: str, token_table: 'Iterable', to_ignore=({}, {}), cast_map: dict = None): 110 | if cast_map is None: 111 | cast_map = {} 112 | 113 | if not raw_string: 114 | return () 115 | lineno = 0 116 | colno = 0 117 | pos = 0 118 | n = len(raw_string) 119 | while True: 120 | for name, pat in token_table: 121 | w = pat(raw_string, pos) 122 | if w: 123 | row_inc = w.count('\n') 124 | length = len(w) 125 | 126 | if row_inc: 127 | lineno += row_inc 128 | colno = length - w.rfind('\n') - 1 129 | else: 130 | colno += length 131 | 132 | pos += length 133 | 134 | if name not in to_ignore[0] and w not in 
to_ignore[1]: 135 | if w in cast_map: 136 | name = cast_map[w] 137 | w = unique_literal_cache_pool[w] 138 | yield Tokenizer(name, w, lineno, colno) 139 | else: 140 | yield Tokenizer(unique_literal_cache_pool[name], w, lineno, colno) 141 | 142 | if n == pos: 143 | return 144 | break 145 | 146 | else: 147 | warnings.warn('no token def {}'.format(raw_string[pos].encode())) 148 | if raw_string[pos] is '\n': 149 | colno = 0 150 | lineno += 1 151 | else: 152 | colno += 1 153 | pos += 1 154 | if n == pos: 155 | return 156 | break 157 | 158 | 159 | def char_matcher(mode): 160 | """ 161 | a faster way for characters to generate token strings cache 162 | """ 163 | 164 | def f_raw(inp_str, pos): 165 | return mode if inp_str[pos] is mode else None 166 | 167 | def f_collection(inp_str, pos): 168 | ch = inp_str[pos] 169 | for each in mode: 170 | if ch is each: 171 | return ch 172 | return None 173 | 174 | if isinstance(mode, str): 175 | return f_raw 176 | 177 | if len(mode) is 1: 178 | mode = mode[0] 179 | return f_raw 180 | 181 | return f_collection 182 | 183 | 184 | def str_matcher(mode): 185 | """ 186 | generate token strings' cache 187 | """ 188 | 189 | def f_raw(inp_str, pos): 190 | return unique_literal_cache_pool[mode] if inp_str.startswith(mode, pos) else None 191 | 192 | def f_collection(inp_str, pos): 193 | for each in mode: 194 | if inp_str.startswith(each, pos): 195 | return unique_literal_cache_pool[each] 196 | return None 197 | 198 | if isinstance(mode, str): 199 | return f_raw 200 | 201 | if len(mode) is 1: 202 | mode = mode[0] 203 | return f_raw 204 | 205 | return f_collection 206 | 207 | 208 | def regex_matcher(regex_pat): 209 | """ 210 | generate token names' cache 211 | :param regex_pat: 212 | :return: 213 | """ 214 | if isinstance(regex_pat, str): 215 | regex_pat = re.compile(regex_pat) 216 | 217 | def f(inp_str, pos): 218 | m = regex_pat.match(inp_str, pos) 219 | return m.group() if m else None 220 | 221 | return f 222 | 223 | 224 | class UniqueLiteralCachePool: 225 | def __init__(self, dictionary: dict): 226 | self.content = dictionary 227 | 228 | def __getitem__(self, item): 229 | try: 230 | return self.content[item] 231 | except KeyError: 232 | self.content[item] = item 233 | return item 234 | 235 | 236 | unique_literal_cache_pool = UniqueLiteralCachePool({}) 237 | 238 | 239 | def unique_lit_name(obj): 240 | if obj.name is not unique_literal_cache_pool[obj.name]: 241 | obj.name = unique_literal_cache_pool[obj.name] 242 | 243 | 244 | def unique_lit_value(obj): 245 | if obj.mode is not unique_literal_cache_pool[obj.mode]: 246 | obj.mode = unique_literal_cache_pool[obj.mode] 247 | -------------------------------------------------------------------------------- /Python/Ruikowa/ObjectRegex/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /Python/Ruikowa/Tools/__init__.py: -------------------------------------------------------------------------------- 1 | import linq 2 | 3 | try: 4 | from cytoolz import curry 5 | except ModuleNotFoundError: 6 | from toolz import curry 7 | 8 | 9 | @curry 10 | def function_debugger(tag: str, content: str, dictionary: dict, indent: int, inc_indent: int): 11 | case_map = {tag: 1, 12 | content: 2} 13 | 14 | indent = " " * indent 15 | inc_indent = f"{indent}" + " " * inc_indent; 16 | 17 | groups = linq.Flow(dictionary.items()).Map(lambda a, b: (a, b)).GroupBy( 18 | lambda a, b: case_map.get(a, 0)).Unboxed() 19 | 20 | others = 
'\n'.join(map(lambda each: f"{inc_indent}<{each[0]}> {each[1]} ", groups[0])) 21 | 22 | content = f"<{groups[2][0][1]}>" 23 | 24 | return (f"{indent}<{groups[1][0][1]}>\n" 25 | f"{others}\n" 26 | f'{inc_indent}{content}\n' 27 | f"{indent}") 28 | -------------------------------------------------------------------------------- /Python/Ruikowa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thautwarm/EBNFParser/101a92c4f408f9e6ce7b55aacb39cded9394521d/Python/Ruikowa/__init__.py -------------------------------------------------------------------------------- /Python/Ruikowa/color.py: -------------------------------------------------------------------------------- 1 | class Colored: 2 | Red = '\033[31m' 3 | Green = '\033[32m' 4 | Yellow = '\033[33m' 5 | Blue = '\033[34m' 6 | Purple = '\033[35m' 7 | LightBlue = '\033[36m' 8 | Clear = '\033[39m' 9 | Purple2 = '\033[95m' -------------------------------------------------------------------------------- /Python/Ruikowa/io.py: -------------------------------------------------------------------------------- 1 | encodings = ('utf8', 'gb18030', 'latin1', 'gbk') 2 | 3 | 4 | class grace_open: 5 | 6 | def __init__(self, filename): 7 | self.filename = filename 8 | 9 | def write(self, string: str): 10 | for encoding in encodings: 11 | try: 12 | with open(self.filename, 'w', encoding=encoding) as f: 13 | f.write(string) 14 | return self 15 | except UnicodeEncodeError: 16 | continue 17 | raise UnicodeEncodeError 18 | 19 | def read(self): 20 | for encoding in encodings: 21 | try: 22 | with open(self.filename, 'r', encoding=encoding) as f: 23 | return f.read() 24 | except UnicodeEncodeError: 25 | continue 26 | raise UnicodeEncodeError 27 | -------------------------------------------------------------------------------- /Python/release-note: -------------------------------------------------------------------------------- 1 | ## what's new in EBNFParser 0.1.2: 2 | 1. 3 | separate parser with tokenizer. 4 | You are allowed to whether to define tokenizer automatically in EBNF files. 5 | You are allowed to define a funtion to be the tokenizer at the first line in a EBNF file 6 | for instance: 7 | using {{ lambda string:list(string) }} # or just list 8 | 9 | And you are allowed to write a tokenizer definition in another file, for instance: 10 | 11 | file: ./xxx.eebnf 12 | using python.token 13 | file: ./python/token 14 | lambda x : string(x) 15 | Take care that you can just write an expression!!! 16 | 2. 17 | the meta information format has changed from 18 | 19 | "meta":{"rowIdx":, "count":, "fileName":} 20 | to 21 | "meta":[, , ] 22 | 23 | Enjoy it:) 24 | 25 | 26 | ## what's new in EBNFParser 0.1.2.2: 27 | 28 | You're now allowed to define parsers named by chinese characters. 29 | 30 | 31 | ## what's new in EBNFParser 0.1.3.1: 32 | 33 | There is a module named `token` which is in CPython STL. 34 | As a result, I changed the name of a specific file generated by Parser Generator from `token.py` to `etoken.py`. 35 | 36 | ## what's new in EBNFParser 0.1.3.4(0.1.4): 37 | 38 | Fixed bugs for Windows users. 39 | Now just use following command 40 | 41 | ``` 42 | parserGenerator 43 | -lang 44 | -comment 45 | -multiline 46 | ``` 47 | to generate a parser. 48 | 49 | ## what's new in EBNFParser 0.2.0: 50 | 51 | I think that I have just found a fantastic method to solve left recursion problem. 
52 | The way to do it is just: 53 | 54 | - mark the left-recursive parser node and store the tail of each epsilon production of this parser as TAIL. 55 | ``` 56 | a ::= a b c | a '=>' d | c 57 | # TAIL = b c | '=>' d 58 | ``` 59 | - when an ast named RESULT has been parsed, just 60 | 61 | * make an new ast also named RESULT', take RESULT as the first elem in RESULT'!!! 62 | * continue parsing by using TAIL. 63 | 64 | I will refactor this method sooner to make my codes more elegant. 65 | 66 | - P.S 67 | Sometimes I think my ideas might come from God's Revelation. 68 | It seems so incredible for me to totally solve a part of Principles by myself. 69 | 70 | ## what's new in EBNFParser 1.0: 71 | 72 | - Totally support any kind of left recursion now. 73 | 74 | - Some optimization on Bootstrap Compiler. 75 | 76 | for instance, the result of following one 77 | `a ::= b c d | b d e | b d f` 78 | can be transformed to the same as 79 | `a ::= b(c d | b d (e|f) )` 80 | 81 | - LiteralParser will get a result which `Type` is `str`, previously it got an `Ast`. 82 | 83 | In terms of the following case: 84 | 85 | ``` test.txt 86 | 87 | B := 'b' 88 | A ::= B 89 | 90 | ``` 91 | 92 | Let test the parser. 93 | 94 | - Misakawa(Old version). 95 | 96 | ``` 97 | parserGenerator ./test.txt ./testParser.py 98 | python testLang.py A "b" 99 | A[B['b'] 100 | ] 101 | ``` 102 | 103 | - Ruikowa 104 | 105 | ``` 106 | ruiko ./test.txt ./testParser.py 107 | python testLang.py A "b" 108 | A[ 109 | "b" 110 | ] 111 | ``` 112 | - Support Python3.4+! 113 | 114 | ## what's new in EBNFParser 1.0.1: 115 | 116 | - fix a bug in `Throw` syntax. 117 | 118 | The following syntax should define an ASTParser which will ignore the characters `'\n' and ','` in parsed results, however it used to ignore "'\\n'" and "','". 119 | a Throw ['\n', ','] ::= ... 120 | 121 | Now this problem has been fixed. 122 | 123 | 124 | ## what's new in EBNFParser 1.0.3: 125 | 126 | - fix a bug in `Throw` syntax: 127 | 128 | There is a bug that ignoring the specific AST in previous versions could be incorrect. Fixed now. 129 | 130 | ## what's new in EBNFParser 1.0.4: 131 | 132 | - make `SyntaxError` caught by `handle_error` be a more specific `DSLSyntaxError`. 133 | 134 | ## what's new in EBNFParser 1.0.5: 135 | 136 | - add a new api `MetaInfo.max_fetched` to get max possible parsed words count, for making interactive tools. 137 | 138 | ## quite a exciting step in EBNFParser 1.1 139 | 140 | - support escape literal now: 141 | 142 | single.Quote := '\''; 143 | 144 | 145 | # EBNFParser 2.0 146 | 147 | Fast, powerful and human-friendly with intelligent and comfortable error alert. 148 | 149 | - all the matchings now could be made by address comparing(use `is` and `not`). 150 | 151 | - better auto-tokenizer which can handle every scene belonging to Context-Free Syntax. 152 | 153 | - use object tokenizer for precise error raising. 154 | 155 | - codes refactored and follow PEP8 strictly. More readable. 156 | 157 | 158 | 159 | 160 | # 2.0.9 161 | 162 | Add custom literal prefixes. 163 | 164 | ``` 165 | keyword as K := 'def' 'let'; 166 | 167 | some ::= K'let' args '=' expr; 168 | ``` 169 | 170 | Take care that you cannot overload `R` prefix. 
171 | 172 | 173 | 174 | 175 | # 2.1.1 176 | 177 | make a convenience for the case that all combined parsers can reach each other.(Fixed parser compiler and bootstrap code-gen) 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /Python/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Oct 6 00:51:38 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from setuptools import setup, find_packages 10 | 11 | with open('./README.rst', encoding='utf-8') as f: 12 | readme = f.read() 13 | 14 | setup(name='EBNFParser', 15 | version='2.1.3', 16 | keywords='parser, parser framework, parser generator, gramamr, ast, tokenizer, EBNF, BNF', 17 | description="very powerful and optional parser framework for python", 18 | long_description=readme, 19 | license='MIT', 20 | url='https://github.com/thautwarm/EBNFParser', 21 | author='thautwarm', 22 | author_email='twshere@outlook.com', 23 | include_package_data=True, 24 | packages=['Ruikowa'], 25 | entry_points={ 26 | 'console_scripts': [ 27 | 'ruiko=Ruikowa.Command:main'] 28 | }, 29 | install_requires=[ 30 | 'Linq==0.3.1' 31 | ], 32 | platforms='any', 33 | classifiers=[ 34 | 'Programming Language :: Python :: 3.6', 35 | 'Programming Language :: Python :: 3.7', 36 | 'Programming Language :: Python :: Implementation :: CPython'], 37 | zip_safe=False 38 | ) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/thautwarm/EBNFParser.svg?branch=boating-new)](https://travis-ci.org/thautwarm/EBNFParser) 2 | [![PyPI version](https://img.shields.io/pypi/v/EBNFParser.svg)](https://pypi.python.org/pypi/EBNFParser) 3 | [![Release Note](https://img.shields.io/badge/note-release-orange.svg)](https://github.com/thautwarm/EBNFParser/blob/boating-new/Python/release-note) 4 | [![MIT License](https://img.shields.io/badge/license-MIT-Green.svg?style=flat)](https://github.com/thautwarm/EBNFParser/blob/boating-new/LICENSE) 5 | 6 | # EBNFParser 7 | Parse Many, Any, Every [![Doc](https://img.shields.io/badge/document-2.1.2-yellow.svg?style=flat)](http://ebnfparser.readthedocs.io/en/boating-new) 8 | ----------------------- 9 | 10 | ``` 11 | LR ::= LR 'a' 'b' | LR 'c' | 'd'; 12 | ``` 13 | 14 | 15 | - [Python Project(Support Python 3.6+)](https://github.com/thautwarm/EBNFParser/tree/boating-new/Python) (v 2.0+) 16 | - [Old Version : Misakawa v0.x](https://github.com/thautwarm/EBNFParser/tree/master/Misakawa.md) 17 | - [Old Version : Ruikowa v1.x](https://github.com/thautwarm/EBNFParser/tree/master/README.md) 18 | 19 | -------------------- 20 | 21 | ## Install 22 | - Python 23 | - pip 24 | 25 | `pip installl -U EBNFParser` 26 | 27 | - setup 28 | 29 | ```shell 30 | git clone https://github.com/thautwarm/EBNFParser 31 | cd EBNFParser/Python 32 | python setup.py install 33 | ``` 34 | 35 | ## Usage 36 | 37 | - Command Line Tools 38 | - `ruiko`. 39 | 40 | ```shell 41 | ruiko ./ ./ 42 | [--testTk] # print tokenized words or not 43 | [--test] # generate test script "test_lang.py" 44 | ``` 45 | Use command `ruiko` to generate parser and token files, and then you can use `test_lang.py` to test your parser. 
46 | 47 | ```shell 48 | python ./test_lang.py Stmt " (+ 1 2) " -o test.json --testTk 49 | ``` 50 | 51 | - Integrated into your own project 52 | 53 | ```python 54 | 55 | from Ruikowa.ObjectRegex.ASTDef import Ast 56 | from Ruikowa.ErrorHandler import ErrorHandler 57 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 58 | from Ruikowa.ObjectRegex.Tokenizer import Tokenizer 59 | 60 | from import , token_table 61 | 62 | 63 | import typing as t 64 | 65 | def token_func(src_code: str) -> t.Iterable[Tokenizer]: 66 | return Tokenizer.from_raw_strings(src_code, token_table, ({}, {})) 67 | 68 | parser = ErrorHandler(.match, token_func) 69 | 70 | def parse(filename: str) -> Ast: 71 | 72 | return parser.from_file(filename) 73 | 74 | 75 | print(parse()) 76 | 77 | ``` 78 | 79 | Need more? See [the documents](http://ebnfparser.readthedocs.io/en/boating-new). 80 | 81 | ## Examples 82 | 83 | Here are some examples to refer: 84 | 85 | EBNFParser 2.0 86 | 87 | - [Rem](https://github.com/thautwarm/Rem) 88 | The Rem programming language. 89 | 90 | 91 | Old version(Before EBNFParser 1.1). 92 | 93 | - [DBG-Lang](https://github.com/thautwarm/dbg-lang) 94 | A DSL for SQL development in Python areas. 95 | 96 | - [Rem(Based EBNFParser1.1)](https://github.com/thautwarm/Rem/tree/backend-ebnfparser1.1) 97 | A full featured modern language to enhance program readability based on CPython. 98 | 99 | - [Lang.Red](https://github.com/thautwarm/lang.red) 100 | An attempt to making ASDL in CPython(unfinished yet) 101 | 102 | Will support F# and Rem. 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /Ruiko/README.rst: -------------------------------------------------------------------------------- 1 | 2 | Ruiko Language 3 | ------------------------------ 4 | 5 | **Ruiko** is EBNF-like language for tokenizing and parsing which could handle context-sensitive cases easily(`closure` could be handled in **Ruiko**). 6 | 7 | What's more, you can even write the compiling actions easily when writing parsers, and the efficiency might be amazing high. 
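For a flavour of the notation, ``bootstrap.ruiko`` in this directory describes Ruiko's own grammar. Its ``Exp`` rule, quoted verbatim below, shows the optional ``by``/``as``/``then`` clauses whose operands are ``Name`` or ``Codes`` tokens — presumably the hook points for the compiling actions mentioned above:

.. code ::

    Exp ::= AtomExp | '<' AtomExp ['by' (Name | Codes)+] ['as' (Name | Codes)] ['then' (Name | Codes)+]'>'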
-------------------------------------------------------------------------------- /Ruiko/ast.cpp: -------------------------------------------------------------------------------- 1 | #include "flowerq/List.hpp" 2 | #include "flowerq/IO.hpp" 3 | #include "flowerq/Macro.hpp" 4 | #include 5 | #define DEBUB 6 | 7 | 8 | class Mixed; 9 | using Ast = flowerq::List; 10 | 11 | #ifdef DEBUB 12 | typedef StringBuff TokenType; 13 | #else 14 | typedef int TokenType; 15 | #endif 16 | 17 | struct Token{ 18 | 19 | public: 20 | int lineno; 21 | int colno; 22 | TokenType name; 23 | StringBuff value; 24 | Token(int lineno, int colno, TokenType name, StringBuff value){ 25 | this->lineno = lineno; 26 | this->colno = colno; 27 | this->name = name; 28 | this->value = value; 29 | } 30 | 31 | StringBuff toString(){ 32 | return flowerq::IO::inspect(this->name) + rstr("[") + flowerq::IO::inspect(this->value) + rstr("]"); 33 | } 34 | Token() = default; 35 | 36 | }; 37 | 38 | class Mixed{ 39 | public: 40 | Token* token_ptr; 41 | 42 | Ast* ast_ptr; 43 | 44 | bool is_primitive(){ 45 | return ast_ptr == nullptr; 46 | } 47 | 48 | StringBuff toString(){ 49 | if (is_primitive()){ 50 | return token_ptr -> toString(); 51 | } 52 | return ast_ptr -> toString(); 53 | } 54 | 55 | Mixed() = default; 56 | 57 | }; 58 | 59 | -------------------------------------------------------------------------------- /Ruiko/bootstrap.ruiko: -------------------------------------------------------------------------------- 1 | use Token.Std.{ 2 | Name, 3 | String, 4 | Codes, 5 | Number 6 | } 7 | 8 | Definition ::= Name '::=' OrExp ['where' Codes] 9 | OrExp ::= AndExp ('|', AndExp) 10 | AndExp ::= NotExp+ 11 | NotExp ::= 'Not' Exp 12 | Exp ::= AtomExp | '<' AtomExp ['by' (Name | Codes)+] ['as' (Name | Codes)] ['then' (Name | Codes)+]'>' 13 | Trailer ::= '{' Number{1 2} '}' | '+' | '*' 14 | AtomExp ::= Atom [Trailer] 15 | Atom ::= Name | Indent | Dedent | String | Codes 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /Ruiko/dev_bnf.cpp: -------------------------------------------------------------------------------- 1 | #include "ast.cpp" 2 | #include "flowerq/IO.hpp" 3 | 4 | int main(){ 5 | using namespace flowerq; 6 | Token tk; 7 | Mixed m; 8 | m.token_ptr = &tk; 9 | tk.name = rstr("definition"); 10 | tk.value = rstr("def"); 11 | IO::puts(tk); 12 | } -------------------------------------------------------------------------------- /Ruiko/flowerq/Composite.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_COMP 3 | #include "Composite.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_COMP 7 | #define FLOWERQ_COMP 8 | #include 9 | 10 | namespace flowerq{ 11 | 12 | 13 | template 14 | std::function and_then(std::function f1, std::function f2){ 15 | return [=](T input){ 16 | f2(f1(input)); 17 | }; 18 | } 19 | 20 | // template 21 | // auto and_then(std::function f1, std::function f2){ 22 | // return [=](T input){ 23 | // return f2(f1(input)); 24 | // }; 25 | // } 26 | 27 | 28 | } 29 | 30 | #endif -------------------------------------------------------------------------------- /Ruiko/flowerq/IO.File.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | TODO: cahcing 4 | */ 5 | struct Writer 6 | { 7 | public: 8 | ofstream stream; 9 | void write(const Char *buf) 10 | { 11 | stream << buf; 12 | } 13 | void close() 14 | { 15 | stream.close(); 16 
| } 17 | Writer(const char *filename) 18 | : stream(filename){}; 19 | }; 20 | 21 | struct Reader 22 | { 23 | public: 24 | ifstream stream; 25 | 26 | StringBuff read(const Char split) 27 | { 28 | StringBuff s; 29 | Char c; 30 | while (stream.get(c) && (c != split) && !stream.eof()) 31 | { 32 | s.push_back(c); 33 | } 34 | return s; 35 | } 36 | 37 | StringBuff read() 38 | { 39 | StringBuff s; 40 | Char c; 41 | while (stream.get(c) && !stream.eof()) 42 | { 43 | s.push_back(c); 44 | } 45 | return s; 46 | } 47 | void close() 48 | { 49 | stream.close(); 50 | } 51 | Reader(const char *filename) 52 | : stream(filename) 53 | { 54 | } 55 | }; 56 | template 57 | R open(const char *filename) 58 | { 59 | } 60 | 61 | template <> 62 | Writer open(const char *filename) 63 | { 64 | return Writer(filename); 65 | } 66 | 67 | template <> 68 | Reader open(const char *filename) 69 | { 70 | return Reader(filename); 71 | } 72 | -------------------------------------------------------------------------------- /Ruiko/flowerq/IO.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_IO 3 | #include "IO.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_IO 7 | #define FLOWERQ_IO 8 | 9 | #include "Match.hpp" 10 | #include "Macro.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | namespace flowerq{ 16 | namespace IO{ 17 | 18 | StringBuff 19 | inspect(int e) { 20 | return to_string(e); 21 | } 22 | 23 | StringBuff 24 | inspect(Char e) { 25 | return to_string(e); 26 | } 27 | 28 | 29 | StringBuff 30 | inspect(const Char *buf) { 31 | if (buf == NULL){ 32 | //TODO: not sure how to handle the null value. 33 | return rstr(""); 34 | } 35 | StringBuff s(buf); 36 | return s; 37 | } 38 | 39 | StringBuff 40 | inspect(Char *buf) { 41 | if (buf == NULL){ 42 | //TODO: not sure how to handle the null value. 43 | return rstr(""); 44 | } 45 | StringBuff s(buf); 46 | return s; 47 | } 48 | 49 | StringBuff 50 | inspect(float e) { 51 | return to_string(e); 52 | } 53 | 54 | StringBuff 55 | inspect(double e) { 56 | return to_string(e); 57 | } 58 | 59 | StringBuff 60 | inspect(std::string e) { 61 | #ifdef UNICODE 62 | StringBuff ws; 63 | ws.assign(e.begin(), e.end()); 64 | return ws; 65 | #else 66 | return e; 67 | #endif 68 | } 69 | 70 | StringBuff 71 | inspect(std::wstring e){ 72 | #ifndef UNICODE 73 | StringBuff s; 74 | s.assign(e.begin(), e.end()); 75 | return s; 76 | #else 77 | return e; 78 | #endif 79 | } 80 | 81 | 82 | StringBuff 83 | inspect(bool e) { 84 | return e ? 
rstr("true") : rstr("false"); 85 | 86 | } 87 | 88 | template 89 | StringBuff inspect(T t); 90 | 91 | 92 | template 93 | StringBuff tuple_inspect(std::tuple tp){ 94 | return inspect(std::get<0> (tp)); 95 | } 96 | 97 | template 98 | StringBuff tuple_inspect(std::tuple tp){ 99 | return inspect(std::get<0> (tp)) + rstr(", ") + inspect(std::get<1> (tp)); 100 | } 101 | 102 | template 103 | StringBuff tuple_inspect(std::tuple tp){ 104 | return inspect(std::get<0> (tp)) + rstr(", ") + tuple_inspect(dependency::tail(tp)); 105 | } 106 | 107 | template 108 | StringBuff inspect(std::tuple tp) { 109 | return rstr("(") + tuple_inspect(tp) + rstr(",)"); 110 | } 111 | 112 | template 113 | StringBuff inspect(std::tuple tp) { 114 | return rstr("(") + tuple_inspect(tp) + rstr(")"); 115 | } 116 | 117 | template 118 | StringBuff inspect(T t) { 119 | return t.toString(); 120 | } 121 | 122 | template 123 | void puts(T t) { 124 | StringBuff res = inspect(t); 125 | cout << res << '\t'; 126 | } 127 | 128 | 129 | template 130 | void putstrln(T t) { 131 | StringBuff res = inspect(t); 132 | cout << res << std::endl; 133 | } 134 | 135 | void putstrln() { 136 | printf("\n"); 137 | } 138 | 139 | #include "IO.File.hpp" 140 | 141 | } 142 | } 143 | #endif -------------------------------------------------------------------------------- /Ruiko/flowerq/List.BaseMethods.hpp: -------------------------------------------------------------------------------- 1 | void forEach(std::function action) { 2 | Node *list_ptr = this->head_ptr->Next; 3 | while (list_ptr != nullptr) { 4 | action(list_ptr->value); 5 | list_ptr = list_ptr->Next; 6 | } 7 | } 8 | 9 | template 10 | List map(std::function fn){ 11 | 12 | const int n = length(); 13 | List new_list; 14 | Node* src_list_ptr = new_list.head_ptr = Node::_new_head(n); 15 | if (n == 0){ 16 | src_list_ptr -> Next = nullptr; 17 | return new_list; 18 | } 19 | this->forEach([&](T e){ 20 | src_list_ptr->Next = new Node(fn(e)); 21 | src_list_ptr = src_list_ptr -> Next; 22 | }); 23 | src_list_ptr -> Next = nullptr; 24 | return new_list; 25 | } 26 | 27 | List filter(std::function predicate){ 28 | const int n = length(); 29 | List new_list; 30 | Node* src_list_ptr = new_list.head_ptr = Node::_new_head(); 31 | if (n == 0){ 32 | src_list_ptr -> Next = nullptr; 33 | return new_list; 34 | } 35 | int length = 0; 36 | this->forEach([&](T e){ 37 | if (predicate(e)){ 38 | ++ length; 39 | src_list_ptr->Next = new Node(e); 40 | src_list_ptr = src_list_ptr -> Next; 41 | } 42 | }); 43 | src_list_ptr -> Next = nullptr; 44 | new_list.head_ptr->size = length; 45 | return new_list; 46 | } 47 | 48 | template 49 | G reduce(std::function fold_fn, G start_elem) { 50 | this->forEach([&](int e) { 51 | start_elem = fold_fn(start_elem, e); 52 | }); 53 | return start_elem; 54 | } 55 | 56 | template 57 | List> zip(List traversal) { 58 | return flowerq::zip(*this, traversal); 59 | } -------------------------------------------------------------------------------- /Ruiko/flowerq/List.Constructor.hpp: -------------------------------------------------------------------------------- 1 | template 2 | static List list::create() { 3 | List new_list = List(); 4 | new_list.head_ptr = Node::_new_head(0); 5 | new_list.head_ptr->Next = nullptr; 6 | return new_list; 7 | } 8 | 9 | template 10 | static List list::create(A value) { 11 | List new_list = List(); 12 | new_list.head_ptr = Node::_new_head(1); 13 | new_list.head_ptr->Next = Node::_new_ptr(value); 14 | return new_list; 15 | } 16 | 17 | template 18 | static List list::create(A 
value, VARARGS... varargs){ 19 | List new_list = List(); 20 | int count = 0; 21 | auto src_ptr = Node::_new_ptr(count, value, varargs...); 22 | new_list.head_ptr = Node::_new_head(count, src_ptr); 23 | return new_list; 24 | } 25 | 26 | template 27 | static List list::cons(A value, List list){ 28 | List new_list; 29 | auto node = Node::_new_ptr(value); 30 | node -> Next = list.head_ptr->Next; 31 | new_list.head_ptr = Node::_new_head(list.length() + 1, node); 32 | return new_list; 33 | } -------------------------------------------------------------------------------- /Ruiko/flowerq/List.Node.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | */ 4 | 5 | template 6 | struct Node{ 7 | 8 | public: 9 | union{ 10 | T value; 11 | int size; 12 | }; 13 | Node* Next = nullptr; 14 | 15 | 16 | Node(){} 17 | Node(T v) { 18 | value = v; 19 | }; 20 | 21 | template 22 | static Node *_new_ptr(int &count, T value, Args... varargs) { 23 | auto list_ptr = new Node(value); 24 | list_ptr->Next = _new_ptr(++count, varargs...); 25 | return list_ptr; 26 | } 27 | 28 | static Node *_new_ptr(int &count, T value) { 29 | ++count; 30 | auto list_ptr = new Node(value); 31 | list_ptr->Next = nullptr; 32 | return list_ptr; 33 | } 34 | 35 | static Node *_new_ptr(T value) { 36 | auto list_ptr = new Node(value); 37 | list_ptr->Next = nullptr; 38 | return list_ptr; 39 | } 40 | 41 | static Node *_new_head(int size, Node* next) { 42 | auto head_ptr = new Node; 43 | head_ptr -> size = size; 44 | head_ptr -> Next = next; 45 | return head_ptr; 46 | } 47 | 48 | static Node *_new_head(int size) { 49 | auto head_ptr = new Node; 50 | head_ptr -> size = size; 51 | return head_ptr; 52 | } 53 | 54 | static Node *_new_head() { 55 | auto head_ptr = new Node; 56 | return head_ptr; 57 | } 58 | 59 | }; 60 | 61 | template 62 | static void del(Node &list) { 63 | del(list.Next); 64 | list.Next = nullptr; 65 | } 66 | 67 | template 68 | static void del(Node *list_ptr) { 69 | if (list_ptr == nullptr) 70 | return; 71 | Node *next_ptr = list_ptr->Next; 72 | delete list_ptr; 73 | del(next_ptr); 74 | } 75 | -------------------------------------------------------------------------------- /Ruiko/flowerq/List.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_LIST 3 | #include "List.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_LIST 7 | #define FLOWERQ_LIST 8 | 9 | 10 | #include "Macro.hpp" 11 | #include "IO.hpp" 12 | #include "Match.hpp" 13 | #include 14 | 15 | 16 | namespace flowerq{ 17 | 18 | // struct Node definition 19 | #include "List.Node.hpp" 20 | 21 | 22 | #pragma region declare 23 | template 24 | class List; 25 | 26 | template 27 | static void del(List* list_ptr); 28 | 29 | template 30 | static void del(List &list); 31 | 32 | 33 | template 34 | static List> zip(List list1, List list2); 35 | 36 | namespace list{ 37 | template 38 | static List create(); 39 | 40 | template 41 | static List create(A value); 42 | 43 | template 44 | static List create(A value, VARARGS... varargs); 45 | 46 | template 47 | static List cons(A value, List list); 48 | } 49 | #pragma endregion 50 | 51 | template 52 | class List{ 53 | 54 | protected: 55 | Node* head_ptr; // the head ptr does not contain values but the length of list. 
56 | public: 57 | 58 | int length(){ 59 | return head_ptr->size; 60 | } 61 | 62 | template 63 | typename std::enable_if::value, StringBuff>::type 64 | toString(){ 65 | 66 | const int n = length(); 67 | if (n == 0){ 68 | return rstr("List<0>[]"); 69 | } 70 | 71 | T head; 72 | List tail; 73 | auto tp = destruct(); 74 | pattern::match(tp, head, tail); 75 | 76 | StringBuff res = rstr("List<") + to_string(n) + rstr(">[") + IO::inspect(head); 77 | 78 | tail.forEach([=, &res](T e) { 79 | res += rstr(", ") + IO::inspect(e); 80 | }); 81 | 82 | return res + rstr("]"); 83 | 84 | } 85 | 86 | template 87 | typename std::enable_if::value, StringBuff>::type 88 | toString(){ 89 | Char *buffer = new Char[length()]; 90 | int idx = 0; 91 | forEach([&](Char& ch){ 92 | buffer[idx++] = ch; 93 | }); 94 | return buffer; 95 | } 96 | 97 | T at(int idx){ 98 | Node *list_ptr = this->head_ptr->Next; 99 | int i = 0; 100 | while(i++ < idx){ 101 | if (list_ptr == nullptr){ 102 | const char* err_info = "Runtime IndexError: List ended before found the index."; 103 | printf("%s\n", err_info); 104 | const auto err = std::runtime_error(err_info); 105 | throw err; 106 | } 107 | list_ptr = list_ptr->Next; 108 | } 109 | return list_ptr->value; 110 | } 111 | 112 | T head(){ 113 | return head_ptr->Next->value; 114 | } 115 | 116 | List tail() { 117 | List new_list; 118 | const int n = length(); 119 | if (n == 0){ 120 | new_list.head_ptr = Node::_new_head(0); 121 | return new_list; 122 | } 123 | new_list.head_ptr = Node::_new_head(n - 1, this->head_ptr->Next->Next); 124 | return new_list; 125 | } 126 | 127 | std::tuple> destruct() { 128 | return std::make_tuple(head(), tail()); 129 | }; 130 | 131 | #include "List.BaseMethods.hpp" 132 | 133 | 134 | 135 | 136 | template 137 | friend List list::create(); 138 | 139 | template 140 | friend List list::create(A value); 141 | 142 | template 143 | friend List list::create(A value, VARARGS... varargs); 144 | 145 | template 146 | friend List list::cons(A value, List list); 147 | 148 | template 149 | friend void del(List* list_ptr); 150 | 151 | template 152 | friend void del(List &list); 153 | 154 | template 155 | friend List> zip(List list1, List list2); 156 | }; 157 | 158 | 159 | template 160 | List> zip(List list1, List list2){ 161 | const int len = std::min(list1.length(), list2.length()); 162 | List> new_list; 163 | auto h = new_list.head_ptr = Node>::_new_head(len); 164 | 165 | Node *h1 = list1.head_ptr->Next; 166 | Node *h2 = list2.head_ptr->Next; 167 | for(int i=0; i < len; ++i){ 168 | h -> Next = new Node>(std::make_tuple(h1->value, h2->value)); 169 | h = h->Next; 170 | } 171 | h -> Next = nullptr; 172 | return new_list; 173 | } 174 | // define ways to construct list. 175 | #include "List.Constructor.hpp" 176 | 177 | using Str = List; 178 | // not sure whether to use haskell like string. 179 | // if so, string concat could be slow a lot. 
180 | 181 | template 182 | void del(List* list_ptr){ 183 | del(list_ptr->head_ptr); 184 | delete list_ptr; 185 | } 186 | 187 | template 188 | void del(List &list){ 189 | del(list.head_ptr); 190 | } 191 | 192 | } 193 | #endif 194 | -------------------------------------------------------------------------------- /Ruiko/flowerq/Macro.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_MACRO 3 | #include "Macro.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_MACRO 7 | #define FLOWERQ_MACRO 8 | #include 9 | #include 10 | #include 11 | #include 12 | #ifdef UNICODE 13 | 14 | #define rstr(src) L ## src 15 | using Char = wchar_t; 16 | using ofstream = std::wofstream; 17 | using ifstream = std::wifstream; 18 | using fstream = std::wfstream; 19 | static std::wostream& cout = std::wcout; 20 | static auto str_len = std::wcslen; 21 | typedef std::wstring StringBuff; 22 | template 23 | static StringBuff to_string(T t){ 24 | return std::to_wstring(t); 25 | } 26 | // #define __FLOWER_MACRO_GETBUFFLEN__ std::wcslen 27 | // #define __FLOWER_MACRO_TO_BUFF__ std::to_wstring 28 | #else 29 | #define rstr(src) src 30 | using Char = char; 31 | using ofstream = std::ofstream; 32 | using ifstream = std::ifstream; 33 | using fstream = std::fstream; 34 | static std::ostream& cout = std::cout; 35 | static auto str_len = std::strlen; 36 | typedef std::string StringBuff; 37 | template 38 | static StringBuff to_string(T t){ 39 | return std::to_string(t); 40 | } 41 | #endif 42 | #endif -------------------------------------------------------------------------------- /Ruiko/flowerq/Match.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef FLOWERQ_MATCH 3 | #include "Match.hpp" 4 | #endif 5 | */ 6 | #ifndef FLOWERQ_MATCH 7 | #define FLOWERQ_MATCH 8 | #include 9 | #include 10 | 11 | namespace flowerq { 12 | namespace dependency { 13 | 14 | /// reference: 15 | /// url : https://stackoverflow.com/questions/10626856/how-to-split-a-tuple 16 | /// author: André Bergner 17 | 18 | 19 | 20 | template 21 | auto tail_impl(std::index_sequence, std::tuple t) { 22 | return std::make_tuple(std::get(t)...); 23 | } 24 | 25 | template 26 | auto tail(std::tuple t) { 27 | return tail_impl(std::make_index_sequence(), t); 28 | } 29 | 30 | template 31 | auto tail2_impl(std::index_sequence, std::tuple t) { 32 | return std::make_tuple(std::get(t)...); 33 | } 34 | 35 | template 36 | auto tail2(std::tuple t){ 37 | return tail_impl(std::make_index_sequence(), t); 38 | } 39 | } 40 | 41 | namespace pattern { 42 | 43 | template 44 | void match(std::tuple tp, T &t, G &g) { 45 | t = std::get<0>(tp); 46 | g = std::get<1>(tp); 47 | } 48 | 49 | template 50 | void match(std::tuple tp, T &t, VARARGS &... 
args) { 51 | t = std::get<0>(tp); 52 | match(dependency::tail(tp), args...); 53 | } 54 | 55 | template 56 | void match(std::tuple tp, T &t) { 57 | t = std::get<0>(tp); 58 | } 59 | } 60 | } 61 | #endif 62 | -------------------------------------------------------------------------------- /Ruiko/main.cpp: -------------------------------------------------------------------------------- 1 | // #define UNICODE 2 | #include 3 | #include "flowerq/List.hpp" 4 | #include "flowerq/Composite.hpp" 5 | #include "flowerq/IO.hpp" 6 | 7 | int main() { 8 | #ifdef UNICODE 9 | setlocale(LC_ALL,""); 10 | #endif 11 | using namespace flowerq; 12 | 13 | // IO::putstrln(list::create(1, 2, 3, 4, 5)); 14 | 15 | auto lst = list::create(1, 2, 3, 4, 5); 16 | 17 | // IO::putstrln(list::create(list::create(1, 2, 3, 5), list::create(2, 3, 4))); 18 | // IO::putstrln(std::make_tuple(lst)); 19 | IO::putstrln(lst.destruct()); 20 | 21 | auto new_lst = list::cons(2, lst); 22 | 23 | IO::putstrln(new_lst.map([=](int e){return e+1;}).filter([=](int e){return e%2==0;})); 24 | 25 | 26 | auto lst2 = list::create(); 27 | 28 | 29 | IO::putstrln(new_lst); 30 | 31 | IO::putstrln(new_lst.tail()); 32 | 33 | 34 | IO::putstrln(new_lst.reduce([=](int a, int b){return a+b;}, 0)); 35 | 36 | IO::putstrln(lst); 37 | IO::putstrln(lst.at(2)); 38 | List string_ = list::create(rstr('1'), rstr('2'), rstr('3'), rstr('4')); 39 | IO::putstrln(string_.length()); 40 | IO::putstrln("string here:"); 41 | IO::putstrln(string_); 42 | 43 | // 垃圾推导 44 | list::create(0, 1, 2).map([&](auto e){ return lst.at(e);}).forEach(IO::puts); 45 | 46 | 47 | // 陈独秀同学你先下来 48 | std::function f1 = [&](auto e){ return lst.at(e);}; 49 | std::function f2 = IO::puts; 50 | and_then(f1, f2)(2); 51 | 52 | auto writer = IO::open("test.txt"); 53 | writer.write(rstr("a ::= b [c [d [e f{2, 3}]]]")); 54 | writer.close(); 55 | 56 | auto reader = IO::open("test.txt"); 57 | auto s = reader.read(); 58 | IO::puts(s); 59 | 60 | 61 | auto xxx = lst; 62 | IO::putstrln(lst); 63 | lst = list::create(-1, -1, -1); 64 | IO::putstrln(xxx); 65 | IO::putstrln(lst); 66 | IO::putstrln(lst.zip(lst)); 67 | } -------------------------------------------------------------------------------- /Ruiko/test.txt: -------------------------------------------------------------------------------- 1 | a ::= b [c [d [e f{2, 3}]]] -------------------------------------------------------------------------------- /Ruiko/xml.ruiko: -------------------------------------------------------------------------------- 1 | use Token.Std.{ 2 | Name 3 | }; 4 | 5 | Tag ::= '<' Name '>' 6 | EndTag ::= '' 7 | Block ::= 8 | Block* 9 | # use context-sensitive syntax 10 | | Not (Tag | EndTag) # use negative matching 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/RuikoEBNF.rst: -------------------------------------------------------------------------------- 1 | Ruiko EBNF 2 | ===================== 3 | 4 | Grammar 5 | ----------- 6 | 7 | .. code :: 8 | 9 | ignore [token1, token2, ...] 10 | # optional, discard some tokenizers with specific names. 11 | # it only affects when you're using EBNFParser automatical tokenizing function. 12 | 13 | deftoken directory1.directory2...directoryn.filename 14 | # your custom token function. cannot be applied when you're using auto token. 
15 | 16 | 17 | token1 := ...; 18 | token2 := ...; 19 | 20 | token3 cast := ...; 21 | # define a cast map 22 | 23 | token4 cast as K := ...; 24 | # def cast map and custom prefix 25 | 26 | token5 as K := ...; 27 | # only def custom prefix 28 | 29 | 30 | token6 := ...; 31 | 32 | token7 of token5 := ...; 33 | # add more patterns to token5 34 | 35 | parser1 ::= token3 token5+ | [token6] token4* (parser2 parser3){3, 10}; 36 | # define a combined parser 37 | /* 38 | `|` means `or`, 39 | `[]` means `optional`, 40 | `()` means `make a new pattern by several patterns`, 41 | `pattern+` means one or more, 42 | `pattern*` means zero or more, 43 | `pattern{a, b}` means matching this pattern more than `a` times and less than b; 44 | `pattern{a}` means matching this pattern more than `a` times. 45 | */ 46 | 47 | parser2 throw [parser3 ';'] = parser3 parser1 ';'; 48 | /* 49 | the result from `parser2` will not contains 50 | a term(`Tokenizer` or `Ast`) with name=parser3 or string=";" 51 | */ 52 | 53 | More accurately, see the `bootstrap grammar 54 | `_ here. 55 | 56 | 57 | Regex Prefix 58 | ------------------------- 59 | 60 | Regex prefix in ruiko EBNF would add a regex pattern to :code:`token_table`, 61 | which might be used for generating an automatical tokenizing function(unless you use your custom tokenizing function). 62 | 63 | When you want to use Regex prefix, just type :code:`R''`. 64 | 65 | - url.ruiko 66 | 67 | .. code :: 68 | 69 | url := R'https.*?\.(com|cn|org|net)'; 70 | other := R'.'; 71 | parserToTest throw [other] ::= (url | other)+; 72 | 73 | test it 74 | .. code :: 75 | 76 | ruiko url.ruiko url --test 77 | python test_lang.py parserToTest "https://github.comasdas https://123.net" 78 | =========================ebnfparser test script================================ 79 | parserToTest[ 80 | [name: url, string: "https://github.com"] 81 | [name: url, string: "https://123.net"] 82 | ] 83 | 84 | You should take care that there is only regex matching in tokenizing process, 85 | and when literal parsers and combined parsers are parsing tokenizers, they are matching 86 | whether the name is what they expect(in fact, what parsers are comparing by is not the **name**, it's the **memory address**, 87 | so EBNFParser is very quick in this process). 88 | 89 | 90 | Cast Map 91 | -------------------------- 92 | 93 | 94 | .. code :: 95 | 96 | SomeToken cast as S := 'abc'; 97 | Alpha := R'[a-z]+'; 98 | F ::= S'abc' | Alpha; 99 | 100 | 101 | The ruiko codes above defines a tokenize named :code:`SomeToken` with a prefix :code:`S`. 102 | 103 | 104 | When the input source is splitted into a sequence of tokenizers , however, even the literal parser 105 | :code:`Alpha` is supposed to match all string matched by regex pattern :code:`"[a-z]+"`, it cannot match a tokenizer 106 | with attribute :code:`string="abc"` generated by EBNFParser automatical tokenizing, 107 | that's because all the :code:`"all"` has been casted into a unique string in a buffer pool, 108 | and **all of them have the same name** :code:`SomeToken`, **not** :code:`Alpha`. 109 | 110 | Here is a string with value :code:`"abc"` located at an unique memory address, 111 | and every literal parser defined by :code:`"abc"` just matched it only. 112 | 113 | Just as what I told you at Section :code:`Regex Prefix` , 114 | The literal parser defined as :code:`Alpha := R'[a-z]+'` just matches the tokenizer whose name is :code:`Alpha`. 
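To make the mechanism concrete, here is a minimal hand-written sketch. The rule names :code:`SomeToken` and :code:`Alpha` are simply the ones from the grammar above, and the token table is built by hand rather than by the generator; only :code:`Tokenizer.from_raw_strings` and the matcher helpers come from EBNFParser itself. The regex is what actually scans :code:`"abc"`, but the cast map renames the resulting tokenizer to :code:`SomeToken`, so a literal parser named :code:`Alpha` will not accept it.

.. code ::

    # hand-written sketch of auto-tokenizing with a cast map (names are illustrative)
    from Ruikowa.ObjectRegex.Tokenizer import (
        Tokenizer, unique_literal_cache_pool, regex_matcher)

    token_table = (
        (unique_literal_cache_pool["space"], regex_matcher(r'\s+')),
        (unique_literal_cache_pool["Alpha"], regex_matcher('[a-z]+')),
    )
    # every "abc" caught by the Alpha regex is renamed to SomeToken
    cast_map = {'abc': unique_literal_cache_pool['SomeToken']}

    tokens = list(Tokenizer.from_raw_strings(
        "abc xyz", token_table,
        to_ignore=({"space"}, {}), cast_map=cast_map))

    for tk in tokens:
        print(tk)
    # [name: SomeToken, string: "abc"]
    # [name: Alpha, string: "xyz"]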
115 | 116 | 117 | Custom Prefix 118 | -------------------------- 119 | 120 | If you're using custom tokenizing, several :code:`Ruikowa.ObjectRegex.Tokenizer` objects 121 | with the same attribute :code:`string="abc"` (and have the same memory address) 122 | could have different names. 123 | 124 | To distinguish from each other, you can do as the following: 125 | 126 | - Grammar 127 | 128 | .. code :: 129 | 130 | SomeToken as S := 'abc'; 131 | Alpha := R'[a-z]+'; 132 | F ::= S'abc' | Alpha; 133 | G ::= 'abc'; 134 | H ::= G | F ; 135 | 136 | .. code :: 137 | 138 | [name: SomeToken, string: "abc"] 139 | ... 140 | 141 | 142 | If you are using combined parser :code:`G` to match above tokenizers, you'll fail, 143 | because in the grammar :code:`G` is defined as :code:`G::='abc'` , it means :code:`G` only accepts 144 | the a tokenizer who has an attribute :code:`name="auto_const"` and another attribute :code:`string="abc"` 145 | (and it's from the unique buff pool, not a string created by regex matching). 146 | -------------------------------------------------------------------------------- /docs/codes/just.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["auto_const"], str_matcher(('just'))),) 7 | 8 | class UNameEnum: 9 | # names 10 | 11 | Just = unique_literal_cache_pool['Just'] 12 | 13 | cast_map = {} 14 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({}, {}),cast_map=cast_map) 15 | 16 | Just = AstParser([SeqParser(['just'], at_least=1,at_most=Undef)], 17 | name="Just", 18 | to_ignore=({}, {})) 19 | Just.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/just.ruiko: -------------------------------------------------------------------------------- 1 | Just ::= 'just'+; -------------------------------------------------------------------------------- /docs/codes/lisp.ruiko: -------------------------------------------------------------------------------- 1 | 2 | ignore [space] 3 | 4 | space := R'\s'; 5 | 6 | Atom := R'[^\(\)\s\`]+'; # use Regex 7 | 8 | Expr ::= Atom 9 | | Quote 10 | | '(' Expr* ')'; 11 | 12 | 13 | Quote ::= '`' Expr ; 14 | 15 | Stmts ::= Expr*; -------------------------------------------------------------------------------- /docs/codes/lisp_parser.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 
2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["space"], regex_matcher('\s')), 7 | (unique_literal_cache_pool["Atom"], regex_matcher('[^\(\)\s\`]+')), 8 | (unique_literal_cache_pool["auto_const"], char_matcher(('`', ')', '('))),) 9 | 10 | class UNameEnum: 11 | # names 12 | 13 | space = unique_literal_cache_pool['space'] 14 | Atom = unique_literal_cache_pool['Atom'] 15 | Expr = unique_literal_cache_pool['Expr'] 16 | Quote = unique_literal_cache_pool['Quote'] 17 | Stmts = unique_literal_cache_pool['Stmts'] 18 | 19 | cast_map = {} 20 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({"space"}, {}),cast_map=cast_map) 21 | space = LiteralNameParser('space') 22 | Atom = LiteralNameParser('Atom') 23 | Expr = AstParser([Ref('Atom')], 24 | [Ref('Quote')], 25 | ['(', SeqParser([Ref('Expr')], at_least=0,at_most=Undef), ')'], 26 | name="Expr", 27 | to_ignore=({}, {})) 28 | Quote = AstParser(['`', Ref('Expr')], 29 | name="Quote", 30 | to_ignore=({}, {})) 31 | Stmts = AstParser([SeqParser([Ref('Expr')], at_least=0,at_most=Undef)], 32 | name="Stmts", 33 | to_ignore=({}, {})) 34 | Stmts.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/parsing_CastMap.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["space"], regex_matcher('\s+')), 7 | (unique_literal_cache_pool["identifier"], regex_matcher('[a-zA-Z_]{1}[a-zA-Z_0-9]*')), 8 | (unique_literal_cache_pool["keyword"], str_matcher(('public', 'for', 'def'))),) 9 | 10 | class UNameEnum: 11 | # names 12 | 13 | space = unique_literal_cache_pool['space'] 14 | identifier = unique_literal_cache_pool['identifier'] 15 | keyword_def = unique_literal_cache_pool['def'] 16 | keyword_for = unique_literal_cache_pool['for'] 17 | keyword_public = unique_literal_cache_pool['public'] 18 | keyword = unique_literal_cache_pool['keyword'] 19 | parserToTest = unique_literal_cache_pool['parserToTest'] 20 | 21 | cast_map = {'def': unique_literal_cache_pool['keyword'], 'for': unique_literal_cache_pool['keyword'], 'public': unique_literal_cache_pool['keyword']} 22 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({"space"}, {}),cast_map=cast_map) 23 | space = LiteralNameParser('space') 24 | identifier = LiteralNameParser('identifier') 25 | keyword = LiteralNameParser('keyword') 26 | parserToTest = AstParser([SeqParser([Ref('identifier')], [Ref('keyword')], at_least=1,at_most=Undef)], 27 | name="parserToTest", 28 | to_ignore=({}, {})) 29 | parserToTest.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/parsing_CastMap.ruiko: -------------------------------------------------------------------------------- 1 | 2 | 3 | ignore [space] 4 | space := R'\s+'; 5 | # ignore the whitespace 
characters. 6 | 7 | 8 | identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*'; 9 | keyword cast := 'def' 'for' 'public'; 10 | 11 | 12 | parserToTest ::= (identifier | keyword)+; -------------------------------------------------------------------------------- /docs/codes/parsing_tokenizer.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["MyTokenType"], str_matcher(('abc', '233'))),) 7 | 8 | class UNameEnum: 9 | # names 10 | 11 | MyTokenType_abc = unique_literal_cache_pool['abc'] 12 | MyTokenType = unique_literal_cache_pool['MyTokenType'] 13 | parserToTest = unique_literal_cache_pool['parserToTest'] 14 | 15 | cast_map = {} 16 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({}, {}),cast_map=cast_map) 17 | MyTokenType = LiteralNameParser('MyTokenType') 18 | parserToTest = AstParser([SeqParser([Ref('MyTokenType')], at_least=1,at_most=Undef)], 19 | name="parserToTest", 20 | to_ignore=({}, {})) 21 | parserToTest.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/parsing_tokenizer.ruiko: -------------------------------------------------------------------------------- 1 | MyTokenType := 'abc' '233'; 2 | parserToTest ::= MyTokenType+; -------------------------------------------------------------------------------- /docs/codes/proj.py: -------------------------------------------------------------------------------- 1 | from Ruikowa.ObjectRegex.ASTDef import Ast 2 | from Ruikowa.ErrorHandler import ErrorHandler 3 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 4 | from Ruikowa.ObjectRegex.Tokenizer import Tokenizer 5 | 6 | from lisp_parser import Stmts, token_table 7 | 8 | import typing as t 9 | 10 | def token_func(src_code: str) -> t.Iterable[Tokenizer]: 11 | return Tokenizer.from_raw_strings(src_code, token_table, ({"space"}, {})) 12 | 13 | parser = ErrorHandler(Stmts.match, token_func) 14 | 15 | def parse(filename: str) -> Ast: 16 | 17 | return parser.from_file(filename) 18 | 19 | 20 | print(parse("test.lisp")) -------------------------------------------------------------------------------- /docs/codes/test.lisp: -------------------------------------------------------------------------------- 1 | (define f (x y z) 2 | (+ x 3 | (+ y z))) -------------------------------------------------------------------------------- /docs/codes/test_lang.py: -------------------------------------------------------------------------------- 1 | 2 | # This file is automatically generated by EBNFParser. 3 | import argparse, json 4 | 5 | cmd_parser = argparse.ArgumentParser(description='test language parsers swiftly.') 6 | cmd_parser.add_argument("parser", type=str, 7 | help='What kind of parser do you want to test with?(e.g Stmt, Expr, ...)') 8 | cmd_parser.add_argument("codes", metavar='codes', type=str, 9 | help='input some codes in your own language here.') 10 | cmd_parser.add_argument('-o', help='output. 
support .json and .ast suffix.', type=str) 11 | cmd_parser.add_argument("--testTk", nargs='?', default=False, const=True) 12 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True, 13 | help='print tokens of grammar file?') 14 | 15 | args = cmd_parser.parse_args() 16 | 17 | if args.debug: 18 | from Ruikowa.Config import Debug 19 | Debug.append(1) 20 | 21 | from Ruikowa.ErrorHandler import ErrorHandler, Colored 22 | from Ruikowa.ObjectRegex.ASTDef import Ast 23 | from Ruikowa.io import grace_open 24 | from just import * 25 | print(Colored.Green,'=========================ebnfparser test script================================', Colored.Clear) 26 | 27 | print_token = args.testTk 28 | ast: Ast = ErrorHandler(eval(args.parser).match, token_func).from_source_code('', args.codes, print_token=print_token) 29 | print(Colored.Blue, ast, Colored.Clear) 30 | if args.o: 31 | o: str = args.o.lower() 32 | if o.endswith('.json'): 33 | grace_open(o).write(json.dumps(ast.dump_to_json(), indent=2)) 34 | elif o.endswith('.ast'): 35 | grace_open(o).write(ast.dump()) 36 | else: 37 | raise Exception('Unsupported file ext.') 38 | 39 | -------------------------------------------------------------------------------- /docs/codes/url.py: -------------------------------------------------------------------------------- 1 | # This file is automatically generated by EBNFParser. 2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["url"], regex_matcher('https.*?\.(com|cn|org|net)')), 7 | (unique_literal_cache_pool["other"], regex_matcher('.')),) 8 | 9 | class UNameEnum: 10 | # names 11 | 12 | url = unique_literal_cache_pool['url'] 13 | other = unique_literal_cache_pool['other'] 14 | parserToTest = unique_literal_cache_pool['parserToTest'] 15 | 16 | cast_map = {} 17 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({}, {}),cast_map=cast_map) 18 | url = LiteralNameParser('url') 19 | other = LiteralNameParser('other') 20 | parserToTest = AstParser([SeqParser([Ref('url')], [Ref('other')], at_least=1,at_most=Undef)], 21 | name="parserToTest", 22 | to_ignore=({"other"}, {})) 23 | parserToTest.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /docs/codes/url.ruiko: -------------------------------------------------------------------------------- 1 | url := R'https.*?\.(com|cn|org|net)'; 2 | other := R'.'; 3 | parserToTest throw [other] ::= (url | other)+; -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # EBNFParser documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Apr 4 17:20:13 2018. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 
15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.todo', 37 | 'sphinx.ext.mathjax', 38 | 'sphinx.ext.viewcode', 39 | 'sphinx.ext.githubpages'] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix(es) of source filenames. 45 | # You can specify multiple suffix as a list of string: 46 | # 47 | # source_suffix = ['.rst', '.md'] 48 | source_suffix = '.rst' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = 'EBNFParser' 55 | copyright = '2018, thautwarm' 56 | author = 'thautwarm' 57 | 58 | # The version info for the project you're documenting, acts as replacement for 59 | # |version| and |release|, also used in various other places throughout the 60 | # built documents. 61 | # 62 | # The short X.Y version. 63 | version = '2.0' 64 | # The full version, including alpha/beta/rc tags. 65 | release = '2.0' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | # 70 | # This is also used if you do content translation via gettext catalogs. 71 | # Usually you set "language" from the command line for these cases. 72 | language = None 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This patterns also effect to html_static_path and html_extra_path 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 78 | 79 | # The name of the Pygments (syntax highlighting) style to use. 80 | pygments_style = 'sphinx' 81 | 82 | # If true, `todo` and `todoList` produce output, else they produce nothing. 83 | todo_include_todos = True 84 | 85 | 86 | # -- Options for HTML output ---------------------------------------------- 87 | 88 | # The theme to use for HTML and HTML Help pages. See the documentation for 89 | # a list of builtin themes. 90 | # 91 | html_theme = 'alabaster' 92 | 93 | # Theme options are theme-specific and customize the look and feel of a theme 94 | # further. For a list of options available for each theme, see the 95 | # documentation. 96 | # 97 | # html_theme_options = {} 98 | 99 | # Add any paths that contain custom static files (such as style sheets) here, 100 | # relative to this directory. They are copied after the builtin static files, 101 | # so a file named "default.css" will overwrite the builtin "default.css". 102 | html_static_path = ['_static'] 103 | 104 | # Custom sidebar templates, must be a dictionary that maps document names 105 | # to template names. 
106 | #
107 | # This is required for the alabaster theme
108 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
109 | html_sidebars = {
110 |     '**': [
111 |         'relations.html',  # needs 'show_related': True theme option to display
112 |         'searchbox.html',
113 |     ]
114 | }
115 | 
116 | 
117 | # -- Options for HTMLHelp output ------------------------------------------
118 | 
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'EBNFParserdoc'
121 | 
122 | 
123 | # -- Options for LaTeX output ---------------------------------------------
124 | 
125 | latex_elements = {
126 |     # The paper size ('letterpaper' or 'a4paper').
127 |     #
128 |     # 'papersize': 'letterpaper',
129 | 
130 |     # The font size ('10pt', '11pt' or '12pt').
131 |     #
132 |     # 'pointsize': '10pt',
133 | 
134 |     # Additional stuff for the LaTeX preamble.
135 |     #
136 |     # 'preamble': '',
137 | 
138 |     # Latex figure (float) alignment
139 |     #
140 |     # 'figure_align': 'htbp',
141 | }
142 | 
143 | # Grouping the document tree into LaTeX files. List of tuples
144 | # (source start file, target name, title,
145 | #  author, documentclass [howto, manual, or own class]).
146 | latex_documents = [
147 |     (master_doc, 'EBNFParser.tex', 'EBNFParser Documentation',
148 |      'thautwarm', 'manual'),
149 | ]
150 | 
151 | 
152 | # -- Options for manual page output ---------------------------------------
153 | 
154 | # One entry per manual page. List of tuples
155 | # (source start file, name, description, authors, manual section).
156 | man_pages = [
157 |     (master_doc, 'ebnfparser', 'EBNFParser Documentation',
158 |      [author], 1)
159 | ]
160 | 
161 | 
162 | # -- Options for Texinfo output -------------------------------------------
163 | 
164 | # Grouping the document tree into Texinfo files. List of tuples
165 | # (source start file, target name, title, author,
166 | #  dir menu entry, description, category)
167 | texinfo_documents = [
168 |     (master_doc, 'EBNFParser', 'EBNFParser Documentation',
169 |      author, 'EBNFParser', 'One line description of project.',
170 |      'Miscellaneous'),
171 | ]
172 | 
173 | 
174 | source_suffix = ['.rst', '.md', '.MD']
175 | html_theme = 'sphinx_rtd_theme'
176 | 
177 | from recommonmark.parser import CommonMarkParser
178 | source_parsers = {
179 |     '.md': CommonMarkParser,
180 |     '.MD': CommonMarkParser,
181 | }
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. EBNFParser documentation master file, created by
2 |    sphinx-quickstart on Wed Apr 4 17:20:13 2018.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | Welcome to EBNFParser's documentation!
7 | ======================================
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 |    :caption: Notes
12 | 
13 |    quickstart
14 |    parsing
15 |    RuikoEBNF
16 | 
17 | 
18 | 
--------------------------------------------------------------------------------
/docs/parsing.rst:
--------------------------------------------------------------------------------
1 | Parsing in EBNFParser
2 | =======================
3 | 
4 | 
5 | EBNFParser is a parser generator framework that parses raw strings into structured nested lists (ASTs).
6 | 
7 | Parsing in EBNFParser has the following steps:
8 | 
9 | Tokenizing
10 | ---------------
11 | 
12 | Tokenizing is the very first step: it splits the input string into a sequence of :code:`Ruikowa.ObjectRegex.Tokenizer` objects.
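As a minimal sketch of this step (using the generated module :code:`docs/codes/parsing_tokenizer.py` that ships with this repository; the printed values are only indicative), the whole step is one call to the generated :code:`token_func`:

.. code :: python

    # `token_func` is defined in the generated parser module and wraps
    # `Tokenizer.from_raw_strings(...)` together with the module's `token_table`.
    from parsing_tokenizer import token_func

    for tk in token_func("abc233233"):
        # each `tk` is a Ruikowa.ObjectRegex.Tokenizer instance
        print(tk.name, tk.string)   # e.g. MyTokenType abc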
13 | 
14 | A :code:`Ruikowa.ObjectRegex.Tokenizer` has the following **readonly** attributes:
15 | 
16 | - name : str
17 |     type of the tokenizer.
18 | - string : str
19 |     string content (from the input raw string) of the tokenizer.
20 | - colno : int
21 |     column number in the current file.
22 | - lineno : int
23 |     line (row) number in the current file.
24 | 
25 | Example:
26 | 
27 | - parsing_tokenizing.ruiko
28 | 
29 | .. code :: shell
30 | 
31 |     MyTokenType := 'abc' '233';
32 |     # The above syntax defines a literal parser to parse strings like "abc" or "233".
33 |     # "abc" and "233" will be added into `token_table` to generate the automatic tokenizing function.
34 | 
35 |     parserToTest ::= MyTokenType+;
36 |     # The above syntax defines a combined parser with `MyTokenType`.
37 | 
38 |     # A combined parser is composed of several literal parsers and other combined parsers,
39 |     # and can handle very complicated sequences of `Ruikowa.ObjectRegex.Tokenizer` objects.
40 | 
41 | - compile it
42 | 
43 | .. code :: shell
44 | 
45 |     ruiko parsing_tokenizing.ruiko parsing_tokenizing.py --test
46 | 
47 | - test it
48 | 
49 | .. code :: shell
50 | 
51 |     python test_lang.py parserToTest "abc233233"
52 |     =========================ebnfparser test script================================
53 |     parserToTest[
54 |         [name: MyTokenType, string: "abc"]
55 |         [name: MyTokenType, string: "233"]
56 |         [name: MyTokenType, string: "233"]
57 |     ]
58 | 
59 | Take care that if you use an anonymous literal pattern when defining a combined parser,
60 | like the following:
61 | 
62 | .. code ::
63 | 
64 |     Just ::= 'just'+;
65 | 
66 | then the name of all the anonymous tokenizers is just :code:`"auto_const"`:
67 | 
68 | .. code ::
69 | 
70 |     ruiko just.ruiko just --test
71 |     python test_lang.py Just "justjustjust"
72 |     =========================ebnfparser test script================================
73 |     Just[
74 |         [name: auto_const, string: "just"]
75 |         [name: auto_const, string: "just"]
76 |         [name: auto_const, string: "just"]
77 |     ]
78 | 
79 | 
80 | CastMap (Optional)
81 | ------------------------
82 | 
83 | Sometimes we need to handle special cases; a vivid instance is :code:`keyword`.
84 | 
85 | The string content of a :code:`keyword` could also be matched
86 | by :code:`identifier` (most programming languages have identifiers),
87 | just as in the following case:
88 | 
89 | - parsing_CastMap.ruiko
90 | 
91 | .. code ::
92 | 
93 | 
94 |     ignore [space]
95 |     space := R'\s+';
96 |     # ignore the whitespace characters.
97 | 
98 | 
99 |     identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*';
100 |     keyword := 'def' 'for' 'public';
101 | 
102 |     parserToTest ::= (identifier | keyword)+;
103 | 
104 | There is no doubt that :code:`identifier` will cover the cases of :code:`keyword`:
105 | 
106 | .. code :: shell
107 | 
108 |     ruiko parsing_CastMap.ruiko parsing_CastMap.py --test
109 |     python test.py parserToTest "def for public"
110 |     =========================ebnfparser test script================================
111 |     parserToTest[
112 |         [name: identifier, string: "def"]
113 |         [name: identifier, string: "for"]
114 |         [name: identifier, string: "public"]
115 |     ]
116 | 
117 | 
118 | Take care that all of the Tokenizers are named **identifier**, not **keyword**!
119 | As a result, keywords could be used in some illegal places, just like:
120 | 
121 | .. code ::
122 | 
123 |     for = 1
124 |     for for <- [for] do
125 |         for
126 | 
127 | The above example might not trouble you, but of course there could be something more severe.
128 | 
129 | I'd like to give the solution adopted by EBNFParser's auto-token mechanism.
130 | 
131 | Modify parsing_CastMap.ruiko:
132 | 
133 | .. code ::
134 | 
135 |     identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*';
136 |     keyword cast := 'def' 'for' 'public';
137 | 
138 | Here we define a :code:`cast map` that maps the strings tokenized by :code:`identifier` (like
139 | :code:`"def"`, :code:`"for"` and :code:`"public"`) to a **const string**, and
140 | outputs a :code:`Ruikowa.ObjectRegex.Tokenizer` whose name is the **const string** :code:`"keyword"`.
141 | 
142 | .. code :: shell
143 | 
144 |     ruiko parsing_CastMap.ruiko parsing_CastMap.py --test
145 |     python test.py parserToTest "def for public other"
146 |     =========================ebnfparser test script================================
147 |     parserToTest[
148 |         [name: keyword, string: "def"]
149 |         [name: keyword, string: "for"]
150 |         [name: keyword, string: "public"]
151 |         [name: identifier, string: "other"]
152 |     ]
153 | 
154 | 
155 | Perfect!
156 | 
157 | 
158 | ReStructure Tokenizers
159 | -----------------------------
160 | 
161 | This is what the word "parsing" accurately refers to.
162 | 
163 | Maybe you've heard of sequence operations like
164 | :code:`flatMap` (Scala-flatMap_), :code:`collect` (FSharp-collect_) and :code:`selectMany` (Linq-SelectMany_);
165 | that's great, because parsing is their inverse!
166 | 
167 | .. code ::
168 | 
169 |     raw words :
170 | 
171 |         ["def", "f", "(", "x", ")", "=", "x"]
172 | 
173 |     after parsing there is an AST:
174 | 
175 |         FunctionDef[
176 |             "f"
177 |             # "def" is thrown away because it is useless to the semantics, but you can
178 |             # preserve it, at the cost of some noise. The same below.
179 |             ArgList[
180 |                 "x"
181 |             ],
182 | 
183 |             Expression[
184 |                 "x"
185 |             ]
186 |         ]
187 | 
188 | The structures of the parse result just match what you defined with EBNF_.
189 | 
190 | Here is an example that generates the above AST by using an EBNF dialect, :code:`ruiko`,
191 | which is proposed by EBNFParser to extend primary EBNF.
192 | 
193 | .. code :: ebnf
194 | 
195 |     keyword cast as K := 'def';
196 |     identifier := R'[a-zA-Z_]{1}[a-zA-Z_0-9]*';
197 |     FunctionDef throw ['def'] ::= K'def' identifier '(' ArgList ')' '=' Expression;
198 |     Expression ::= ... # omit
199 |     ArgList ::= ... # omit
200 | 
201 | 
202 | What's more, EBNFParser supports unlimited **left recursion**.
203 | 
204 | .. _Scala-flatMap: https://www.scala-lang.org/api/current/?search=flatMap
205 | 
206 | .. _FSharp-collect: https://msdn.microsoft.com/en-us/visualfsharpdocs/conceptual/list.collect['t,'u]-function-[fsharp]
207 | 
208 | .. _Linq-SelectMany: https://msdn.microsoft.com/en-us/library/bb534336(v=vs.110).aspx
209 | 
210 | .. _EBNF: https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form
211 | 
212 | 
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | Quick Start
2 | ================
3 | 
4 | 
5 | 
6 | Installing
7 | --------------------------------
8 | 
9 | EBNFParser currently supports Python 3.6+ only.
10 | 
11 | You can install it from **PyPI**:
12 | 
13 | .. code :: shell
14 | 
15 |     pip install -U EBNFParser
16 | 
17 | 
18 | 
19 | Hello World
20 | --------------------------------
21 | 
22 | As our first attempt, we can try to parse Lisp syntax into an AST (Abstract Syntax Tree).
23 | 
24 | .. code :: lisp
25 | 
26 |     (define add3 (x y z)
27 |         (add x
28 |             (add y z)))
29 | 
30 | 
31 | Here is a source code example:
32 | 
33 | - lisp.ruiko
34 | 
35 | .. code ::
36 | 
37 |     ignore [space]   # ignore the tokens with these names.
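    # note: `:=` defines a literal (tokenizer) rule, while `::=` defines a
    # combined parser that produces an AST node (see docs/parsing.rst).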
38 | 
39 |     space := R'\s';
40 | 
41 |     Atom := R'[^\(\)\s\`]+'; # use Regex
42 | 
43 |     Expr  ::= Atom
44 |             | Quote
45 |             | '(' Expr* ')';
46 | 
47 | 
48 |     Quote ::= '`' Expr ;
49 | 
50 |     Stmts ::= Expr*;
51 | 
52 | 
53 | 
54 | Then let EBNFParser generate a parser and a test script from it automatically.
55 | 
56 | Finally, test it.
57 | 
58 | .. code ::
59 | 
60 |     ruiko lisp.ruiko lisp_parser.py --test
61 |     python test_lang.py Stmts "(define f (x y z) (add (add x y) z))"
62 |     =========================ebnfparser test script================================
63 |     Stmts[
64 |         Expr[
65 |             [name: auto_const, string: "("]
66 |             Expr[
67 |                 [name: Atom, string: "define"]
68 |     ...(omit)
69 | 
70 | 
71 | 
72 | Integrate EBNFParser Into Your Own Project
73 | ---------------------------------------------
74 | 
75 | For example, suppose we have generated the Lisp parser file above as a module :code:`MyProject.Lisp.parser`.
76 | 
77 | .. code :: python
78 | 
79 |     from Ruikowa.ObjectRegex.ASTDef import Ast
80 |     from Ruikowa.ErrorHandler import ErrorHandler
81 |     from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
82 |     from Ruikowa.ObjectRegex.Tokenizer import Tokenizer
83 | 
84 |     from lisp_parser import Stmts, token_table
85 | 
86 |     import typing as t
87 | 
88 |     def token_func(src_code: str) -> t.Iterable[Tokenizer]:
89 |         return Tokenizer.from_raw_strings(
90 |                     src_code, token_table, ({"space"}, {}))
91 | 
92 |     parser = ErrorHandler(Stmts.match, token_func)
93 | 
94 |     def parse(filename: str) -> Ast:
95 | 
96 |         return parser.from_file(filename)
97 | 
98 |     # just create a file `test.lisp` and write some Lisp code in it.
99 |     print(parse("./test.lisp"))
100 | 
101 | 
102 | 
103 | A :code:`Ruikowa.ObjectRegex.Ast` is a nested list of Tokenizers, for instance:
104 | 
105 | .. code ::
106 | 
107 |     AstName[
108 |         AstName[
109 |             Tokenizer1
110 |             Tokenizer2
111 |             AstName[
112 |                 ...
113 |             ]
114 |         ]
115 |         Tokenizer3
116 |     ]
117 | 
118 | You can use :code:`obj.name` to get the name of an instance of :code:`Ast` or :code:`Tokenizer`.
119 | 
120 | 
121 | 
122 | 
123 | 
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | # install
2 | bash INSTALL.sh
3 | 
4 | # ruiko
5 | bash testRuikowa.sh
--------------------------------------------------------------------------------
/testRuikowa.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | if test -n "$1"
3 | then
4 |     bash INSTALL.sh
5 | 
6 | fi
7 | cd tests/Ruikowa/Lang
8 | 
9 | bash Lisp/testLisp.sh
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/grammar:
--------------------------------------------------------------------------------
1 | ignore [N]
2 | 
3 | someConst cast as K := 'as' 'we' 'can';
4 | 
5 | N := R'\n', R'\t', ' ';
6 | 
7 | Atom := R'[^\(\)\s\`]+'; # use Regex
8 | 
9 | Expr  ::= Atom
10 |         | Quote
11 |         | '(' Expr* ')' (K'as' K'we' K'can');
12 | 
13 | 
14 | Quote ::= '`' Expr ;
15 | Stmt  ::= Expr*;
16 | 
17 | 
18 | 
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/pparser.py:
--------------------------------------------------------------------------------
1 | # This file is automatically generated by EBNFParser.
2 | from Ruikowa.ObjectRegex.Tokenizer import unique_literal_cache_pool, regex_matcher, char_matcher, str_matcher, Tokenizer 3 | from Ruikowa.ObjectRegex.Node import AstParser, Ref, SeqParser, LiteralValueParser as L, LiteralNameParser, Undef 4 | namespace = globals() 5 | recur_searcher = set() 6 | token_table = ((unique_literal_cache_pool["someConst"], str_matcher(('we', 'can', 'as'))), 7 | (unique_literal_cache_pool["N"], regex_matcher('\n')), 8 | (unique_literal_cache_pool["N"], regex_matcher('\t')), 9 | (unique_literal_cache_pool["N"], char_matcher((' '))), 10 | (unique_literal_cache_pool["Atom"], regex_matcher('[^\(\)\s\`]+')), 11 | (unique_literal_cache_pool["auto_const"], char_matcher(('`', ')', '('))),) 12 | 13 | class UNameEnum: 14 | # names 15 | 16 | someConst_as = unique_literal_cache_pool['as'] 17 | someConst_we = unique_literal_cache_pool['we'] 18 | someConst_can = unique_literal_cache_pool['can'] 19 | someConst = unique_literal_cache_pool['someConst'] 20 | N = unique_literal_cache_pool['N'] 21 | Atom = unique_literal_cache_pool['Atom'] 22 | Expr = unique_literal_cache_pool['Expr'] 23 | Quote = unique_literal_cache_pool['Quote'] 24 | Stmt = unique_literal_cache_pool['Stmt'] 25 | 26 | cast_map = {'as': unique_literal_cache_pool['someConst'], 'we': unique_literal_cache_pool['someConst'], 'can': unique_literal_cache_pool['someConst']} 27 | token_func = lambda _: Tokenizer.from_raw_strings(_, token_table, ({"N"}, {}),cast_map=cast_map) 28 | someConst = LiteralNameParser('someConst') 29 | N = LiteralNameParser('N') 30 | Atom = LiteralNameParser('Atom') 31 | Expr = AstParser([Ref('Atom')], 32 | [Ref('Quote')], 33 | ['(', SeqParser([Ref('Expr')], at_least=0,at_most=Undef), ')', SeqParser([('someConst', 'as'), ('someConst', 'we'), ('someConst', 'can')], at_least=1,at_most=1)], 34 | name="Expr", 35 | to_ignore=({}, {})) 36 | Quote = AstParser(['`', Ref('Expr')], 37 | name="Quote", 38 | to_ignore=({}, {})) 39 | Stmt = AstParser([SeqParser([Ref('Expr')], at_least=0,at_most=Undef)], 40 | name="Stmt", 41 | to_ignore=({}, {})) 42 | Stmt.compile(namespace, recur_searcher) -------------------------------------------------------------------------------- /tests/Ruikowa/Lang/Lisp/test.ast: -------------------------------------------------------------------------------- 1 | Stmt[ 2 | Expr[ 3 | [name: :char, string: "("] 4 | Expr[ 5 | [name: Atom, string: "+"] 6 | ] 7 | Expr[ 8 | [name: Atom, string: "1"] 9 | ] 10 | Expr[ 11 | [name: Atom, string: "2"] 12 | ] 13 | [name: :char, string: ")"] 14 | ] 15 | ] -------------------------------------------------------------------------------- /tests/Ruikowa/Lang/Lisp/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Stmt", 3 | "value": [ 4 | { 5 | "name": "Expr", 6 | "value": [ 7 | { 8 | "name": "auto_const", 9 | "string": "(", 10 | "lineno": 1, 11 | "colno": 5 12 | }, 13 | { 14 | "name": "Expr", 15 | "value": [ 16 | { 17 | "name": "Atom", 18 | "string": "+", 19 | "lineno": 1, 20 | "colno": 6 21 | } 22 | ] 23 | }, 24 | { 25 | "name": "Expr", 26 | "value": [ 27 | { 28 | "name": "Atom", 29 | "string": "1", 30 | "lineno": 1, 31 | "colno": 8 32 | } 33 | ] 34 | }, 35 | { 36 | "name": "Expr", 37 | "value": [ 38 | { 39 | "name": "Atom", 40 | "string": "2", 41 | "lineno": 1, 42 | "colno": 10 43 | } 44 | ] 45 | }, 46 | { 47 | "name": "auto_const", 48 | "string": ")", 49 | "lineno": 1, 50 | "colno": 11 51 | }, 52 | { 53 | "name": "someConst", 54 | "string": "as", 55 | "lineno": 1, 56 | "colno": 14 57 | }, 
58 |         {
59 |           "name": "someConst",
60 |           "string": "we",
61 |           "lineno": 1,
62 |           "colno": 17
63 |         },
64 |         {
65 |           "name": "someConst",
66 |           "string": "can",
67 |           "lineno": 1,
68 |           "colno": 21
69 |         }
70 |       ]
71 |     }
72 |   ]
73 | }
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/testLisp.sh:
--------------------------------------------------------------------------------
1 | cd Lisp
2 | 
3 | ruiko grammar pparser.py
4 | 
5 | python test_lang.py Stmt "
6 | (+ 1 2) as we can
7 | " -o test.json --testTk
8 | 
--------------------------------------------------------------------------------
/tests/Ruikowa/Lang/Lisp/test_lang.py:
--------------------------------------------------------------------------------
1 | 
2 | # This file is automatically generated by EBNFParser.
3 | import argparse, json
4 | 
5 | cmd_parser = argparse.ArgumentParser(description='test language parsers swiftly.')
6 | cmd_parser.add_argument("parser", type=str,
7 |                         help='What kind of parser do you want to test with?(e.g Stmt, Expr, ...)')
8 | cmd_parser.add_argument("codes", metavar='codes', type=str,
9 |                         help='input some codes in your own language here.')
10 | cmd_parser.add_argument('-o', help='output. support .json and .ast suffix.', type=str)
11 | cmd_parser.add_argument("--testTk", nargs='?', default=False, const=True)
12 | cmd_parser.add_argument('--debug', nargs='?', default=False, const=True,
13 |                         help='print tokens of grammar file?')
14 | 
15 | args = cmd_parser.parse_args()
16 | 
17 | if args.debug:
18 |     from Ruikowa.Config import Debug
19 |     Debug.append(1)
20 | 
21 | from Ruikowa.ErrorHandler import ErrorHandler, Colored
22 | from Ruikowa.ObjectRegex.ASTDef import Ast
23 | from Ruikowa.io import grace_open
24 | from pparser import *
25 | print(Colored.Green,'=========================ebnfparser test script================================', Colored.Clear)
26 | 
27 | print_token = args.testTk
28 | ast: Ast = ErrorHandler(eval(args.parser).match, token_func).from_source_code('', args.codes, print_token=print_token)
29 | print(Colored.Blue, ast, Colored.Clear)
30 | if args.o:
31 |     o: str = args.o.lower()
32 |     if o.endswith('.json'):
33 |         grace_open(o).write(json.dumps(ast.dump_to_json(), indent=2))
34 |     elif o.endswith('.ast'):
35 |         grace_open(o).write(ast.dump())
36 |     else:
37 |         raise Exception('Unsupported file ext.')
38 | 
39 | 
--------------------------------------------------------------------------------
/tests/Ruikowa/test.py:
--------------------------------------------------------------------------------
1 | 
2 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser
3 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
4 | from Ruikowa.Core.BaseDef import Trace
5 | inputs = ['a', '\n', 'abc']
6 | charParser1 = CharParser('a')
7 | charParser2 = CharParser('\n')
8 | litParser = LiteralParser.RawFormDealer(rawStr='abc', name = 'ABC')
9 | meta = MetaInfo()
10 | assert charParser1.match(inputs, meta) == 'a'
11 | assert litParser.match(inputs, meta) is None
12 | assert charParser2.match(inputs, meta) == '\n'
13 | assert litParser.match(inputs, meta) == 'abc'
14 | 
15 | a = LiteralParser('a', name = 'a')
16 | c = LiteralParser('c', name = 'c')
17 | d = LiteralParser('d', name = 'd')
18 | ASeq = AstParser([Ref('U'), d],[a], name = 'ASeq')
19 | U = AstParser([Ref('ASeq'), c], name = 'U')
20 | namespace = globals()
21 | seset = set()
22 | ASeq.compile(namespace, seset)
23 | x = MetaInfo()
24 | print(ASeq.match(['a', 'c','d','c','d','k'], x))
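# The assertions and the ASeq/U grammar above exercise literal matching and
# indirect left recursion (ASeq -> U -> ASeq); the block below rebuilds ASeq
# with direct left recursion and dumps the resulting AST to JSON.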
25 | 26 | 27 | a = LiteralParser('a', name = 'a') 28 | c = LiteralParser('c', name = 'c') 29 | d = LiteralParser('d', name = 'd') 30 | ASeq = AstParser([Ref('ASeq'), d],[a], name = 'ASeq') 31 | #U = AstParser([Ref('ASeq'), c], name = 'U') 32 | namespace = globals() 33 | seset = set() 34 | ASeq.compile(namespace, seset) 35 | x = MetaInfo() 36 | print(ASeq.match(['a', 'd','d','d','d','d','g'], x).dump_to_json()) 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /tests/Ruikowa/testBootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Oct 17 20:07:44 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from Ruikowa.Bootstrap.Parser import * 10 | from Ruikowa.ObjectRegex.MetaInfo import * 11 | from Ruikowa.Bootstrap.Ast import ast_for_stmts 12 | words = token.findall(r"""Token {{ 13 | def token(input_str): 14 | return list(input_str) 15 | }} 16 | Stmt Throw ['\n'] ::= (NEWLINE* Equals* NEWLINE*)* 17 | Expr ::= Or ('|' Or)* 18 | Or ::= AtomExpr+ 19 | AtomExpr::= Atom [Trailer] 20 | Atom ::= Str | Name | '[' Expr ']' | '(' Expr ')' 21 | Equals ::= Name LitDef Str | Name Def Expr 22 | Trailer::= '*' | '+' | '{' Number{1 2} '}' 23 | Def := '::=' 24 | LitDef := ':=' 25 | Str := R'"[\w|\W]*?"' 26 | Name := R'[a-zA-Z_][a-zA-Z0-9]*' 27 | Number := R'\d+' 28 | NEWLINE:= '\n'""") 29 | meta = MetaInfo() 30 | res = Stmt.match(words, meta) 31 | print(res) 32 | -------------------------------------------------------------------------------- /tests/Ruikowa/testCycleLeftRecur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Oct 17 20:03:23 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser 10 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo 11 | from Ruikowa.Core.BaseDef import Trace 12 | a = LiteralParser('a', name = 'a') 13 | c = LiteralParser('c', name = 'c') 14 | d = LiteralParser('d', name = 'd') 15 | ASeq = AstParser([Ref('U'), d],[a], name = 'ASeq') 16 | U = AstParser([Ref('ASeq'), c], name = 'U') 17 | namespace = globals() 18 | seset = set() 19 | ASeq.compile(namespace, seset) 20 | x = MetaInfo() 21 | print('test result => ') 22 | print(ASeq.match(['a', 'c','d','c','d','k'], x)) -------------------------------------------------------------------------------- /tests/Ruikowa/testCycleLeftRecur3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Oct 19 18:38:03 2017 5 | 6 | @author: misakawa 7 | """ 8 | 9 | from Ruikowa.ObjectRegex.Node import Ref, AstParser, SeqParser, LiteralParser, CharParser, MetaInfo 10 | import re 11 | token = re.compile("t|\)|\(").findall 12 | namespace = globals() 13 | recurSearcher = set() 14 | type = LiteralParser('t', name = 'type') 15 | prefix = AstParser([Ref('prefix'), 16 | LiteralParser('(', name='LP'), 17 | SeqParser([Ref('prefix')]), 18 | LiteralParser(')', name='RP')], 19 | [Ref('type')], name = 'prefix') 20 | 21 | prefix.compile(namespace, recurSearcher) -------------------------------------------------------------------------------- /tests/Ruikowa/testCycleLeftRecurAndDumpToJSON.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Oct 17 20:06:10 2017
5 | 
6 | @author: misakawa
7 | """
8 | 
9 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser
10 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
11 | from Ruikowa.Core.BaseDef import Trace
12 | 
13 | a = LiteralParser('a', name = 'a')
14 | c = LiteralParser('c', name = 'c')
15 | d = LiteralParser('d', name = 'd')
16 | ASeq = AstParser([Ref('ASeq'), d],[a], name = 'ASeq')
17 | 
18 | 
19 | #U = AstParser([Ref('ASeq'), c], name = 'U')
20 | namespace = globals()
21 | seset = set()
22 | ASeq.compile(namespace, seset)
23 | x = MetaInfo()
24 | print(ASeq.match(['a', 'd','d','d','d','d'], x).dump_to_json())
--------------------------------------------------------------------------------
/tests/Ruikowa/testLiteralParser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Oct 17 20:03:08 2017
5 | 
6 | @author: misakawa
7 | """
8 | 
9 | from Ruikowa.ObjectRegex.Node import Ast, Ref, LiteralParser, CharParser, SeqParser, AstParser
10 | from Ruikowa.ObjectRegex.MetaInfo import MetaInfo
11 | from Ruikowa.Core.BaseDef import Trace
12 | inputs = ['a', '\n', 'abc']
13 | charParser1 = CharParser('a')
14 | charParser2 = CharParser('\n')
15 | litParser = LiteralParser.RawFormDealer(rawStr='abc', name = 'ABC')
16 | meta = MetaInfo()
17 | assert charParser1.match(inputs, meta) == 'a'
18 | assert litParser.match(inputs, meta) is None
19 | assert charParser2.match(inputs, meta) == '\n'
20 | assert litParser.match(inputs, meta) == 'abc'
--------------------------------------------------------------------------------