├── .gitignore
├── .travis.yml
├── CONTRIBUTIONS
├── LICENSE
├── MANIFEST.in
├── README.md
├── documentation
    └── example.md
├── pydsl
    ├── __init__.py
    ├── check.py
    ├── contrib
    │   ├── __init__.py
    │   ├── alphabet
    │   │   ├── __init__.py
    │   │   └── test_alphabet.py
    │   ├── bnfgrammar.py
    │   ├── grammar
    │   │   ├── Date.bnf
    │   │   ├── Date.parsley
    │   │   ├── DayOfMonth.py
    │   │   ├── Grammar2RecursiveDescentParserRecognizer.py
    │   │   ├── HTMLTable.bnf
    │   │   ├── ImageFile.py
    │   │   ├── LogicalExpression.bnf
    │   │   ├── MimeType.py
    │   │   ├── SpanishID.py
    │   │   ├── TrueFalse.bnf
    │   │   ├── TrueHTMLTable.bnf
    │   │   ├── __init__.py
    │   │   ├── calc_ply.py
    │   │   ├── cstring.py
    │   │   ├── example_ply.py
    │   │   ├── integerop.py
    │   │   ├── logline.bnf
    │   │   ├── mongoquery.bnf
    │   │   └── protocol.py
    │   ├── mongogrammar.py
    │   ├── regexps.py
    │   ├── spark
    │   │   ├── spark_example.py
    │   │   ├── spark_parse_example.py
    │   │   └── spark_scan_example.py
    │   └── translator
    │   │   ├── calculator.py
    │   │   ├── calculator_bnf.py
    │   │   ├── chemicalFormulas.py
    │   │   └── echo.py
    ├── diff.py
    ├── encoding.py
    ├── equal.py
    ├── exceptions.py
    ├── external
    │   ├── __init__.py
    │   └── spark.py
    ├── extract.py
    ├── file
    │   ├── BNF.py
    │   ├── __init__.py
    │   ├── parsley.py
    │   ├── python.py
    │   └── regexp.py
    ├── grammar
    │   ├── BNF.py
    │   ├── PEG.py
    │   ├── __init__.py
    │   ├── definition.py
    │   ├── parsley.py
    │   └── symbol.py
    ├── guess.py
    ├── lex.py
    ├── parser
    │   ├── LL.py
    │   ├── LR0.py
    │   ├── PEG.py
    │   ├── README.md
    │   ├── __init__.py
    │   ├── backtracing.py
    │   └── parser.py
    ├── token.py
    ├── translator.py
    └── tree.py
├── requirements.txt
├── setup.py
└── tests
    ├── FOL.g
    ├── __init__.py
    ├── functional
        ├── __init__.py
        ├── test_Binary.py
        ├── test_Case.py
        └── test_LogicGrammars.py
    └── unit
        ├── __init__.py
        ├── test_Alphabet.py
        ├── test_BNF.py
        ├── test_BNFLoad.py
        ├── test_Checker.py
        ├── test_Diff.py
        ├── test_Equal.py
        ├── test_Extract.py
        ├── test_GrammarDefinition.py
        ├── test_Guess.py
        ├── test_Lexer.py
        ├── test_PEG.py
        ├── test_Parser.py
        ├── test_Parsley.py
        ├── test_RegularExpression.py
        ├── test_Translate.py
        └── test_Tree.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[co]
 2 | tags
 3 | pylintrc
 4 | .*.swp
 5 | *.png
 6 | *~
 7 | build
 8 | .coverage
 9 | htmlcov
10 | comp
11 | dist
12 | *.egg-info
13 | *.out
14 | *.dictc
15 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.4"
 4 |   - "3.5"
 5 | # command to install dependencies
 6 | install:
 7 |   - "pip install ."
 8 |   - "pip install -r requirements.txt"
 9 | # command to run tests
10 | script:
11 |   - nosetests tests
12 |   - pylint -E pydsl/
13 | 


--------------------------------------------------------------------------------
/CONTRIBUTIONS:
--------------------------------------------------------------------------------
1 |  * check existing issues: https://github.com/nesaro/pydsl
2 |  * read the project's blog: http://pydsl.blogspot.co.uk
3 |  * pull requests :)
4 | 
5 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include pydsl/contrib/dict *.dict
2 | recursive-include pydsl/contrib/grammar *.bnf *.re *.parsley
3 | exclude tests
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | DESCRIPTION
 2 | ===========
 3 | 
 4 | pydsl is a language library written in python. It provides some verbs for Grammars.
 5 | 
 6 |     from pydsl.file.BNF import load_bnf_file
 7 |     grammardefinition = loand_bnf_file('myfile.bnf')
 8 |     grammardefinition.alphabet # Returns the alphabet used by this grammar
 9 |     grammardefinition.minsize
10 |     grammardefinition.maxsize 
11 |     grammardefinition.enumerate # Returns a generator that generates every accepted word
12 | 
13 |     from pydsl.check import check
14 |     check(grammardefinition,'mystring') # returns True or False
15 | 
16 |     from pydsl.parser import parse
17 |     parse(grammardefinition,'mystring') # returns a ParseTree
18 | 
19 |     from pydsl.extract import extract
20 |     extract(grammardefinition,'abcmystringabc') # returns ('mystring',3,11)
21 | 
22 | FORMATS
23 | =======
24 | 
25 | Functions
26 | ---------
27 | 
28 | | Format  |Check|Match|Search|Split|Extract|Translate|Validate|Diff|
29 | | ------- |:---:|-----|------|-----|:-----:|:-------:|:------:|----|
30 | | BNF     | V   |     |      |     |V      | Parse   | V      |    |
31 | | regexp  | V   |     |      |     |V      | X       |        |    |
32 | | ply     | V   |     |      |     |V      | V       |        |    |
33 | | parsley | V   |     |      |     |V      | V       |        |    |
34 | |pyparsing| V   |     |      |     |V      |         |        |    |
35 | 
36 | Properties
37 | ----------
38 | 
39 | | Format  |First|Min|Max|Enumerate|
40 | | ------- |:---:|---|---|---------|
41 | | BNF     | V   |   |   |         |
42 | | regexp  |     |   |   |         |
43 | | ply     |     |   |   |         |
44 | | parsley |     |   |   |         |
45 | |pyparsing|     |   |   |         |
46 | 
47 | 
48 | INSTALLATION
49 | ============
50 |  * disttools:
51 |    * python3 setup.py install
52 |  * pip:
53 |    * pip install pydsl
54 | 
55 | CONTRIBUTIONS
56 | =============
57 |  * check existing issues: https://github.com/nesaro/pydsl
58 |  * read the project's blog: http://pydsl.blogspot.co.uk
59 |  * pull requests :)
60 | 
61 | 
62 | REQUIREMENTS
63 | ============
64 |  * python >= 3.4
65 |  * optional: ply library ( http://www.dabeaz.com/ply/ )
66 | 
67 | ## Example
68 | 
69 | See [examples](documentation/example.md)
70 | 
71 | ABOUT
72 | =====
73 | Copyright (C) 2008-2015 Nestor Arocha (nesaro@gmail.com)
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/documentation/example.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from pydsl.file.BNF import load_bnf_file
 3 | from pydsl.file.python import load_python_file
 4 | from pydsl.translator import translator_factory
 5 | truefalse = load_bnf_file('pydsl/contrib/grammar/TrueFalse.bnf')
 6 | grammardefinition = load_bnf_file('pydsl/contrib/grammar/LogicalExpression.bnf', {'TrueFalse':truefalse})
 7 | grammardefinition.alphabet # Returns the alphabet used by this grammar
 8 | grammardefinition.minsize
 9 | grammardefinition.maxsize
10 | from pydsl.check import check
11 | check(grammardefinition,['(']) # returns True or False
12 | from pydsl.parser import parse
13 | parse(grammardefinition,[')']) # returns a ParseTree
14 | from pydsl.extract import extract
15 | extract(grammardefinition,'abc()abc') # returns ('False',3,11)
16 | solver = translator_factory(load_python_file('pydsl/contrib/translator/echo.py'))
17 | mystring = "True||False"
18 | result = solver(mystring)
19 | print(result)
20 | ```
21 | 
22 | 


--------------------------------------------------------------------------------
/pydsl/__init__.py:
--------------------------------------------------------------------------------
1 | VERSION = (0,5,3)
2 | 


--------------------------------------------------------------------------------
/pydsl/check.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | 
 19 | __author__ = "Nestor Arocha"
 20 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 21 | __email__ = "nesaro@gmail.com"
 22 | 
 23 | import logging
 24 | from collections import Iterable
 25 | from jsonschema import FormatChecker
 26 | LOG = logging.getLogger(__name__)
 27 | 
 28 | 
 29 | def check(definition, data, *args, **kwargs):
 30 |     """Checks if the input follows the definition"""
 31 |     checker = checker_factory(definition)
 32 |     return checker(data, *args, **kwargs)
 33 | 
 34 | def checker_factory(grammar):
 35 |     from pydsl.grammar.BNF import BNFGrammar
 36 |     from pydsl.grammar.PEG import Sequence, Choice, OneOrMore, ZeroOrMore
 37 |     from pydsl.grammar.definition import PLYGrammar, RegularExpression, String, PythonGrammar, JsonSchema
 38 |     from pydsl.grammar.parsley import ParsleyGrammar
 39 |     if isinstance(grammar, str) and not isinstance(grammar, String):
 40 |         raise TypeError(grammar)
 41 |     if isinstance(grammar, BNFGrammar):
 42 |         return BNFChecker(grammar)
 43 |     elif isinstance(grammar, JsonSchema):
 44 |         return JsonSchemaChecker(grammar)
 45 |     elif isinstance(grammar, RegularExpression):
 46 |         return RegularExpressionChecker(grammar)
 47 |     elif isinstance(grammar, PythonGrammar) or isinstance(grammar, dict) and "matchFun" in grammar:
 48 |         return PythonChecker(grammar)
 49 |     elif isinstance(grammar, PLYGrammar):
 50 |         return PLYChecker(grammar)
 51 |     elif isinstance(grammar, Choice):
 52 |         return ChoiceChecker(grammar)
 53 |     elif isinstance(grammar, ParsleyGrammar):
 54 |         return ParsleyChecker(grammar)
 55 |     elif isinstance(grammar, String):
 56 |         return StringChecker(grammar)
 57 |     elif isinstance(grammar, Sequence):
 58 |         return SequenceChecker(grammar)
 59 |     elif isinstance(grammar, OneOrMore):
 60 |         return OneOrMoreChecker(grammar)
 61 |     elif isinstance(grammar, ZeroOrMore):
 62 |         return ZeroOrMoreChecker(grammar)
 63 |     elif isinstance(grammar, Iterable):
 64 |         return ChoiceChecker(grammar)
 65 |     else:
 66 |         raise ValueError(grammar)
 67 | 
 68 | 
 69 | class Checker(object):
 70 |     """ Ensures that input follows a rule, protocol, grammar alphabet..."""
 71 |     def __init__(self):
 72 |         pass
 73 | 
 74 |     def __call__(self, *args, **kwargs):
 75 |         return self.check(*args, **kwargs)
 76 | 
 77 |     def check(self, value):# -> bool:
 78 |         raise NotImplementedError
 79 | 
 80 | class RegularExpressionChecker(Checker):
 81 |     def __init__(self, regexp, flags = ""):
 82 |         Checker.__init__(self)
 83 |         import re
 84 |         self.__regexpstr = regexp
 85 |         myflags = 0
 86 |         if "i" in flags:
 87 |             myflags |= re.I
 88 |         if isinstance(regexp, str):
 89 |             self.__regexp = re.compile(regexp, myflags)
 90 |         else:
 91 |             self.__regexp = regexp
 92 | 
 93 |     def check(self, data):
 94 |         """returns True if any match any regexp"""
 95 |         if isinstance(data, Iterable):
 96 |             data = "".join(str(x) for x in data)
 97 |         try:
 98 |             data = str(data)
 99 |         except UnicodeDecodeError:
100 |             return False
101 |         return bool(data and self.__regexp.match(data))
102 | 
103 | 
104 | class BNFChecker(Checker):
105 |     """Calls another program to perform checking. Args are always file names"""
106 |     def __init__(self, bnf, parser = None):
107 |         Checker.__init__(self)
108 |         self.gd = bnf
109 |         parser = bnf.options.get("parser", parser)
110 |         if parser in ("descent", "auto", "default", None):
111 |             from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
112 |             self.__parser = BacktracingErrorRecursiveDescentParser(bnf)
113 |         else:
114 |             raise ValueError("Unknown parser : " + parser)
115 | 
116 |     def check(self, data):
117 |         if isinstance(data, str):
118 |             from pydsl.token import PositionToken
119 |             from pydsl.encoding import ascii_encoding
120 |             data = [PositionToken(x, ascii_encoding, i, i+1) for i,x in enumerate(data)]
121 |         if not isinstance(data, Iterable):
122 |             raise TypeError(data)
123 |         if not all(check(self.gd.alphabet, [x]) for x in data):
124 |             LOG.warning("Invalid input: %s,%s" % (self.gd.alphabet, data))
125 |             return False
126 |         try:
127 |             return len(self.__parser.get_trees(data)) > 0
128 |         except IndexError:
129 |             return False 
130 | 
131 | class ParsleyChecker(Checker):
132 |     def __init__(self, grammar):
133 |         Checker.__init__(self)
134 |         self.gd=grammar
135 | 
136 |     def check(self, data):
137 |         from parsley import ParseError
138 |         try:
139 |             getattr(self.gd.grammar(data), self.gd.root_rule)() #call grammar(data).root_rule()
140 |             return True
141 |         except ParseError:
142 |             return False
143 | 
144 | class PythonChecker(Checker):
145 |     def __init__(self, module):
146 |         Checker.__init__(self)
147 |         self._matchFun = module["matchFun"]
148 | 
149 |     def check(self, data):
150 |         return self._matchFun(data)
151 | 
152 | 
153 | class PLYChecker(Checker):
154 |     def __init__(self, gd):
155 |         Checker.__init__(self)
156 |         self.module = gd.module
157 | 
158 |     def check(self, data):
159 |         if isinstance(data, Iterable):
160 |             data = "".join([str(x) for x in data])
161 |         from ply import yacc, lex
162 |         lexer = lex.lex(self.module)
163 |         parser = yacc.yacc(module = self.module)
164 |         from pydsl.exceptions import ParseError
165 |         try:
166 |             parser.parse(data, lexer = lexer)
167 |         except ParseError:
168 |             return False
169 |         return True
170 | 
171 | class StringChecker(Checker):
172 |     def __init__(self, gd):
173 |         Checker.__init__(self)
174 |         self.gd = gd
175 | 
176 |     def check(self, data):
177 |         if isinstance(data, Iterable) and not isinstance(data, str):
178 |             data = "".join([str(x) for x in data])
179 |         if not isinstance(data, str):
180 |             raise TypeError(data.__class__.__name__)
181 |         return self.gd == str(data)
182 | 
183 | def formatchecker_factory(**checkerdict):
184 |     """Converts a dictionary of strings:checkers into a formatchecker object"""
185 |     fc = FormatChecker()
186 |     for format_name, checker in checkerdict.items():
187 |         fc.checks(format_name)(checker)
188 |     return fc
189 | 
190 | 
191 | class JsonSchemaChecker(Checker):
192 |     def __init__(self, gd, formatdict = None):
193 |         Checker.__init__(self)
194 |         self.gd = gd
195 |         formatdict = formatdict or {}
196 |         self.formatchecker = formatchecker_factory(**formatdict)
197 | 
198 |     def check(self, data, raise_exceptions = False):
199 |         from jsonschema import validate, ValidationError
200 |         try:
201 |             validate(data, self.gd, format_checker = self.formatchecker)
202 |         except ValidationError:
203 |             if raise_exceptions:
204 |                 raise
205 |             return False
206 |         return True
207 | 
208 | class ChoiceChecker(Checker):
209 |     def __init__(self, gd):
210 |         Checker.__init__(self)
211 |         self.gd = gd
212 |         self.checkerinstances = [checker_factory(x) for x in self.gd]
213 | 
214 |     def check(self, data):
215 |         if not isinstance(data, Iterable):
216 |             raise TypeError(data.__class__.__name__)
217 |         return any((x.check(data) for x in self.checkerinstances))
218 | 
219 | class SequenceChecker(Checker):
220 |     def __init__(self, sequence):
221 |         Checker.__init__(self)
222 |         from pydsl.grammar import Grammar
223 |         for x in sequence:
224 |             if not isinstance(x, Grammar):
225 |                 raise TypeError("Expected grammar, got %s" % (x.__class__.__name__,))
226 |         self.sequence = sequence
227 | 
228 |     def check(self, data):
229 |         if not isinstance(data, Iterable):
230 |             raise TypeError(data.__class__.__name__)
231 |         if len(self.sequence) != len(data):
232 |             return False
233 |         return all(check(self.sequence[x], [data[x]]) for x in range(len(self.sequence)))
234 | 
235 | 
236 | class OneOrMoreChecker(Checker):
237 |     def __init__(self, element):
238 |         Checker.__init__(self)
239 |         self.element = element
240 | 
241 |     def check(self, data):
242 |         return bool(data) and all(check(self.element.element, x) for x in data)
243 | 
244 | class ZeroOrMoreChecker(Checker):
245 |     def __init__(self, element):
246 |         Checker.__init__(self)
247 |         self.element = element
248 | 
249 |     def check(self, data):
250 |         return all(check(self.element.element, x) for x in data)
251 | 


--------------------------------------------------------------------------------
/pydsl/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/pydsl/contrib/__init__.py


--------------------------------------------------------------------------------
/pydsl/contrib/alphabet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/pydsl/contrib/alphabet/__init__.py


--------------------------------------------------------------------------------
/pydsl/contrib/alphabet/test_alphabet.py:
--------------------------------------------------------------------------------
1 | grammarlist = ["integer","Date"]
2 | iclass = "AlphabetList"
3 | 


--------------------------------------------------------------------------------
/pydsl/contrib/bnfgrammar.py:
--------------------------------------------------------------------------------
 1 | """BNF grammars for testing"""
 2 | 
 3 | from pydsl.grammar.symbol import TerminalSymbol, NonTerminalSymbol, NullSymbol
 4 | from pydsl.grammar.BNF import Production, BNFGrammar
 5 | from pydsl.file.BNF import strlist_to_production_set
 6 | from pydsl.file.python import load_python_file
 7 | from pydsl.grammar.definition import String, RegularExpression
 8 | 
 9 | leftrecursive=["S ::= E","E ::= E dot | dot","dot := String,."]
10 | rightrecursive=["S ::= E","E ::= dot E | dot","dot := String,."]
11 | centerrecursive=["S ::= E","E ::= dot E dot | dot","dot := String,."]
12 | 
13 | #productionset0 definition
14 | 
15 | symbol1 = TerminalSymbol(String("S"))
16 | symbol2 = TerminalSymbol(String("R"))
17 | final1 = NonTerminalSymbol("exp")
18 | rule1 = Production([final1], (symbol1, symbol2))
19 | productionset0 = BNFGrammar(final1, (rule1,symbol1,symbol2))
20 | p0good = "SR"
21 | p0bad = "RS"
22 | 
23 | 
24 | #productionset1 definition
25 | symbol1 = TerminalSymbol(String("S"))
26 | symbol2 = TerminalSymbol(String("R"))
27 | symbol3 = TerminalSymbol(String(":"))
28 | symbol4 = TerminalSymbol(RegularExpression("^[0123456789]*$"))
29 | symbol5 = TerminalSymbol(load_python_file('pydsl/contrib/grammar/cstring.py'))
30 | final1 = NonTerminalSymbol("storeexp") 
31 | final2 = NonTerminalSymbol("retrieveexp") 
32 | final3 = NonTerminalSymbol("exp")
33 | rule1 = Production([final1], (symbol1, symbol3, symbol5))
34 | rule2 = Production([final2], (symbol2, symbol3, symbol4))
35 | rule3 = Production([final3], [final1])
36 | rule4 = Production([final3], [final2])
37 | rulelist = (rule1, rule2, rule3, rule4, symbol1, symbol2, symbol3, symbol4, symbol5)
38 | productionset1 = BNFGrammar(final3, rulelist)
39 | 
40 | #productionset2 definition
41 | symbola = TerminalSymbol(String("A"))
42 | symbolb = TerminalSymbol(String("B"))
43 | nonterminal = NonTerminalSymbol("res")
44 | rulea = Production ((nonterminal,), (symbola, NullSymbol(), symbolb))
45 | productionset2 = BNFGrammar(nonterminal, (rulea, symbola, symbolb))
46 | productionsetlr = strlist_to_production_set(leftrecursive)
47 | productionsetrr = strlist_to_production_set(rightrecursive)
48 | productionsetcr = strlist_to_production_set(centerrecursive)
49 | 
50 | #arithmetic
51 | 
52 | 
53 | arithmetic=["E ::= E plus T | T", "T ::= T times F | F" ,"F ::= open_parenthesis E close_parenthesis | id", "id := String,123" , "plus := String,+", "times := String,*", "open_parenthesis := String,(","close_parenthesis := String,)"]
54 | productionset_arithmetic = strlist_to_production_set(arithmetic, start_symbol= "E")
55 | 
56 | addition=["S ::= E","E ::= E plus F | F" ,"F ::= open_parenthesis E close_parenthesis | id", "id := String,123" , "plus := String,+", "open_parenthesis := String,(","close_parenthesis := String,)"]
57 | productionset_addition = strlist_to_production_set(addition)
58 | #tokenlist definition
59 | string1 = "S:a"
60 | string2 = "S:"
61 | string3 = "AB"
62 | string4 = "AAB"
63 | string5 = "ACB"
64 | dots = "....."
65 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/Date.bnf:
--------------------------------------------------------------------------------
1 | //Date
2 | #parser=descent
3 | 
4 | S ::= dayofmonth separator number separator number
5 | separator := String,/
6 | number := Word,integer,max,1
7 | dayofmonth := Word,DayOfMonth,max,1
8 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/Date.parsley:
--------------------------------------------------------------------------------
1 | separator = '/'
2 | number = digit{1, 2}:n -> int(''.join(n))
3 | dayofmonth = number:n ?(DayOfMonth(n)) ->n
4 | expr=dayofmonth:d separator number:m separator number:y ->(d,m,y)
5 | 
6 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/DayOfMonth.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #Copyright (C) 2008-2014 Nestor Arocha
 5 | 
 6 | 
 7 | 
 8 | def matchFun(myinput):
 9 |     from collections import Iterable
10 |     if isinstance(myinput, Iterable):
11 |         myinput = "".join([str(x) for x in myinput])
12 |     strnumber = str(myinput)
13 |     try:
14 |         number = int(strnumber)
15 |     except ValueError:
16 |         return False
17 |     if 0 < number < 32:
18 |         return True
19 |     return False
20 | 
21 | iclass = "PythonGrammar"
22 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/Grammar2RecursiveDescentParserRecognizer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """Grammar 2 Recursive Descent Parser Recognizer
 5 | First recipe of the book "Language implementation patterns
 6 | 
 7 | grammar NestedNameList;
 8 | list : '[' elements ']' ; // match bracketed list
 9 | elements : element (',' element)* ; // match comma-separated list
10 | element : NAME | list ; // element is name or nested list
11 | NAME : ('a'..'z' |'A'..'Z' )+ ; // NAME is sequence of >=1 letter
12 | """
13 | 
14 | 
15 | def matchFun(inputstr):
16 |     def look_ahead(tl):
17 |         if tl[0] == "[":
18 |             return "list"
19 |         elif tl[0] == ",":
20 |             return ","
21 | 
22 |     def mlist(tl):
23 |         if tl.pop(0) != "[":
24 |             return False
25 |         if not elements(tl):
26 |             return False
27 |         if tl.pop(0) != "]":
28 |             return False
29 |         return True
30 | 
31 |     def elements(tl):
32 |         if not element(tl):
33 |             return False
34 |         while look_ahead(tl) == ",":
35 |             tl.pop(0)
36 |             if not element(tl):
37 |                 return False
38 |         return True
39 | 
40 |     def element(tl):
41 |         if look_ahead(tl) == "list":
42 |             if not mlist(tl):
43 |                 return False
44 |         else:
45 |             if not name(tl):
46 |                 return False
47 |         return True
48 | 
49 |     def name(tl):
50 |         import re
51 |         if not re.match("[a-zA-Z]", tl.pop(0)):
52 |             return False
53 |         while tl and re.match("[a-zA-Z]", tl[0]):
54 |             tl.pop(0)
55 |         return True
56 | 
57 |     inputlist = [x for x in inputstr]
58 |     return element(inputlist) and not len(inputlist)
59 | 
60 | 
61 | iclass = "PythonGrammar"
62 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/HTMLTable.bnf:
--------------------------------------------------------------------------------
 1 | //HTML table Grammar
 2 | 
 3 | S ::= btable Trblock etable
 4 | Trblock ::= btr Tdblock etr 
 5 | Tdblock ::= btd etd 
 6 | btable := String,<table>
 7 | etable := String,</table>
 8 | btr := String,<tr>
 9 | etr := String,</tr>
10 | btd := String,<td>
11 | etd := String,</td>
12 | // identifier := Word,Integer,max,1
13 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/ImageFile.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #copyright (c) 2008-2013 Nestor Arocha
 5 | 
 6 | """Image file recognizer"""
 7 | 
 8 | def matchFun(input):
 9 |     content = input #assuming bytes
10 |     import imghdr
11 |     try:
12 |         return bool(imghdr.what(None, content))
13 |     except:
14 |         return False
15 | 
16 | 
17 | iclass = "PythonGrammar"
18 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/LogicalExpression.bnf:
--------------------------------------------------------------------------------
 1 | //Logical Expression
 2 | 
 3 | S ::= Expression
 4 | Expression ::= identifier | op Expression cp | OperatorExpression 
 5 | OperatorExpression ::= not Expression | identifier RestExpression 
 6 | RestExpression ::= and Expression | or Expression
 7 | op := String,(
 8 | cp := String,)
 9 | and := String,&&
10 | or := String,||
11 | not := String,!
12 | identifier := Word,TrueFalse,min,1
13 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/MimeType.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #copyright (c) 2008-2013 Nestor Arocha
  5 | 
  6 | """Mime Type recognizer"""
  7 | 
  8 | _mimelist = ["applicaiton/x-bytecode.python",
  9 | "application/acad",
 10 | "application/arj",
 11 | "application/base64",
 12 | "application/binhex",
 13 | "application/binhex4",
 14 | "application/book",
 15 | "application/cdf",
 16 | "application/clariscad",
 17 | "application/commonground",
 18 | "application/drafting",
 19 | "application/dsptype",
 20 | "application/dxf",
 21 | "application/envoy",
 22 | "application/excel",
 23 | "application/fractals",
 24 | "application/freeloader",
 25 | "application/futuresplash",
 26 | "application/gnutar",
 27 | "application/groupwise",
 28 | "application/hlp",
 29 | "application/hta",
 30 | "application/i-deas",
 31 | "application/iges",
 32 | "application/inf",
 33 | "application/java",
 34 | "application/java-byte-code",
 35 | "application/lha",
 36 | "application/lzx",
 37 | "application/mac-binary",
 38 | "application/macbinary",
 39 | "application/mac-binhex",
 40 | "application/mac-binhex40",
 41 | "application/mac-compactpro",
 42 | "application/marc",
 43 | "application/mbedlet",
 44 | "application/mcad",
 45 | "application/mime",
 46 | "application/mspowerpoint",
 47 | "application/msword",
 48 | "application/mswrite",
 49 | "application/netmc",
 50 | "application/octet-stream",
 51 | "application/oda",
 52 | "application/pdf",
 53 | "application/pkcs10",
 54 | "application/pkcs-12",
 55 | "application/pkcs7-mime",
 56 | "application/pkcs7-signature",
 57 | "application/pkcs-crl",
 58 | "application/pkix-cert",
 59 | "application/pkix-crl",
 60 | "application/plain",
 61 | "application/postscript",
 62 | "application/powerpoint",
 63 | "application/pro_eng",
 64 | "application/ringing-tones",
 65 | "application/rtf",
 66 | "application/sdp",
 67 | "application/sea",
 68 | "application/set",
 69 | "application/sla",
 70 | "application/smil",
 71 | "application/solids",
 72 | "application/sounder",
 73 | "application/step",
 74 | "application/streamingmedia",
 75 | "application/toolbook",
 76 | "application/vda",
 77 | "application/vnd.fdf",
 78 | "application/vnd.hp-hpgl",
 79 | "application/vnd.hp-pcl",
 80 | "application/vnd.ms-excel",
 81 | "application/vnd.ms-pki.certstore",
 82 | "application/vnd.ms-pki.pko",
 83 | "application/vnd.ms-pki.seccat",
 84 | "application/vnd.ms-pki.stl",
 85 | "application/vnd.ms-powerpoint",
 86 | "application/vnd.ms-project",
 87 | "application/vnd.nokia.configuration-message",
 88 | "application/vnd.nokia.ringing-tone",
 89 | "application/vnd.rn-realmedia",
 90 | "application/vnd.rn-realplayer",
 91 | "application/vnd.wap.wmlc",
 92 | "application/vnd.wap.wmlscriptc",
 93 | "application/vnd.xara",
 94 | "application/vocaltec-media-desc",
 95 | "application/vocaltec-media-file",
 96 | "application/wordperfect",
 97 | "application/wordperfect6.0",
 98 | "application/wordperfect6.1",
 99 | "application/x-123",
100 | "application/x-aim",
101 | "application/x-authorware-bin",
102 | "application/x-authorware-map",
103 | "application/x-authorware-seg",
104 | "application/x-bcpio",
105 | "application/x-binary",
106 | "application/x-binhex40",
107 | "application/x-bsh",
108 | "application/x-bytecode.elisp (compiled elisp)",
109 | "application/x-bzip",
110 | "application/x-bzip2",
111 | "application/x-cdf",
112 | "application/x-cdlink",
113 | "application/x-chat",
114 | "application/x-cmu-raster",
115 | "application/x-cocoa",
116 | "application/x-compactpro",
117 | "application/x-compress",
118 | "application/x-compressed",
119 | "application/x-conference",
120 | "application/x-cpio",
121 | "application/x-cpt",
122 | "application/x-csh",
123 | "application/x-deepv",
124 | "application/x-director",
125 | "application/x-dvi",
126 | "application/x-elc",
127 | "application/x-envoy",
128 | "application/x-esrehber",
129 | "application/x-excel",
130 | "application/x-frame",
131 | "application/x-freelance",
132 | "application/x-gsp",
133 | "application/x-gss",
134 | "application/x-gtar",
135 | "application/x-gzip",
136 | "application/x-hdf",
137 | "application/x-helpfile",
138 | "application/x-httpd-imap",
139 | "application/x-ima",
140 | "application/x-internett-signup",
141 | "application/x-inventor",
142 | "application/x-ip2",
143 | "application/x-java-class",
144 | "application/x-java-commerce",
145 | "application/x-javascript",
146 | "application/x-koan",
147 | "application/x-ksh",
148 | "application/x-latex",
149 | "application/x-lha",
150 | "application/x-lisp",
151 | "application/x-livescreen",
152 | "application/x-lotus",
153 | "application/x-lotusscreencam",
154 | "application/x-lzh",
155 | "application/x-lzx",
156 | "application/x-macbinary",
157 | "application/x-mac-binhex40",
158 | "application/x-magic-cap-package-1.0",
159 | "application/x-mathcad",
160 | "application/x-meme",
161 | "application/x-midi",
162 | "application/x-mif",
163 | "application/x-mix-transfer",
164 | "application/xml",
165 | "application/x-mplayer2",
166 | "application/x-msexcel",
167 | "application/x-mspowerpoint",
168 | "application/x-navi-animation",
169 | "application/x-navidoc",
170 | "application/x-navimap",
171 | "application/x-navistyle",
172 | "application/x-netcdf",
173 | "application/x-newton-compatible-pkg",
174 | "application/x-nokia-9000-communicator-add-on-software",
175 | "application/x-omc",
176 | "application/x-omcdatamaker",
177 | "application/x-omcregerator",
178 | "application/x-pagemaker",
179 | "application/x-pcl",
180 | "application/x-pixclscript",
181 | "application/x-pkcs10",
182 | "application/x-pkcs12",
183 | "application/x-pkcs7-certificates",
184 | "application/x-pkcs7-certreqresp",
185 | "application/x-pkcs7-mime",
186 | "application/x-pkcs7-signature",
187 | "application/x-pointplus",
188 | "application/x-portable-anymap",
189 | "application/x-project",
190 | "application/x-qpro",
191 | "application/x-rtf",
192 | "application/x-sdp",
193 | "application/x-sea",
194 | "application/x-seelogo",
195 | "application/x-sh",
196 | "application/x-shar",
197 | "application/x-shockwave-flash",
198 | "application/x-sit",
199 | "application/x-sprite",
200 | "application/x-stuffit",
201 | "application/x-sv4cpio",
202 | "application/x-sv4crc",
203 | "application/x-tar",
204 | "application/x-tbook",
205 | "application/x-tcl",
206 | "application/x-tex",
207 | "application/x-texinfo",
208 | "application/x-troff",
209 | "application/x-troff-man",
210 | "application/x-troff-me",
211 | "application/x-troff-ms",
212 | "application/x-troff-msvideo",
213 | "application/x-ustar",
214 | "application/x-visio",
215 | "application/x-vnd.audioexplosion.mzz",
216 | "application/x-vnd.ls-xpix",
217 | "application/x-vrml",
218 | "application/x-wais-source",
219 | "application/x-winhelp",
220 | "application/x-wintalk",
221 | "application/x-world",
222 | "application/x-wpwin",
223 | "application/x-wri",
224 | "application/x-x509-ca-cert",
225 | "application/x-x509-user-cert",
226 | "application/x-zip-compressed",
227 | "application/zip",
228 | "audio/aiff",
229 | "audio/basic",
230 | "audio/it",
231 | "audio/make",
232 | "audio/make.my.funk",
233 | "audio/mid",
234 | "audio/midi",
235 | "audio/mod",
236 | "audio/mpeg",
237 | "audio/mpeg3",
238 | "audio/nspaudio",
239 | "audio/s3m",
240 | "audio/tsp-audio",
241 | "audio/tsplayer",
242 | "audio/vnd.qcelp",
243 | "audio/voc",
244 | "audio/voxware",
245 | "audio/wav",
246 | "audio/x-adpcm",
247 | "audio/x-aiff",
248 | "audio/x-au",
249 | "audio/x-gsm",
250 | "audio/x-jam",
251 | "audio/x-liveaudio",
252 | "audio/xm",
253 | "audio/x-mid",
254 | "audio/x-midi",
255 | "audio/x-mod",
256 | "audio/x-mpeg",
257 | "audio/x-mpeg-3",
258 | "audio/x-mpequrl",
259 | "audio/x-nspaudio",
260 | "audio/x-pn-realaudio",
261 | "audio/x-pn-realaudio-plugin",
262 | "audio/x-psid",
263 | "audio/x-realaudio",
264 | "audio/x-twinvq",
265 | "audio/x-twinvq-plugin",
266 | "audio/x-vnd.audioexplosion.mjuicemediafile",
267 | "audio/x-voc",
268 | "audio/x-wav",
269 | "chemical/x-pdb",
270 | "drawing/x-dwf (old)",
271 | "image/bmp",
272 | "image/cmu-raster",
273 | "image/fif",
274 | "image/florian",
275 | "image/g3fax",
276 | "image/gif",
277 | "image/ief",
278 | "image/jpeg",
279 | "image/jutvision",
280 | "image/naplps",
281 | "image/pict",
282 | "image/pjpeg",
283 | "image/png",
284 | "image/tiff",
285 | "image/vasa",
286 | "image/vnd.dwg",
287 | "image/vnd.fpx",
288 | "image/vnd.net-fpx",
289 | "image/vnd.rn-realflash",
290 | "image/vnd.rn-realpix",
291 | "image/vnd.wap.wbmp",
292 | "image/vnd.xiff",
293 | "image/xbm",
294 | "image/x-cmu-raster",
295 | "image/x-dwg",
296 | "image/x-icon",
297 | "image/x-jg",
298 | "image/x-jps",
299 | "image/x-niff",
300 | "image/x-pcx",
301 | "image/x-pict",
302 | "image/xpm",
303 | "image/x-portable-anymap",
304 | "image/x-portable-bitmap",
305 | "image/x-portable-graymap",
306 | "image/x-portable-greymap",
307 | "image/x-portable-pixmap",
308 | "image/x-quicktime",
309 | "image/x-rgb",
310 | "image/x-tiff",
311 | "image/x-windows-bmp",
312 | "image/x-xbitmap",
313 | "image/x-xbm",
314 | "image/x-xpixmap",
315 | "image/x-xwd",
316 | "image/x-xwindowdump",
317 | "i-world/i-vrml",
318 | "message/rfc822",
319 | "model/iges",
320 | "model/vnd.dwf",
321 | "model/vrml",
322 | "model/x-pov",
323 | "multipart/x-gzip",
324 | "multipart/x-ustar",
325 | "multipart/x-zip",
326 | "music/crescendo",
327 | "music/x-karaoke",
328 | "paleovu/x-pv",
329 | "text/asp",
330 | "text/css",
331 | "text/html",
332 | "text/mcf",
333 | "text/pascal",
334 | "text/plain",
335 | "text/richtext",
336 | "text/scriplet",
337 | "text/sgml",
338 | "text/tab-separated-values",
339 | "text/uri-list",
340 | "text/vnd.abc",
341 | "text/vnd.fmi.flexstor",
342 | "text/vnd.rn-realtext",
343 | "text/vnd.wap.wml",
344 | "text/vnd.wap.wmlscript",
345 | "text/webviewhtml",
346 | "text/x-asm",
347 | "text/x-audiosoft-intra",
348 | "text/x-c",
349 | "text/x-component",
350 | "text/x-fortran",
351 | "text/x-h",
352 | "text/x-java-source",
353 | "text/x-la-asf",
354 | "text/x-m",
355 | "text/xml",
356 | "text/x-pascal",
357 | "text/x-script",
358 | "text/x-script.csh",
359 | "text/x-script.elisp",
360 | "text/x-script.guile",
361 | "text/x-script.ksh",
362 | "text/x-script.lisp",
363 | "text/x-script.perl",
364 | "text/x-script.perl-module",
365 | "text/x-script.phyton",
366 | "text/x-script.rexx",
367 | "text/x-script.scheme",
368 | "text/x-script.sh",
369 | "text/x-script.tcl",
370 | "text/x-script.tcsh",
371 | "text/x-script.zsh",
372 | "text/x-server-parsed-html",
373 | "text/x-setext",
374 | "text/x-sgml",
375 | "text/x-speech",
376 | "text/x-uil",
377 | "text/x-uuencode",
378 | "text/x-vcalendar",
379 | "video/animaflex",
380 | "video/avi",
381 | "video/avs-video",
382 | "video/dl",
383 | "video/fli",
384 | "video/gl",
385 | "video/mpeg",
386 | "video/msvideo",
387 | "video/quicktime",
388 | "video/vdo",
389 | "video/vivo",
390 | "video/vnd.rn-realvideo",
391 | "video/vnd.vivo",
392 | "video/vosaic",
393 | "video/x-amt-demorun",
394 | "video/x-amt-showrun",
395 | "video/x-atomic3d-feature",
396 | "video/x-dl",
397 | "video/x-dv",
398 | "video/x-fli",
399 | "video/x-gl",
400 | "video/x-isvideo",
401 | "video/x-motion-jpeg",
402 | "video/x-mpeg",
403 | "video/x-mpeq2a",
404 | "video/x-ms-asf",
405 | "video/x-ms-asf-plugin",
406 | "video/x-msvideo",
407 | "video/x-qtc",
408 | "video/x-scm",
409 | "video/x-sgi-movie",
410 | "windows/metafile",
411 | "www/mime",
412 | "x-conference/x-cooltalk",
413 | "xgl/drawing",
414 | "xgl/movie",
415 | "x-music/x-midi",
416 | "x-world/x-3dmf",
417 | "x-world/x-svr",
418 | "x-world/x-vrml",
419 | "x-world/x-vrt"]
420 | 
421 | 
422 | def matchFun(input):
423 |     content = str(input)
424 |     return content in _mimelist
425 | 
426 | iclass = "PythonGrammar"
427 | 
428 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/SpanishID.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #copyright (c) 2008-2013 Nestor Arocha
 5 | 
 6 | """spanish id number grammar"""
 7 | 
 8 | def matchFun(inputstr):
 9 |     dni = str(inputstr)
10 |     if len(dni) != 9:
11 |         return False
12 |     string = 'TRWAGMYFPDXBNJZSQVHLCKE'
13 |     resto = int(dni[:8]) % 23
14 |     if dni[-1].lower() == string[resto].lower():
15 |         return True
16 |     return False
17 | 
18 | def propFun(inputstr, propertyname):
19 |     dni = inputstr
20 |     if propertyname == "number":
21 |         return dni[:8]
22 |     elif propertyname == "letter":
23 |         return dni[:-1]
24 |     else:
25 |         return False
26 | 
27 | iclass = "PythonGrammar"
28 | 
29 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/TrueFalse.bnf:
--------------------------------------------------------------------------------
1 | S ::= true | false
2 | true := String,True
3 | false := String,False
4 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/TrueHTMLTable.bnf:
--------------------------------------------------------------------------------
 1 | //HTML table Grammar
 2 | 
 3 | S ::= btable Tablecontent etable
 4 | Tablecontent ::= Trblock Tablecontent | Null
 5 | Trblock ::= btr Trcontent etr 
 6 | Trcontent ::= Tdblock Trcontent | Null
 7 | Tdblock ::= btd identifier etd 
 8 | btable := String,<table>
 9 | etable := String,</table>
10 | btr := String,<tr>
11 | etr := String,</tr>
12 | btd := String,<td>
13 | etd := String,</td>
14 | identifier := Word,integer,max,1
15 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/pydsl/contrib/grammar/__init__.py


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/calc_ply.py:
--------------------------------------------------------------------------------
  1 | # -----------------------------------------------------------------------------
  2 | # calc.py
  3 | #
  4 | # A simple calculator with variables -- all in one file.
  5 | # -----------------------------------------------------------------------------
  6 | 
  7 | from pydsl.exceptions import ParseError
  8 | 
  9 | tokens = (
 10 |     'NAME','NUMBER',
 11 |     'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
 12 |     'LPAREN','RPAREN',
 13 |     )
 14 | 
 15 | # Tokens
 16 | 
 17 | t_PLUS    = r'\+'
 18 | t_MINUS   = r'-'
 19 | t_TIMES   = r'\*'
 20 | t_DIVIDE  = r'/'
 21 | t_EQUALS  = r'='
 22 | t_LPAREN  = r'\('
 23 | t_RPAREN  = r'\)'
 24 | t_NAME    = r'[a-zA-Z_][a-zA-Z0-9_]*'
 25 | 
 26 | def t_NUMBER(t):
 27 |     r'\d+'
 28 |     try:
 29 |         t.value = int(t.value)
 30 |     except ValueError:
 31 |         print("Integer value too large %d", t.value)
 32 |         t.value = 0
 33 |     return t
 34 | 
 35 | # Ignored characters
 36 | t_ignore = " \t"
 37 | 
 38 | def t_newline(t):
 39 |     r'\n+'
 40 |     t.lexer.lineno += t.value.count("\n")
 41 | 
 42 | def t_error(t):
 43 |     raise ParseError("unknown character", t.lexpos)
 44 |     print("Illegal character '%s'" % t.value[0])
 45 |     t.lexer.skip(1)
 46 | 
 47 | # Parsing rules
 48 | 
 49 | precedence = (
 50 |     ('left','PLUS','MINUS'),
 51 |     ('left','TIMES','DIVIDE'),
 52 |     ('right','UMINUS'),
 53 |     )
 54 | 
 55 | # dictionary of names
 56 | names = { }
 57 | 
 58 | def p_statement_assign(t):
 59 |     'statement : NAME EQUALS expression'
 60 |     names[t[1]] = t[3]
 61 | 
 62 | def p_statement_expr(t):
 63 |     'statement : expression'
 64 |     t[0] = t[1]
 65 | 
 66 | def p_expression_binop(t):
 67 |     '''expression : expression PLUS expression
 68 |                   | expression MINUS expression
 69 |                   | expression TIMES expression
 70 |                   | expression DIVIDE expression'''
 71 |     if t[2] == '+'  : t[0] = t[1] + t[3]
 72 |     elif t[2] == '-': t[0] = t[1] - t[3]
 73 |     elif t[2] == '*': t[0] = t[1] * t[3]
 74 |     elif t[2] == '/': t[0] = t[1] / t[3]
 75 | 
 76 | def p_expression_uminus(t):
 77 |     'expression : MINUS expression %prec UMINUS'
 78 |     t[0] = -t[2]
 79 | 
 80 | def p_expression_group(t):
 81 |     'expression : LPAREN expression RPAREN'
 82 |     t[0] = t[2]
 83 | 
 84 | def p_expression_number(t):
 85 |     'expression : NUMBER'
 86 |     t[0] = t[1]
 87 | 
 88 | def p_expression_name(t):
 89 |     'expression : NAME'
 90 |     try:
 91 |         t[0] = names[t[1]]
 92 |     except LookupError:
 93 |         raise ParseError("Undefined name",0)
 94 |         t[0] = 0
 95 | 
 96 | def p_error(t):
 97 |     raise ParseError("Syntax error at", t.value)
 98 | 
 99 | iclass="PLY"
100 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/cstring.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #Copyright (C) 2008-2013 Nestor Arocha
 5 | 
 6 | """Any string """
 7 | 
 8 | iclass="PythonGrammar"
 9 | def matchFun(inputstr):
10 |     try:
11 |         str(inputstr)
12 |     except UnicodeDecodeError:
13 |         return False
14 |     return True
15 | 
16 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/example_ply.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | 
  3 | """Calculate the molecular weight given a molecular formula
  4 | 
  5 | Parse the formula using PLY.
  6 | 
  7 | See http://www.dalkescientific.com/writings/NBN/parsing_with_ply.html
  8 | """
  9 | # ply_mw.py
 10 | 
 11 | from ply.lex import TOKEN
 12 | from pydsl.exceptions import ParseError
 13 | 
 14 | 
 15 | ### Define the lexer
 16 | 
 17 | tokens = (
 18 |     "ATOM",
 19 |     "DIGITS",
 20 | )
 21 | 
 22 | mw_table = {
 23 |     'H': 1.00794,
 24 |     'C': 12.001,
 25 |     'Cl': 35.453,
 26 |     'O': 15.999,
 27 |     'S': 32.06,
 28 | }
 29 | 
 30 | 
 31 | # I don't want to duplicate the atom names so extract the
 32 | # keys to make the lexer pattern.
 33 | 
 34 | # Sort order is:
 35 | #   - alphabetically on first character, to make it easier
 36 | # for a human to look at and debug any problems
 37 | # 
 38 | #   - then by the length of the symbol; two letters before 1
 39 | # Needed because Python's regular expression matcher
 40 | # uses "first match" not "longest match" rules.
 41 | # For example, "C|Cl" matches only the "C" in "Cl"
 42 | # The "-" in "-len(symbol)" is a trick to reverse the sort order.
 43 | #
 44 | #   - then by the full symbol, to make it easier for people
 45 | 
 46 | # (This is more complicated than needed; it's to show how
 47 | # this approach can scale to all 100+ known and named elements)
 48 | 
 49 | atom_names = sorted(
 50 |     mw_table.keys(),
 51 |     key = lambda symbol: (symbol[0], -len(symbol), symbol))
 52 | 
 53 | # Creates a pattern like:  Cl|C|H|O|S
 54 | atom_pattern = "|".join(atom_names)
 55 | 
 56 | # Use a relatively new PLY feature to set the __doc__
 57 | # string based on a Python variable.
 58 | @TOKEN(atom_pattern)
 59 | def t_ATOM(t):
 60 |     t.value = mw_table[t.value]
 61 |     return t
 62 | 
 63 | def t_DIGITS(t):
 64 |     r"\d+"
 65 |     t.value = int(t.value)
 66 |     return t
 67 | 
 68 | def t_error(t):
 69 |     raise ParseError("unknown character", t.lexpos)
 70 | 
 71 | 
 72 | ## Here's an example of using the lexer
 73 | 
 74 | # data = "H2SO4"
 75 | # 
 76 | # lex.input(data)
 77 | # 
 78 | # for tok in iter(lex.token, None):
 79 | #     print tok
 80 | 
 81 | ##### Define the grammar
 82 | 
 83 | # The molecular weight of "" is 0.0
 84 | def p_mw_empty(p):
 85 |     "mw : "
 86 |     p[0] = 0.0
 87 | 
 88 | def p_mw_formula(p):
 89 |     "mw : formula"
 90 |     p[0] = p[1]
 91 |     
 92 | 
 93 | def p_first_species_term(p):
 94 |     "formula : species"
 95 |     p[0] = p[1]
 96 | 
 97 | def p_species_list(p):
 98 |     "formula : formula species"
 99 |     p[0] = p[1] + p[2]
100 | 
101 | def p_species(p):
102 |     "species : ATOM DIGITS"
103 |     p[0] = p[1] * p[2]
104 | 
105 | def p_species_default(p):
106 |     "species : ATOM"
107 |     p[0] = p[1]
108 | 
109 | def p_error(p):
110 |     raise ParseError("unexpected character", p.lexpos)
111 | 
112 | iclass="PLY"
113 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/integerop.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #Copyright (C) 2008-2013 Nestor Arocha
 5 | 
 6 | 
 7 | def propFun(input, property):
 8 |     if property == "Operator":
 9 |         return input[1]
10 | 
11 | def matchFun(myinput, auxgrammardict):
12 |     myinput = str(myinput)
13 |     validoperators = ["+", "-", "*", "/"]
14 |     operatorexists = False
15 |     currentoperator = None
16 |     for operator in validoperators:
17 |         if operator in myinput:
18 |             operatorexists = True
19 |             currentoperator = operator
20 |             break
21 |     if not operatorexists:
22 |         return False
23 |     parts = myinput.split(currentoperator)
24 |     if len(parts) != 2:
25 |         return False
26 |     for part in parts:
27 |         if not auxgrammardict["integer"].check(part):
28 |             return False
29 |     return True
30 | 
31 | auxdic = {"integer":"integer"}
32 | iclass = "PythonGrammar"
33 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/logline.bnf:
--------------------------------------------------------------------------------
 1 | S ::= ip space hyphen space hyphen space Datetime space quotation Command quotation space int space int space quotation Referer quotation space quotation Useragent quotation
 2 | Datetime ::= opensquare DayOfMonth slash Month slash Year colon Hour colon Minute colon Second space Plusminus int closesquare
 3 | Command ::= get | post | hyphen
 4 | Plusminus ::= plus | hyphen
 5 | Referer ::= string
 6 | Useragent ::= string
 7 | DayOfMonth ::= int
 8 | Month ::= int
 9 | Year ::= int
10 | Hour ::= int
11 | Minute ::= int
12 | Second ::= int
13 | 
14 | space := Word,space,max
15 | plus := String,+
16 | quotation := String,"
17 | colon := String,:
18 | hyphen := String,-
19 | get := String,GET
20 | slash := String,/
21 | post := String,POST
22 | int := Word,integer,max
23 | ip := Word,ipv4,max
24 | opensquare := String,[
25 | closesquare := String,]
26 | string := Word,characters,max
27 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/mongoquery.bnf:
--------------------------------------------------------------------------------
 1 | S :: Query
 2 | Query : { identifier : subexpression } | { $or : querylist }
 3 | querylist: [ Query *(,Query) ]
 4 | valuelist: [ value *(,value) ]
 5 | sublist: [ subexpression *(,subexpression) ]
 6 | subexpression: inexpression |  orexpression | ninexpression | elemMatchexpression | modexpression | existsexpression | typeexpression | query | value
 7 | existsexpression: { $exists: bool } 
 8 | inexpression: { $in: valuelist}
 9 | ninexpression: { $nin: valuelist }
10 | orexpression: { $or: sublist}
11 | elemMatchexpression: { $elemMatch : { identifier:value * (, identifier:value) } }
12 | modexpression: { $mod: [int, int] }
13 | typeexpression: { $type: int }
14 | bool: true | false
15 | value: str | int | float | bool | / regexp /
16 | 


--------------------------------------------------------------------------------
/pydsl/contrib/grammar/protocol.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #copyright (c) 2008-2011 Nestor Arocha
 5 | 
 6 | """Protocols"""
 7 | 
 8 | def matchFun(inputd):
 9 |     inputs = str(inputd)
10 |     return inputs.find("://") != -1
11 | 
12 | def propFun(inputd, propertyname):
13 |     inputs = str(inputd)
14 |     protocol, rest = inputs.split("://")
15 |     if propertyname == "protocol":
16 |         return protocol
17 |     if "?" in rest:
18 |         path, options = rest.split("?")
19 |         if propertyname == "path":
20 |             return path
21 |         elif propertyname == "options":
22 |             return options
23 |     else:
24 |         if propertyname == "path":
25 |             return rest
26 | 
27 | iclass = "PythonGrammar"
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/pydsl/contrib/mongogrammar.py:
--------------------------------------------------------------------------------
1 | spec = {"a":1,"b":2}
2 | fullspec = {"a":{"$type":"integer"},"b":{"$type":"integer"}}
3 | 


--------------------------------------------------------------------------------
/pydsl/contrib/regexps.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | res={
 4 |         "australian_phone":{"regexp":"^(\+\d{2}[ \-]{0,1}){0,1}(((\({0,1}[ \-]{0,1})0{0,1}\){0,1}[2|3|7|8]{1}\){0,1}[ \-]*(\d{4}[ \-]{0,1}\d{4}))|(1[ \-]{0,1}(300|800|900|902)[ \-]{0,1}((\d{6})|(\d{3}[ \-]{0,1}\d{3})))|(13[ \-]{0,1}([\d \-]{5})|((\({0,1}[ \-]{0,1})0{0,1}\){0,1}4{1}[\d \-]{8,10})))$"},
 5 |         "australian_postcode":{"regexp":"(0[289][0-9]{2})|([1345689][0-9]{3})|(2[0-8][0-9]{2})|(290[0-9])|(291[0-4])|(7[0-4][0-9]{2})|(7[8-9][0-9]{2})"},
 6 |         "austrian_vat":{"regexp":"^(AT){0,1}[U]{0,1}[0-9]{8}$"},
 7 |         "belgian_vat":{"regexp":"^(BE)[0-1]{1}[0-9]{9}$|^((BE)|(BE ))[0-1]{1}(\d{3})([.]{1})(\d{3})([.]{1})(\d{3})"},
 8 |         "bic":{"regexp":"^([a-zA-Z]){4}(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AZ|BS|BH|BD|BB|BY|BE|BZ|BJ|BM|BT|BO|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD|CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CY|CZ|DK|DJ|DM|DO|EC|EG|SV|GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|GT|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO|KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML|MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MC|MN|ME|MS|MA|MZ|MM|MA|NR|NP|NL|AN|NC|NZ|NI|NE|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW|SH|KN|LC|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SK|SI|SB|SO|ZA|GS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|TV|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)([0-9a-zA-Z]){2}([0-9a-zA-Z]{3})$"},
 9 |         "binary":{"regexp":"^[01]*$"},
10 |         "brainfuck":{"regexp":"^(-|<|>|\.|,|\+|\[|\])+$"},
11 |         "bulgarian_vat":{"regexp":"^(BG){0,1}([0-9]{9}|[0-9]{10})$"},
12 |         "camelcase":{"regexp":"^[A-Z][a-z]+([A-Z][a-z]+)+$"},
13 |         "canadian_postcode":{"regexp":"^[ABCEGHJKLMNPRSTVXYabceghjklmnprstvxy]{1}\d{1}[A-Za-z]{1}\d{1}[A-Za-z]{1}\d{1}$"},
14 |         "characters":{"regexp":"^[A-z]+$"},
15 |         "color_code":{"regexp":"^#(\d{6})|^#([A-F]{6})|^#([A-F]|[0-9]){6}"},
16 |         "coordinate":{"regexp":"^\d{1,2}(\.\d*)?[NS] 1?\d{1,2}(\.\d*)?[EW]$"},
17 |         "credit_card":{"regexp":"^(\d{4}-){3}\d{4}$|^(\d{4} ){3}\d{4}$|^\d{16}$"},
18 |         "dms_coordinate":{"regexp":"[0-9]{1,2}[:|°][0-9]{1,2}[:|'](?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)\"?[N|S|E|W]"},
19 |         "dutch_postcode":{"regexp":"^[1-9]{1}[0-9]{3}\s?[a-zA-Z]{2}$"},
20 |         "email":{"regexp":"^(?P<user>[A-Z0-9._%+-]+)@(?P<domain>[A-Z0-9.-]+\.[A-Z]{2,4})$","flags":"i"},
21 |         "FileFilter":{"regexp":"^([A-z]|[*?.])+$"},
22 |         "float":{"regexp":"^[123456789][01234567890]*\.[0123456789]*$"},
23 |         "fqdn":{"regexp":"^(?=^.{1,254}$)(^(?:(?!\.|-)([a-z0-9\-\*]{1,63}|([a-z0-9\-]{1,62}[a-z0-9]))\.)+(?:[a-z]{2,})$)$"},
24 |         "german_postcode":{"regexp":"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$"},
25 |         "hex":{"regexp":"^[0-9a-fA-F]*$"},
26 |         "Identifier":{"regexp":"^[A-Za-z][_A-Za-z0-9]*$"},
27 |         "indian_mobile_2":{"regexp":"^((\+){0,1}91(\s){0,1}(\-){0,1}(\s){0,1}){0,1}9[0-9](\s){0,1}(\-){0,1}(\s){0,1}[1-9]{1}[0-9]{7}$"},
28 |         "indian_mobile":{"regexp":"^[89][0-9]{9}"},
29 |         "indian_postcode":{"regexp":"^[1-9]{3}\s{0,1}[0-9]{3}$"},
30 |         "integer":{"regexp":"^[0123456789]*$"},
31 |         "ipv4_2":{"regexp":"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"},
32 |         "ipv4":{"regexp":"^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"},
33 |         "ipv6":{"regexp":"^((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))$"},
34 |         "isbn":{"regexp":"^((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])$"},
35 |         "iso_8601":{"regexp":"^(?<Date>(?<Year>\d{4})-(?<Month>\d{2})-(?<Day>\d{2}))(?:T(?<Time>(?<SimpleTime>(?<Hour>\d{2}):(?<Minute>\d{2})(?::(?<Second>\d{2}))?)?(?:\.(?<FractionalSecond>\d{1,7}))?(?<Offset>-\d{2}\:\d{2})?))?$"},
36 |         "israel_mobile":{"regexp":"^\+?972(\-)?0?[23489]{1}(\-)?[^0\D]{1}\d{6}$"},
37 |         "italian_fiscal_code":{"regexp":"^[A-Za-z]{6}[0-9LMNPQRSTUV]{2}[A-Za-z]{1}[0-9LMNPQRSTUV]{2}[A-Za-z]{1}[0-9LMNPQRSTUV]{3}[A -Za-z]{1}$"},
38 |         "mac_address":{"regexp":"^([0-9A-F]{2}[:-]){5}([0-9A-F]{2})$"},
39 |         "netherlands_postcode":{"regexp":"^[1-9]{1}[0-9]{3}\s?[A-Z]{2}$"},
40 |         "nginxlog":{"regexp":"^(?P<ip>[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)[^[]+\[([^]]+)\][^/]+([^ ]+).+$"},
41 |         "pakistan_phone":{"regexp":"^((\+92)|(0092))-{0,1}\d{3}-{0,1}\d{7}$|^\d{11}$|^\d{4}-\d{7}$"},
42 |         "passport":{"regexp":"^[A-Z0-9<]{9}[0-9]{1}[A-Z]{3}[0-9]{7}[A-Z]{1}[0-9]{7}[A-Z0-9<]{14}[0-9]{2}$"},
43 |         "polish_landline":{"regexp":"^(\+48\s*)?\d{2}\s*\d{3}(\s*|\-)\d{2}(\s*|\-)\d{2}$"},
44 |         "portuguese_phone":{"regexp":"^((\+351|00351|351)?)(2\d{1}|(9(3|6|2|1)))\d{7}$"},
45 |         "portuguese_postcode":{"regexp":"^[0-9]{4}-[0-9]{3}$"},
46 |         "pythonlogging":{"regexp":"^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2}) (?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2}),(?P<mil>\d*) \[(?P<level>\w*)\s*\] \[(?P<process>[_\.\w]*)\s*\]: (?P<message>.*)$"},
47 |         "saudi_mobile":{"regexp":"05\d{8}"},
48 |         "scientific_notation":{"regexp":"^(-?[1-9](\.\d+)?)((\s?[X*]\s?10[E^]([+-]?\d+))|(E([+-]?\d+)))$"},
49 |         "slovak_postcode":{"regexp":"^(([0-9]{5})|([0-9]{3}[ ]{0,1}[0-9]{2}))$"},
50 |         "slovenian_phone":{"regexp":"^(([0-9]{3})[ \-\/]?([0-9]{3})[ \-\/]?([0-9]{3}))|([0-9]{9})|([\+]?([0-9]{3})[ \-\/]?([0-9]{2})[ \-\/]?([0-9]{3})[ \-\/]?([0-9]{3}))$"},
51 |         "space":{"regexp":"^ $"},
52 |         "swedish_personnumber":{"regexp":"^[0-9]{6}-[0-9pPtTfF][0-9]{3}$"},
53 |         "swiss_phone":{"regexp":"^(\+?)(\d{2,4})(\s?)(\-?)((\(0\))?)(\s?)(\d{2})(\s?)(\-?)(\d{3})(\s?)(\-?)(\d{2})(\s?)(\-?)(\d{2})"},
54 |         "swiss_postcode":{"regexp":"^[1-9][0-9][0-9][0-9]$"},
55 |         "uk_driving_license":{"regexp":"^([A-Z]{2}[9]{3}|[A-Z]{3}[9]{2}|[A-Z]{4}[9]{1}|[A-Z]{5})[0-9]{6}([A-Z]{1}[9]{1}|[A-Z]{2})[A-Z0-9]{3}[0-9]{2}$"},
56 |         "uknin":{"regexp":"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$", "description":"UK national insurance number"},
57 |         "uk_postcode":{"regexp":"^[A-Za-z]{1,2}[\d]{1,2}([A-Za-z])?\s?[\d][A-Za-z]{2}$"},
58 |         "ukranian_phone":{"regexp":"^((8|\+38)-?)?(\(?044\)?)?-?\d{3}-?\d{2}-?\d{2}$"},
59 |         "uk_vat":{"regexp":"^([GB])*(([1-9]\d{8})|([1-9]\d{11}))$"},
60 |         "unixFilename":{"regexp":"^(\d|\w|\ |\.|\*)*$"},
61 |         "uptime_command":{"regexp":"^([0-2][0-9]\:[0-5][0-9]\:[0-5][0-9])\s+up\s+([0-9\:]{1,5})\s*(days|day|min|mins)?(?:\,\s+([0-9\:]{1,5})\s*(days|day|min|mins)?)?\,\s+([0-9]{1,4})\susers?\,\s+load\s+average\:\s+([0-9\.]{1,6})\,\s+([0-9\.]{1,6})\,\s+([0-9\.]{1,6})$"},
62 |         "uri":{"regexp":"^(?P<protocol>[A-Z0-9]+)://(?P<resource>.*)$", "flags":"i"},
63 |         "us_phone":{"regexp":"^(((\(\d{3}\)|\d{3})( |-|\.))|(\(\d{3}\)|\d{3}))?\d{3}( |-|\.)?\d{4}(( |-|\.)?([Ee]xt|[Xx])[.]?( |-|\.)?\d{4})?$"},
64 |         "us_social_security_number":{"regexp":"^((?!000)(?!666)(?:[0-6]\d{2}|7[0-2][0-9]|73[0-3]|7[5-6][0-9]|77[0-2]))-((?!00)\d{2})-((?!0000)\d{4})$"},
65 |         "uuid":{"regexp":"^((?-i:0x)?[A-Fa-f0-9]{32}| [A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}| \{[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}\})$"},
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/pydsl/contrib/spark/spark_example.py:
--------------------------------------------------------------------------------
 1 | #  Copyright (c) 1999-2000 John Aycock
 2 | #  
 3 | #  Permission is hereby granted, free of charge, to any person obtaining
 4 | #  a copy of this software and associated documentation files (the
 5 | #  "Software"), to deal in the Software without restriction, including
 6 | #  without limitation the rights to use, copy, modify, merge, publish,
 7 | #  distribute, sublicense, and/or sell copies of the Software, and to
 8 | #  permit persons to whom the Software is furnished to do so, subject to
 9 | #  the following conditions:
10 | #  
11 | #  The above copyright notice and this permission notice shall be
12 | #  included in all copies or substantial portions of the Software.
13 | #  
14 | #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | #  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 | #  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 | #  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 | #  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 | #  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 
22 | from spark_scan_example import scan
23 | from spark_parse_example import parse
24 | 
25 | if __name__ == '__main__':
26 |     import sys
27 |     filename = sys.argv[1]
28 |     f = open(filename)
29 |     parse(scan(f))
30 |     f.close()
31 |     print('ok')
32 | 


--------------------------------------------------------------------------------
/pydsl/contrib/spark/spark_scan_example.py:
--------------------------------------------------------------------------------
 1 | #  Copyright (c) 1999-2000 John Aycock
 2 | #  
 3 | #  Permission is hereby granted, free of charge, to any person obtaining
 4 | #  a copy of this software and associated documentation files (the
 5 | #  "Software"), to deal in the Software without restriction, including
 6 | #  without limitation the rights to use, copy, modify, merge, publish,
 7 | #  distribute, sublicense, and/or sell copies of the Software, and to
 8 | #  permit persons to whom the Software is furnished to do so, subject to
 9 | #  the following conditions:
10 | #  
11 | #  The above copyright notice and this permission notice shall be
12 | #  included in all copies or substantial portions of the Software.
13 | #  
14 | #  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | #  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | #  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 | #  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 | #  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 | #  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 | #  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 
22 | #
23 | #  Why would I write my own when GvR maintains this one?
24 | #
25 | import tokenize
26 | 
27 | class Token:
28 |     def __init__(self, type, attr=None, lineno='???'):
29 |         self.type = type
30 |         self.attr = attr
31 |         self.lineno = lineno
32 | 
33 |     def __cmp__(self, o):
34 |         return cmp(self.type, o)
35 |     ###
36 |     def __repr__(self):
37 |         return str(self.type)
38 | 
39 | _map = {
40 |     tokenize.ENDMARKER    : 'ENDMARKER',
41 |     tokenize.NAME         : 'NAME',
42 |     tokenize.NUMBER        : 'NUMBER',
43 |     tokenize.STRING        : 'STRING',
44 |     tokenize.NEWLINE    : 'NEWLINE',
45 |     tokenize.INDENT        : 'INDENT',
46 |     tokenize.DEDENT        : 'DEDENT',
47 | }
48 | 
49 | _rw = {
50 |     'and'        : None,
51 |     'assert'    : None,
52 |     'break'        : None,
53 |     'class'        : None,
54 |     'continue'    : None,
55 |     'def'        : None,
56 |     'del'        : None,
57 |     'elif'        : None,
58 |     'else'        : None,
59 |     'except'    : None,
60 |     'exec'        : None,
61 |     'finally'    : None,
62 |     'for'        : None,
63 |     'from'        : None,
64 |     'global'    : None,
65 |     'if'        : None,
66 |     'import'    : None,
67 |     'in'        : None,
68 |     'is'        : None,
69 |     'lambda'    : None,
70 |     'not'        : None,
71 |     'or'        : None,
72 |     'pass'        : None,
73 |     'print'        : None,
74 |     'raise'        : None,
75 |     'return'    : None,
76 |     'try'        : None,
77 |     'while'        : None,
78 | }
79 | 
80 | def scan(f):
81 |     tokens = []
82 | 
83 |     def callback(value, lexeme, lineno_column, end, line, list=tokens):
84 |         attr = None
85 |         type = lexeme
86 |         lineno, column = lineno_column
87 |         if value in (tokenize.COMMENT, tokenize.NL):
88 |             return
89 |         elif value in _map:
90 |             if value != tokenize.NAME or not lexeme in _rw:
91 |                 attr = lexeme
92 |                 type = _map[value]
93 | 
94 |         t = Token(type, attr=attr, lineno=lineno)
95 |         list.append(t)
96 | 
97 |     [callback(*token) for token in tokenize.generate_tokens(f.readline)]
98 |     return tokens
99 | 


--------------------------------------------------------------------------------
/pydsl/contrib/translator/calculator.py:
--------------------------------------------------------------------------------
 1 | __all__ = ['repository', 'iclass', 'root_rule', 'rules']
 2 | rules = """digit = anything:x ?(x in '0123456789')
 3 | number = <digit+>:ds -> int(ds)
 4 | expr = number:left ( '+' number:right -> left + right
 5 |                    | -> left)"""
 6 | 
 7 | root_rule="expr"
 8 | 
 9 | from pydsl.grammar.PEG import Sequence
10 | repository={'string':Sequence.from_string('fas')}
11 | iclass="parsley"
12 | 
13 | 


--------------------------------------------------------------------------------
/pydsl/contrib/translator/calculator_bnf.py:
--------------------------------------------------------------------------------
 1 | from pydsl.File.BNF import strlist_to_production_set
 2 | from pydsl.Grammar import RegularExpression
 3 | from pydsl.Parser.LL import LL1RecursiveDescentParser
 4 | 
 5 | def tree_translator(tree):
 6 |     from pydsl.Grammar.Symbol import NonTerminalSymbol
 7 |     if tree.symbol == NonTerminalSymbol("E"):
 8 |         return int(str(tree.childlist[0].content)) + int(str(tree.childlist[2].content))
 9 |     elif len(tree.childlist) == 1:
10 |         return tree_translator(tree.childlist[0])
11 |     else:
12 |         raise Exception
13 |             
14 | 
15 | grammar_def = [
16 |         "S ::= E",
17 |         "E ::= number operator number",
18 |         "number := Word,integer,max",
19 |         "operator := String,+",
20 |         ]
21 | repository = {'integer':RegularExpression("^[0123456789]*$")}
22 | production_set = strlist_to_production_set(grammar_def, repository)
23 | rdp = LL1RecursiveDescentParser(production_set)
24 | 
25 | 
26 | def translator(data):
27 |     parse_tree = rdp(data)
28 |     return tree_translator(parse_tree[0])
29 | 
30 | 


--------------------------------------------------------------------------------
/pydsl/contrib/translator/chemicalFormulas.py:
--------------------------------------------------------------------------------
 1 | # chemicalFormulas.py
 2 | #
 3 | # Copyright (c) 2003, 2007, Paul McGuire
 4 | #
 5 | 
 6 | from pyparsing import Word, Optional, OneOrMore, Group, ParseException
 7 | 
 8 | # define a simple Python dict of atomic weights, with chemical symbols
 9 | # for keys
10 | atomicWeight = {
11 |     "O"  : 15.9994,
12 |     "H"  : 1.00794,
13 |     "Na" : 22.9897,
14 |     "Cl" : 35.4527,
15 |     "C"  : 12.0107,
16 |     "S"  : 32.0655,
17 |     }
18 | 
19 | # define some strings to use later, when describing valid lists 
20 | # of characters for chemical symbols and numbers
21 | caps = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
22 | lowers = caps.lower()
23 | digits = "0123456789"
24 | 
25 | # Version 1
26 | # Define grammar for a chemical formula
27 | # - an element is a Word, beginning with one of the characters in caps,
28 | #   followed by zero or more characters in lowers
29 | # - an integer is a Word composed of digits
30 | # - an elementRef is an element, optionally followed by an integer - if 
31 | #   the integer is omitted, assume the value "1" as a default; these are 
32 | #   enclosed in a Group to make it easier to walk the list of parsed 
33 | #   chemical symbols, each with its associated number of atoms per 
34 | #   molecule
35 | # - a chemicalFormula is just one or more elementRef's
36 | element = Word( caps, lowers )
37 | integer = Word( digits )
38 | elementRef = Group( element + Optional( integer, default="1" ) )
39 | chemicalFormula = OneOrMore( elementRef )
40 | 
41 | # Version 2 - Auto-convert integers, and add results names
42 | def convertIntegers(tokens):
43 |     return int(tokens[0])
44 |     
45 | element = Word( caps, lowers )
46 | integer = Word( digits ).setParseAction( convertIntegers )
47 | elementRef = Group( element("symbol") + Optional( integer, default=1 )("qty") )
48 | # pre-1.4.7, use this: 
49 | # elementRef = Group( element.setResultsName("symbol") + Optional( integer, default=1 ).setResultsName("qty") )
50 | chemicalFormula = OneOrMore( elementRef )
51 | 
52 | 
53 | # Version 3 - Compute partial molecular weight per element, simplifying 
54 | # summing
55 | # No need to redefine grammar, just define parse action function, and
56 | # attach to elementRef
57 | def computeElementWeight(tokens):
58 |     element = tokens[0]
59 |     element["weight"] = atomicWeight[element.symbol] * element.qty
60 |     
61 | elementRef.setParseAction(computeElementWeight)
62 | 
63 | root_symbol = chemicalFormula
64 | 
65 | iclass = "pyparsing"
66 | 


--------------------------------------------------------------------------------
/pydsl/contrib/translator/echo.py:
--------------------------------------------------------------------------------
1 | def function(input):
2 |     return input
3 | 
4 | 
5 | iclass = "PythonTranslator"
6 | 


--------------------------------------------------------------------------------
/pydsl/diff.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import logging
23 | from pydsl.grammar.PEG import Choice
24 | LOG = logging.getLogger(__name__)
25 | 
26 | def lcs(list1, list2):
27 |     import difflib
28 |     differences = difflib.SequenceMatcher(None, list1, list2)
29 |     return [x for x in differences.get_matching_blocks()]
30 | 
31 | def diff_factory(definition):
32 |     if isinstance(definition, Choice):
33 |         return lcs
34 |     raise ValueError
35 | 
36 | def diff(definition, element1, element2):
37 |     return diff_factory(definition)(element1, element2)
38 | 


--------------------------------------------------------------------------------
/pydsl/encoding.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #This file is part of pydsl.
 5 | #
 6 | #pydsl is free software: you can redistribute it and/or modify
 7 | #it under the terms of the GNU General Public License as published by
 8 | #the Free Software Foundation, either version 3 of the License, or
 9 | #(at your option) any later version.
10 | #
11 | #pydsl is distributed in the hope that it will be useful,
12 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | #GNU General Public License for more details.
15 | #
16 | #You should have received a copy of the GNU General Public License
17 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
18 | 
19 | 
20 | __author__ = "Nestor Arocha"
21 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
22 | __email__ = "nesaro@gmail.com"
23 | 
24 | from pydsl.grammar import String
25 | from pydsl.grammar.PEG import Choice
26 | 
27 | ascii_encoding = Choice([String(chr(x)) for x in range(128)], calculate_base_alphabet=False)
28 | 


--------------------------------------------------------------------------------
/pydsl/equal.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | #This file is part of pydsl.
 3 | #
 4 | #pydsl is free software: you can redistribute it and/or modify
 5 | #it under the terms of the GNU General Public License as published by
 6 | #the Free Software Foundation, either version 3 of the License, or
 7 | #(at your option) any later version.
 8 | #
 9 | #pydsl is distributed in the hope that it will be useful,
10 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | #GNU General Public License for more details.
13 | #
14 | #You should have received a copy of the GNU General Public License
15 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2020, Nestor Arocha"
20 | __email__ = "n@nestorarocha.com"
21 | 
22 | import logging
23 | from collections import Iterable
24 | from .check import check
25 | from jsonschema import FormatChecker
26 | LOG = logging.getLogger(__name__)
27 | 
28 | def equal(definition, first_element, second_element) -> bool:
29 |     """Compares if the two elements are equal according to the grammar definition"""
30 |     if not check(definition, first_element):
31 |         raise ValueError
32 |     if not check(definition, second_element):
33 |         raise ValueError
34 |     equal_checker = equal_factory(definition)
35 |     return equal_checker(first_element, second_element)
36 | 
37 | 
38 | def equal_factory(definition):
39 |     from pydsl.grammar.definition import String
40 |     if isinstance(definition, String):
41 |         return lambda x,y: x==y
42 | 
43 | 


--------------------------------------------------------------------------------
/pydsl/exceptions.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # This file is part of pydsl.
 4 | #
 5 | # pydsl is free software: you can redistribute it and/or modify
 6 | # it under the terms of the GNU General Public License as published by
 7 | # the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | # pydsl is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | 
19 | __author__ = "Nestor Arocha"
20 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
21 | __email__ = "nesaro@gmail.com"
22 | 
23 | 
24 | class ParseError(Exception):
25 | 
26 |     def __init__(self, msg, offset):
27 |         self.msg = msg
28 |         self.offset = offset
29 | 
30 |     def __repr__(self):
31 |         return "ParseError(%r, %r)" % (self.msg, self.offset)
32 | 
33 |     def __str__(self):
34 |         return "%s at position %s" % (self.msg, self.offset + 1)
35 | 


--------------------------------------------------------------------------------
/pydsl/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/pydsl/external/__init__.py


--------------------------------------------------------------------------------
/pydsl/extract.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | import logging
 23 | LOG = logging.getLogger(__name__)
 24 | from pydsl.check import checker_factory
 25 | from pydsl.lex import lexer_factory
 26 | from pydsl.token import PositionToken, Token
 27 | 
 28 | 
 29 | def filter_subsets(lst):
 30 |     to_remove = set()
 31 |     for i, j, _, _ in lst:
 32 |         for x, y, _, _ in lst:
 33 |             if (x < i and y >= j) or (x <= i and y > j):
 34 |                 to_remove.add((i,j))
 35 |                 break
 36 |     result = list(lst)
 37 | 
 38 |     for element in lst:
 39 |         if (element[0], element[1]) in to_remove:
 40 |             result.remove(element)
 41 |     return result
 42 | 
 43 | 
 44 | def extract_alphabet(alphabet, inputdata, fixed_start = False):
 45 |     """
 46 |     Receives a sequence and an alphabet, 
 47 |     returns a list of PositionTokens with all of the parts of the sequence that 
 48 |     are a subset of the alphabet
 49 |     """
 50 |     if not inputdata:
 51 |         return []
 52 |     base_alphabet = alphabet.alphabet
 53 | 
 54 |     lexer = lexer_factory(alphabet, base_alphabet)
 55 |     totallen = len(inputdata)
 56 |     maxl = totallen
 57 |     minl = 1
 58 |     if fixed_start:
 59 |         max_start = 1
 60 |     else:
 61 |         max_start = totallen
 62 |     result = []
 63 |     for i in range(max_start):
 64 |         for j in range(i+minl, min(i+maxl, totallen) + 1):
 65 |             try:
 66 |                 lexed = lexer(inputdata[i:j])
 67 |                 if lexed and len(lexed) == 1:
 68 |                     result.append((i,j, inputdata[i:j], lexed[0].gd))
 69 |                 elif lexed:
 70 |                     raise Exception
 71 |             except:
 72 |                 continue
 73 |     result = filter_subsets(result)
 74 |     return [PositionToken(content, gd, left, right) for (left, right, content, gd) in result]
 75 | 
 76 | def extract(grammar, inputdata, fixed_start = False, return_first=False):
 77 |     """
 78 |     Receives a sequence and a grammar, 
 79 |     returns a list of PositionTokens with all of the parts of the sequence that 
 80 |     are recognized by the grammar
 81 |     """
 82 |     if not inputdata:
 83 |         return []
 84 |     checker = checker_factory(grammar)
 85 | 
 86 |     totallen = len(inputdata)
 87 |     from pydsl.grammar.PEG import Choice
 88 |     try:
 89 |         maxl = grammar.maxsize or totallen
 90 |     except NotImplementedError:
 91 |         maxl = totallen
 92 |     try:
 93 |         #minl = grammar.minsize #FIXME: It won't work with incompatible alphabets
 94 |         minl = 1
 95 |     except NotImplementedError:
 96 |         minl = 1
 97 |     if fixed_start:
 98 |         max_start = 1
 99 |     else:
100 |         max_start = totallen
101 |     result = []
102 |     for i in range(max_start):
103 |         for j in range(i+minl, min(i+maxl, totallen) + 1):
104 |             slice = inputdata[i:j]
105 |             check = checker.check(slice)
106 |             if check:
107 |                 this_pt = PositionToken(slice, grammar, i, j)
108 |                 if return_first:
109 |                     return this_pt
110 |                 result.append(this_pt)
111 |     return result
112 | 
113 | def search(grammar, inputdata):
114 |     return extract(grammar, inputdata, return_first=True)
115 | 
116 | def match(grammar, inputdata):
117 |     return extract(grammar, inputdata, fixed_start=True, return_first=True)
118 | 
119 | 


--------------------------------------------------------------------------------
/pydsl/file/BNF.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | 
 19 | """BNF format functions"""
 20 | 
 21 | __author__ = "Nestor Arocha"
 22 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 23 | __email__ = "nesaro@gmail.com"
 24 | 
 25 | import logging
 26 | import re
 27 | from pydsl.grammar.symbol import TerminalSymbol, NonTerminalSymbol, NullSymbol
 28 | from pydsl.grammar.BNF import Production
 29 | LOG = logging.getLogger(__name__)
 30 | 
 31 | """ pydsl Grammar definition file parser """
 32 | 
 33 | def __generateStringSymbol(rightside):
 34 |     head, tail = rightside.split(",", 1)
 35 |     if head != "String":
 36 |         raise TypeError
 37 |     content = tail
 38 |     if len(tail) > 2 and tail[1][0] == "'" and tail[1][-1] == "'":
 39 |         content = tail[1][1:-1]
 40 |     from pydsl.grammar.definition import String
 41 |     return TerminalSymbol(String(content))
 42 | 
 43 | def __generateWordSymbol(rightside, repository):
 44 |     args = rightside.split(",")
 45 |     if args[0] != "Word":
 46 |         raise TypeError
 47 |     return TerminalSymbol(repository[args[1]])
 48 | 
 49 | 
 50 | def read_nonterminal_production(line, symboldict):
 51 |     sidesarray = line.split("::=")
 52 |     if len(sidesarray) < 2 or len(sidesarray) > 3:
 53 |         raise ValueError("Error reading nonterminal production rule")
 54 |     leftside = sidesarray[0].strip()
 55 |     #leftside contains at least one NonTerminalSymbol
 56 |     #FIXME supports only one symbol
 57 |     symboldict[leftside] = NonTerminalSymbol(leftside)
 58 |     rightside = sidesarray[1]
 59 |     alternatives = [alt.rstrip() for alt in rightside.split("|")]
 60 |     result = []
 61 |     n = 0
 62 |     for alternative in alternatives:
 63 |         symbollist = alternative.split()
 64 |         symbolinstancelist = []
 65 |         for symbol in symbollist:
 66 |             symbolinstancelist.append(symboldict[symbol])
 67 |         result.append(Production([symboldict[leftside]], symbolinstancelist))
 68 |         n += 1
 69 |     return result
 70 | 
 71 | def read_terminal_production(line, repository):
 72 |     leftside, rightside = line.split(":=")
 73 |     leftside = leftside.strip()
 74 |     symbolnames = leftside.split(" ")
 75 |     if len(symbolnames) != 1:
 76 |         LOG.error("Error generating terminal rule: " + line + "At left side")
 77 |         raise ValueError("Error reading left side of terminal production rule")
 78 |     #leftside is symbolname
 79 |     rightside = rightside.strip()
 80 |     #regexp to detect rightside: String, Grammar
 81 |     if re.search("^String", rightside):
 82 |         newsymbol = __generateStringSymbol(rightside)
 83 |     elif re.search("^Word", rightside):
 84 |         newsymbol = __generateWordSymbol(rightside, repository)
 85 |     elif re.search("^Null", rightside):
 86 |         newsymbol = NullSymbol()
 87 |     else:
 88 |         raise ValueError("Unknown terminal production type " + str(rightside))
 89 |     return symbolnames[0], newsymbol
 90 | 
 91 | 
 92 | def strlist_to_production_set(linelist, repository = None, start_symbol = "S"):
 93 |     if repository is None:
 94 |         repository = {}
 95 |     nonterminalrulelist = []
 96 |     terminalrulelist = []
 97 |     rulelist = []
 98 |     symboldict = {"Null":NullSymbol()}
 99 |     macrodict = {}
100 |     #first read terminalsymbols
101 |     for line in linelist:
102 |         cleanline = re.sub("//.*$", "", line)
103 |         if re.search("::=", cleanline):
104 |             nonterminalrulelist.append(cleanline)
105 |         elif re.search (":=", cleanline):
106 |             symbolname, symbolinstance = read_terminal_production(cleanline, repository)
107 |             symboldict[symbolname] = symbolinstance
108 |             terminalrulelist.append(symbolinstance)
109 |         elif re.search ("^#.*$", cleanline):
110 |             pair = cleanline[1:].split("=")
111 |             assert(len(pair)==2)
112 |             macrodict[pair[0]] = pair[1].rstrip()
113 |         elif re.search ("^\s*$", cleanline):
114 |             pass #Empty line
115 |         else:
116 |             raise ValueError("Unknown line at bnf input file")
117 | 
118 |     #then read nonterminalsymbols
119 |     while nonterminalrulelist:
120 |         linestodrop = []
121 |         for myindex in range(len(nonterminalrulelist)):
122 |             try:
123 |                 newrules = read_nonterminal_production(nonterminalrulelist[myindex], symboldict)
124 |                 for newrule in newrules:
125 |                     rulelist.append(newrule)
126 |             except KeyError:
127 |                 pass
128 |             else:
129 |                 linestodrop.append(myindex)
130 |         if not linestodrop:
131 |             raise Exception("No rule found: ")
132 |         linestodrop.reverse()
133 |         for myindex in linestodrop:
134 |             del nonterminalrulelist[myindex]
135 |     for terminal in terminalrulelist:
136 |         rulelist.append(terminal)
137 |     from pydsl.grammar.BNF import BNFGrammar
138 |     return BNFGrammar(symboldict[start_symbol], rulelist, macrodict)
139 | 
140 | 
141 | def load_bnf_file(filepath, repository = None):
142 |     """Converts a bnf file into a BNFGrammar instance"""
143 |     linelist = []
144 |     with open(filepath,'r') as mlfile:
145 |         for line in mlfile:
146 |             linelist.append(line)
147 |     return strlist_to_production_set(linelist, repository)
148 | 
149 | 
150 | def str_to_productionset(string):
151 |     """Converts a str into a ProductionRuleSet"""
152 |     return strlist_to_production_set(string.split('\n'))
153 | 
154 | 


--------------------------------------------------------------------------------
/pydsl/file/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/pydsl/file/__init__.py


--------------------------------------------------------------------------------
/pydsl/file/parsley.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | # This file is part of pydsl.
 4 | #
 5 | # pydsl is free software: you can redistribute it and/or modify
 6 | # it under the terms of the GNU General Public License as published by
 7 | # the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | # pydsl is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | 
19 | from pydsl.grammar.parsley import ParsleyGrammar
20 | 
21 | __author__ = "Ptolom"
22 | __copyright__ = "Copyright 2014, Ptolom"
23 | __email__ = "ptolom@hexifact.co.uk"
24 | 
25 | def load_parsley_grammar_file(filepath, root_rule="expr", repository=None):
26 |     with open(filepath,'r') as file:
27 |         return ParsleyGrammar(file.read(), root_rule, repository)
28 | 


--------------------------------------------------------------------------------
/pydsl/file/python.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | 
19 | """ File Library class """
20 | 
21 | import imp
22 | 
23 | def getFileTuple(fullname):
24 |     import os.path
25 |     (dirName, fileName) = os.path.split(fullname)
26 |     (fileBaseName, fileExtension) = os.path.splitext(fileName)
27 |     return dirName, fileName, fileBaseName, fileExtension
28 | 
29 | def load_module(filepath, identifier = None):
30 |     if identifier is None:
31 |         (_, _, identifier, _) = getFileTuple(filepath)
32 |     return imp.load_source(identifier, filepath)
33 | 
34 | def load_python_file(moduleobject):
35 |     """ Try to create an indexable instance from a module"""
36 |     if isinstance(moduleobject, str):
37 |         moduleobject = load_module(moduleobject)
38 |     if not hasattr(moduleobject, "iclass"):
39 |         raise KeyError("Element" + str(moduleobject))
40 |     iclass = getattr(moduleobject, "iclass")
41 |     mylist = getattr(moduleobject, "__all__", None) or list(filter(lambda x:x[:1] != "_", (dir(moduleobject))))
42 |     mylist.remove('iclass')
43 |     resultdic = {}
44 |     for x in mylist:
45 |         resultdic[x] = getattr(moduleobject, x)
46 |     if iclass == "SymbolGrammar":
47 |         from pydsl.grammar.BNF import BNFGrammar
48 |         return BNFGrammar(**resultdic)
49 |     elif iclass == "PLY":
50 |         from pydsl.grammar.definition import PLYGrammar
51 |         return PLYGrammar(moduleobject)
52 |     elif iclass in ["PythonGrammar"]:
53 |         from pydsl.grammar.definition import PythonGrammar
54 |         return PythonGrammar(resultdic)
55 |     elif iclass == "PythonTranslator":
56 |         return resultdic
57 |     elif iclass == "parsley":
58 |         from pydsl.grammar.parsley import ParsleyGrammar
59 |         return ParsleyGrammar(**resultdic)
60 |     elif iclass == "pyparsing":
61 |         return resultdic['root_symbol']
62 |     else:
63 |         raise ValueError(str(moduleobject))
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/pydsl/file/regexp.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | 
19 | """Regular expression file parser"""
20 | 
21 | __author__ = "Nestor Arocha"
22 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
23 | __email__ = "nesaro@gmail.com"
24 | 
25 | 
26 | import re
27 | 
28 | def load_re_from_file(filepath):
29 |     """Converts a re file to Regular Grammar instance"""
30 |     regexp = None
31 |     with open(filepath,'r') as mlfile:
32 |         flagstr = ""
33 |         for line in mlfile:
34 |             cleanline = re.sub("//.*$", "", line)
35 |             if re.search("^\s*$", cleanline):
36 |                 continue
37 |             if re.search ("^#.*$", cleanline):
38 |                 flagstr = cleanline[1:]
39 |                 continue
40 |             if regexp is not None:
41 |                 raise Exception("Regular expression file format error")
42 |             else:
43 |                 regexp = cleanline.rstrip('\n')
44 |     flags = 0
45 |     if "i" in flagstr:
46 |         flags |= re.I
47 |     from pydsl.grammar.definition import RegularExpression
48 |     return RegularExpression(regexp, flags)
49 | 


--------------------------------------------------------------------------------
/pydsl/grammar/BNF.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | # This file is part of pydsl.
  4 | #
  5 | # pydsl is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | # pydsl is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | """Production rules"""
 19 | 
 20 | from pydsl.grammar.symbol import Symbol, TerminalSymbol, NullSymbol, EndSymbol
 21 | from pydsl.grammar.definition import Grammar
 22 | from pydsl.grammar.PEG import Choice
 23 | 
 24 | __author__ = "Nestor Arocha"
 25 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 26 | __email__ = "nesaro@gmail.com"
 27 | 
 28 | def list_eq(list1, list2):
 29 |     return len(list1) == len(list2) and all(list1[x] == list2[x] for x in range(len(list1)))
 30 | 
 31 | class Production(object):
 32 | 
 33 |     def __init__(self, leftside, rightside):
 34 |         # Left side must have at least one non terminal symbol
 35 |         for element in rightside:
 36 |             if not isinstance(element, Symbol):
 37 |                 raise TypeError
 38 |         self.leftside = tuple(leftside)
 39 |         self.rightside = tuple(rightside)
 40 | 
 41 |     def __str__(self):
 42 |         leftstr = " ".join([x for x in self.leftside])
 43 |         rightstr = " ".join([str(x) for x in self.rightside])
 44 |         return leftstr + "::=" + rightstr
 45 | 
 46 |     def __eq__(self, other):
 47 |         try:
 48 |             if len(self.leftside) != len(other.leftside):
 49 |                 return False
 50 |             if len(self.rightside) != len(other.rightside):
 51 |                 return False
 52 |             if not list_eq(self.leftside, other.leftside):
 53 |                 return False
 54 |             if not list_eq(self.rightside, other.rightside):
 55 |                 return False
 56 |         except AttributeError:
 57 |             return False
 58 |         return True
 59 | 
 60 |     def __hash__(self):
 61 |         return hash(self.leftside) & hash(self.rightside)
 62 | 
 63 | 
 64 | #Only stores a ruleset, and methods to ask properties or validity check
 65 | class BNFGrammar(Grammar):
 66 | 
 67 |     def __init__(self, initialsymbol, fulllist, options=None):
 68 |         Grammar.__init__(self)
 69 |         self._initialsymbol = initialsymbol
 70 |         for rule in fulllist:
 71 |             if fulllist.count(rule) > 1:
 72 |                 raise ValueError("Duplicated rule: " + str(rule))
 73 |         self.fulllist = tuple(fulllist)
 74 |         self.options = options or {}
 75 | 
 76 |     def __hash__(self):
 77 |         return hash(self.fulllist)
 78 | 
 79 |     @property
 80 |     def alphabet(self):
 81 |         return Choice([x.gd for x in self.terminal_symbols])
 82 | 
 83 |     @property
 84 |     def productions(self):
 85 |         return [x for x in self.fulllist if isinstance(x, Production)]
 86 | 
 87 |     @property
 88 |     def terminal_symbols(self):
 89 |         return [x for x in self.fulllist if isinstance(x, TerminalSymbol)]
 90 | 
 91 |     @property
 92 |     def first(self):
 93 |         """Returns the a grammar definition that includes all first elements of this grammar"""
 94 |         return self.first_lookup(self.initialsymbol)
 95 | 
 96 |     def first_lookup(self, symbol, size=1):
 97 |         """
 98 |         Returns a Grammar Definition with the first n terminal symbols
 99 |         produced by the input symbol
100 |         """
101 |         if isinstance(symbol, (TerminalSymbol, NullSymbol)):
102 |             return [symbol.gd]
103 |         result = []
104 |         for production in self.productions:
105 |             if production.leftside[0] != symbol:
106 |                 continue
107 |             for right_symbol in production.rightside:
108 |                 if right_symbol == symbol: #Avoids infinite recursion
109 |                     break
110 |                 current_symbol_first = self.first_lookup(right_symbol, size)
111 |                 import collections
112 |                 from pydsl.grammar.definition import String
113 |                 if isinstance(current_symbol_first, collections.Iterable) and not isinstance(current_symbol_first, String):
114 |                     result += current_symbol_first
115 |                 else:
116 |                     result.append(current_symbol_first)
117 |                 if isinstance(current_symbol_first, String) or \
118 |                         not isinstance(current_symbol_first, collections.Iterable) or \
119 |                         (NullSymbol not in current_symbol_first):
120 |                     break # This element doesn't have Null in its first set so there is no need to continue
121 |         if not result:
122 |             raise KeyError("Symbol doesn't exist in this grammar")
123 |         return Choice(result)
124 | 
125 |     def next_lookup(self, symbol):
126 |         """Returns the next TerminalSymbols produced by the input symbol within this grammar definition"""
127 |         result = []
128 |         if symbol == self.initialsymbol:
129 |             result.append(EndSymbol())
130 |         for production in self.productions:
131 |             if symbol in production.rightside:
132 |                 nextindex = production.rightside.index(symbol) + 1
133 |                 while nextindex < len(production.rightside):
134 |                     nextsymbol = production.rightside[nextindex]
135 |                     firstlist = self.first_lookup(nextsymbol)
136 |                     cleanfirstlist = Choice([x for x in firstlist if x != NullSymbol()])
137 |                     result.append(cleanfirstlist)
138 |                     if NullSymbol() not in firstlist:
139 |                         break
140 |                 else:
141 |                     result += self.next_lookup(production.leftside[0]) #reached the end of the rightside
142 |         return result
143 | 
144 |     def __eq__(self, other):
145 |         try:
146 |             return self._initialsymbol == other.initialsymbol and list_eq(self.productions, other.productions)
147 |         except AttributeError:
148 |             return False
149 | 
150 |     @property
151 |     def initialsymbol(self):
152 |         return self._initialsymbol
153 | 
154 |     @property
155 |     def main_production(self):
156 |         """Returns main rule"""
157 |         for rule in self.productions:
158 |             if rule.leftside[0] == self._initialsymbol:
159 |                 return rule
160 |         raise IndexError
161 | 
162 |     def getProductionsBySide(self, symbol):
163 |         result = [rule for rule in self.productions if len(rule.leftside) == 1 and rule.leftside[0] == symbol]
164 |         if not result:
165 |             raise IndexError("Symbol: %s" % str(symbol))
166 |         return result
167 | 
168 |     def getSymbols(self):
169 |         """Returns every symbol"""
170 |         symbollist = []
171 |         for rule in self.productions:
172 |             for symbol in rule.leftside + rule.rightside:
173 |                 if symbol not in symbollist:
174 |                     symbollist.append(symbol)
175 |         symbollist += self.terminal_symbols
176 |         return symbollist
177 | 
178 |     def __str__(self):
179 |         return str(list(map(str, self.productions)))
180 | 


--------------------------------------------------------------------------------
/pydsl/grammar/PEG.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | # This file is part of pydsl.
  4 | #
  5 | # pydsl is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | # pydsl is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | """
 23 | 
 24 | Parser expression grammars
 25 | 
 26 | Loosely based on pymeta
 27 | 
 28 | https://launchpad.net/pymeta
 29 | 
 30 | See also http://en.wikipedia.org/wiki/Parsing_expression_grammar
 31 | 
 32 | """
 33 | 
 34 | from .definition import Grammar
 35 | from itertools import chain
 36 | 
 37 | class ZeroOrMore(Grammar):
 38 |     def __init__(self, element):
 39 |         Grammar.__init__(self)
 40 |         self.element = element
 41 | 
 42 |     def first(self):
 43 |         return Choice([self.element])
 44 | 
 45 | 
 46 | class OneOrMore(Grammar):
 47 |     def __init__(self, element):
 48 |         Grammar.__init__(self)
 49 |         self.element = element
 50 | 
 51 |     def first(self):
 52 |         return Choice([self.element])
 53 | 
 54 | class Sequence(Grammar, list):
 55 |     def __init__(self, *args, **kwargs):
 56 |         base_alphabet = kwargs.pop('base_alphabet', None)
 57 |         Grammar.__init__(self, base_alphabet)
 58 |         list.__init__(self, *args, **kwargs)
 59 |         for x in self:
 60 |             if not isinstance(x, Grammar):
 61 |                 raise TypeError(x)
 62 | 
 63 |     def __hash__(self):
 64 |         return hash(tuple(self))
 65 | 
 66 |     @classmethod
 67 |     def from_string(cls, string):
 68 |         from .definition import String
 69 |         return cls([String(x) for x in string])
 70 | 
 71 | class Choice(set, Grammar):
 72 |     """Uses a list of grammar definitions with common base alphabets"""
 73 |     def __init__(self, grammarlist, calculate_base_alphabet = True):
 74 |         set.__init__(self, grammarlist)
 75 |         if calculate_base_alphabet:
 76 |             base_alphabet = set()
 77 |             for x in self:
 78 |                 base_alphabet = base_alphabet.union(x.alphabet)
 79 |         else:
 80 |             base_alphabet = None
 81 |         Grammar.__init__(self, base_alphabet)
 82 | 
 83 |     def __str__(self):
 84 |         return str([str(x) for x in self])
 85 | 
 86 |     def __add__(self, other):
 87 |         return Choice([x for x in self] + [x for x in other])
 88 | 
 89 |     def __hash__(self):
 90 |         return hash(tuple(x for x in self))
 91 | 
 92 | class Optional(object):
 93 |     def __init__(self, element):
 94 |         Grammar.__init__(self)
 95 |         self.element = element
 96 | 
 97 | class Not(object):
 98 |     def __init__(self, element):
 99 |         self.element = element
100 | 
101 | class And(object):
102 |     def __init__(self, element):
103 |         Grammar.__init__(self)
104 |         self.element = element
105 | 
106 | 


--------------------------------------------------------------------------------
/pydsl/grammar/__init__.py:
--------------------------------------------------------------------------------
1 | from .definition import Grammar, RegularExpression, String, PythonGrammar, grammar_factory, JsonSchema
2 | from .BNF import BNFGrammar
3 | 


--------------------------------------------------------------------------------
/pydsl/grammar/definition.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | # This file is part of pydsl.
  4 | #
  5 | # pydsl is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | # pydsl is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | import collections
 23 | 
 24 | class Grammar(object):
 25 | 
 26 |     def __init__(self, base_alphabet = None):
 27 |         self.__base_alphabet = base_alphabet
 28 | 
 29 |     def enum(self):
 30 |         """Generates every possible accepted string"""
 31 |         raise NotImplementedError
 32 | 
 33 |     @property
 34 |     def first(self):
 35 |         """Alphabet that matches every possible first element.
 36 |         the returned value is a subset of the base_alphabet"""
 37 |         return self.alphabet
 38 | 
 39 |     @property
 40 |     def minsize(self):# -> int:
 41 |         """Returns the minimum size in alphabet tokens"""
 42 |         return 0
 43 | 
 44 |     @property
 45 |     def maxsize(self):
 46 |         """Returns the max size in alphabet tokens"""
 47 |         return None
 48 | 
 49 |     @property
 50 |     def alphabet(self):
 51 |         if not self.__base_alphabet:
 52 |             raise AttributeError
 53 |         return self.__base_alphabet
 54 | 
 55 | class PLYGrammar(Grammar):
 56 |     """PLY based grammar"""
 57 |     def __init__(self, module):
 58 |         Grammar.__init__(self)
 59 |         self.module = module
 60 | 
 61 | class RegularExpression(Grammar):
 62 |     def __init__(self, regexp, flags = 0):
 63 |         Grammar.__init__(self, None)
 64 |         import re
 65 |         retype = type(re.compile('hello, world'))
 66 |         if isinstance(regexp, retype):
 67 |             self.regexp = regexp
 68 |             self.regexpstr = regexp.pattern
 69 |             self.flags = regexp.flags
 70 |         elif isinstance(regexp, str):
 71 |             self.regexpstr = regexp
 72 |             self.flags = flags
 73 |             self.regexp = re.compile(regexp, flags)
 74 |         else:
 75 |             raise TypeError
 76 | 
 77 |     def __hash__(self):
 78 |         return hash(self.regexpstr)
 79 | 
 80 |     def __eq__(self, other):
 81 |         try:
 82 |             return self.regexpstr == other.regexpstr and self.flags == other.flags
 83 |         except AttributeError:
 84 |             return False
 85 | 
 86 |     def __str__(self):
 87 |         return self.regexpstr
 88 | 
 89 |     @property
 90 |     def first(self):
 91 |         i = 0
 92 |         while True:
 93 |             if self.regexpstr[i] == "^":
 94 |                 i+=1
 95 |                 continue
 96 |             if self.regexpstr[i] == "[":
 97 |                 from .PEG import Choice
 98 |                 return Choice([String(x) for x in self.regexpstr[i+1:self.regexpstr.find("]")]])
 99 |             return String(self.regexpstr[i])
100 | 
101 |     def __getattr__(self, attr):
102 |         return getattr(self.regexp, attr)
103 | 
104 |     @property
105 |     def alphabet(self):
106 |         from pydsl.encoding import ascii_encoding
107 |         return ascii_encoding
108 | 
109 | 
110 | class String(Grammar, str):
111 |     def __init__(self, string):
112 |         if isinstance(string, list):
113 |             raise TypeError('Attempted to initialize a String with a list %s' % (string, ) )
114 |         Grammar.__init__(self, None)
115 | 
116 |     @property
117 |     def first(self):
118 |         return String(self[0])
119 | 
120 |     def enum(self):
121 |         yield self
122 | 
123 |     @property
124 |     def maxsize(self):
125 |         return len(self)
126 | 
127 |     @property
128 |     def minsize(self):
129 |         return len(self)
130 | 
131 |     @property
132 |     def alphabet(self):
133 |         from pydsl.encoding import ascii_encoding
134 |         return ascii_encoding
135 | 
136 | class JsonSchema(Grammar, dict):
137 |     def __init__(self, *args, **kwargs):
138 |         from pydsl.encoding import ascii_encoding
139 |         Grammar.__init__(self, ascii_encoding)
140 |         dict.__init__(self, *args, **kwargs)
141 | 
142 | class PythonGrammar(Grammar, dict):
143 |     """
144 |     A Python dictionary that defines a Grammar.
145 |     it must define at least matchFun
146 |     """
147 |     def __init__(self, *args, **kwargs):
148 |         """
149 |         It receives a dictionary constructor which must define
150 |         matchFun. Example: {'matchFun':<function x at 0x000000>}
151 |         """
152 |         Grammar.__init__(self)
153 |         dict.__init__(self, *args, **kwargs)
154 | 
155 |     def __hash__(self):
156 |         if not self:
157 |             raise Exception
158 |         items = tuple(self.items())
159 |         res = hash(items[0])
160 |         for item in items[1:]:
161 |             res ^= hash(item)
162 |         return res
163 | 
164 |     def __setitem__(self, key, value):
165 |         raise Exception
166 | 
167 |     @property
168 |     def alphabet(self):
169 |         if "alphabet" in self:
170 |             return self['alphabet']
171 |         from pydsl.encoding import ascii_encoding
172 |         return ascii_encoding
173 | 
174 | def grammar_factory(input_definition):
175 |     if isinstance(input_definition, str):
176 |         return String(input_definition)
177 |     import re
178 |     retype = type(re.compile('hello, world'))
179 |     if isinstance(input_definition, retype):
180 |         return RegularExpression(retype)
181 |     if isinstance(input_definition, collections.Iterable):
182 |         if isinstance(input_definition[0], str):
183 |             #Return a composition grammar ([a,b] -> "a|b")
184 |             pass
185 |         elif isinstance(input_definition[0], collections.Iterable):
186 |             #
187 |             pass
188 |     raise ValueError("Unable to create a grammar for %s" % input_definition)
189 | 


--------------------------------------------------------------------------------
/pydsl/grammar/parsley.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | # This file is part of pydsl.
 4 | #
 5 | # pydsl is free software: you can redistribute it and/or modify
 6 | # it under the terms of the GNU General Public License as published by
 7 | # the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | # pydsl is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | from __future__ import absolute_import
19 | from pydsl.grammar.definition import Grammar
20 | from pydsl.check import checker_factory
21 | 
22 | __author__ = "Ptolom"
23 | __copyright__ = "Copyright 2014, Ptolom"
24 | __email__ = "ptolom@hexifact.co.uk"
25 | 
26 | class ParsleyGrammar(Grammar):
27 |     def __init__(self, rules, root_rule="expr", repository=None):
28 |         import parsley
29 |         Grammar.__init__(self)
30 |         repo=dict(repository or {})
31 |         for key in repo:
32 |             if isinstance(repo[key], Grammar):
33 |                 repo[key] = checker_factory(repo[key])
34 |         self.grammar=parsley.makeGrammar(rules, repo)
35 |         self.root_rule=root_rule 
36 | 


--------------------------------------------------------------------------------
/pydsl/grammar/symbol.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | # This file is part of pydsl.
  4 | #
  5 | # pydsl is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | # pydsl is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2015, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | import logging
 23 | LOG = logging.getLogger(__name__)
 24 | from pydsl.grammar.definition import Grammar
 25 | from pydsl.check import check
 26 | 
 27 | class Symbol(object):
 28 |     pass
 29 | 
 30 | class NonTerminalSymbol(str, Symbol):
 31 |     def __init__(self, name):
 32 |         Symbol.__init__(self)
 33 | 
 34 |     def __str__(self):
 35 |         return "<NonTS: " + self + ">"
 36 | 
 37 |     def __hash__(self):
 38 |         return str.__hash__(self)
 39 | 
 40 |     def __eq__(self, other):
 41 |         if not isinstance(other, NonTerminalSymbol):
 42 |             return False
 43 |         return str.__eq__(self,other)
 44 | 
 45 | 
 46 | class TerminalSymbol(Symbol):
 47 | 
 48 |     def __init__(self, gd):
 49 |         if not isinstance(gd, Grammar):
 50 |             raise TypeError("Expected Grammar, got %s" % (gd,))
 51 |         Symbol.__init__(self)
 52 |         if not gd:
 53 |             raise Exception
 54 |         self.gd = gd
 55 | 
 56 |     def __hash__(self):
 57 |         return hash(self.gd)
 58 | 
 59 |     def check(self, data):# ->bool:
 60 |         """Checks if input is recognized as this symbol"""
 61 |         return check(self.gd, data)
 62 | 
 63 |     def first(self):
 64 |         return self.gd.first
 65 | 
 66 |     def __eq__(self, other):
 67 |         """StringTerminalSymbol are equals if definition and names are equal"""
 68 |         try:
 69 |             return self.gd == other.gd
 70 |         except AttributeError:
 71 |             return False
 72 | 
 73 |     def __str__(self):
 74 |         return "<TS: " + str(self.gd) + ">"
 75 | 
 76 | class NullSymbol(Symbol):
 77 |     _instance = None
 78 |     def __new__(cls):
 79 |         if not cls._instance:
 80 |             cls._instance = super(NullSymbol, cls).__new__(cls)
 81 |         return cls._instance
 82 | 
 83 |     def __eq__(self, other):
 84 |         return isinstance(other, NullSymbol)
 85 | 
 86 |     def __ne__(self, other):
 87 |         return not self.__eq__(other)
 88 | 
 89 |     def __bool__(self):
 90 |         return False
 91 | 
 92 | class EndSymbol(Symbol):
 93 |     _instance = None
 94 |     def __new__(cls):
 95 |         if cls._instance is None:
 96 |             cls._instance = super(EndSymbol, cls).__new__(cls)
 97 |         return cls._instance
 98 | 
 99 |     def __hash__(self):
100 |         assert(EndSymbol._instance is not None)
101 |         return hash(id(self._instance))
102 | 
103 |     def __eq__(self, other):
104 |         result = isinstance(other, EndSymbol) or EndSymbol == other
105 |         return result
106 | 
107 |     def __bool__(self):
108 |         return False
109 | 
110 |     def __str__(self):
111 |         return "$"
112 | 


--------------------------------------------------------------------------------
/pydsl/guess.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #This file is part of pydsl.
 5 | #
 6 | #pydsl is free software: you can redistribute it and/or modify
 7 | #it under the terms of the GNU General Public License as published by
 8 | #the Free Software Foundation, either version 3 of the License, or
 9 | #(at your option) any later version.
10 | #
11 | #pydsl is distributed in the hope that it will be useful,
12 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | #GNU General Public License for more details.
15 | #
16 | #You should have received a copy of the GNU General Public License
17 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
18 | 
19 | 
20 | """ guess which types are the input data.  """
21 | 
22 | __author__ = "Nestor Arocha"
23 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
24 | __email__ = "nesaro@gmail.com"
25 | 
26 | import logging
27 | LOG = logging.getLogger(__name__)
28 | from pydsl.check import check
29 | 
30 | class Guesser(object):
31 |     """Returns every grammar and alphabet definition that matches the input"""
32 |     def __init__(self, grammarlist):
33 |         self.grammarlist = grammarlist
34 | 
35 |     def __call__(self, data):
36 |         return [x for x in self.grammarlist if check(x,data)]
37 | 
38 | def guess(grammarlist, data):
39 |     return Guesser(grammarlist)(data)
40 | 


--------------------------------------------------------------------------------
/pydsl/lex.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | # This file is part of pydsl.
  4 | #
  5 | # pydsl is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | # pydsl is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | """Lexer classes. Receives and input sequences and returns a list of Tokens"""
 19 | 
 20 | __author__ = "Nestor Arocha"
 21 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 22 | __email__ = "nesaro@gmail.com"
 23 | 
 24 | from pydsl.grammar.PEG import Choice
 25 | from pydsl.check import checker_factory
 26 | from pydsl.token import Token, PositionToken
 27 | from pydsl.tree import PositionResultList
 28 | from pydsl.encoding import ascii_encoding
 29 | 
 30 | 
 31 | class DummyLexer(object):
 32 | 
 33 |     """Special Lexer that encodes from a string a reads a string"""
 34 | 
 35 |     def __call__(self, string):
 36 |         for x in string:
 37 |             yield Token(x, ascii_encoding)
 38 | 
 39 | 
 40 | #A1 A2
 41 | #|  |
 42 | #A3 A4
 43 | #|  |
 44 | #A5 |
 45 | #\  /
 46 | # A6
 47 | 
 48 | #Order is not always unique, as in the previous example A4 could be extracter after or before A3. At the moment the algorithm is to compute elements of the longest path first (extract elements from longest path every single time)
 49 | 
 50 | 
 51 | #Check that every element in the input belongs to base
 52 | 
 53 | #Call the lexers following the graph
 54 | 
 55 | 
 56 | def graph_from_alphabet(alphabet, base):
 57 |     """Creates a graph that connects the base with the target through alphabets
 58 |     If every target is connected to any inputs, create the independent paths"""
 59 |     if not isinstance(alphabet, Choice):
 60 |         raise TypeError(alphabet.__class__.__name__)
 61 |     if not isinstance(base, Choice):
 62 |         raise TypeError(base.__class__.__name__)
 63 |             
 64 |     import networkx
 65 |     result = networkx.DiGraph()
 66 |     current_alphabet = alphabet
 67 |     pending_stack = set(current_alphabet)
 68 |     while pending_stack:
 69 |         current_alphabet = pending_stack.pop()
 70 |         if current_alphabet == base:
 71 |             continue
 72 |         if current_alphabet in base:
 73 |             result.add_edge(current_alphabet, base)
 74 |         elif isinstance(current_alphabet, Choice):
 75 |             for element in current_alphabet:
 76 |                 if element in base:
 77 |                     result.add_edge(current_alphabet, base)
 78 |                 else:
 79 |                     result.add_edge(current_alphabet, element)
 80 |                     pending_stack.add(element)
 81 |         elif current_alphabet.alphabet:
 82 |             result.add_edge(current_alphabet, current_alphabet.alphabet)
 83 |             pending_stack.add(current_alphabet.alphabet)
 84 |     return result
 85 | 
 86 | def print_graph(result):
 87 |     import networkx
 88 |     import matplotlib.pyplot as plt
 89 |     plt.figure(figsize=(8,8))
 90 |     # with nodes colored by degree sized by population
 91 |     networkx.draw(result, with_labels=True)
 92 |     plt.savefig("knuth_miles.png")
 93 | 
 94 | class GeneralLexer(object):
 95 |     """Multi level lexer"""
 96 |     def __init__(self, alphabet, base):
 97 |         if not isinstance(alphabet, Choice):
 98 |             raise TypeError(alphabet.__class__.__name__)
 99 |         if not alphabet:
100 |             raise ValueError
101 |         if not base:
102 |             raise ValueError
103 |         self.alphabet = alphabet
104 |         self.base = base
105 | 
106 | 
107 |     def __call__(self, data):
108 |         if isinstance(data, str):
109 |             data = [Token(x, ascii_encoding) for x in data]
110 |         from pydsl.token import append_position_to_token_list
111 |         data = append_position_to_token_list(data)
112 | 
113 |         if not all(isinstance(x, Token) for x in data):
114 |             raise TypeError
115 |         for element in data:
116 |             from pydsl.check import check
117 |             if not check(self.base, [element]):
118 |                 raise ValueError('Unexpected input %s for alphabet %s' % (element, self.base))
119 |         if self.base == self.alphabet:
120 |             return data
121 |         graph = graph_from_alphabet(self.alphabet, self.base)
122 |         solved_elements = {}
123 |         graph.node[self.base]['parsed'] = data #Attach data to every element in the graph
124 |         #print_graph(graph)
125 |         digraph_walker_backwards(graph, self.base, my_call_back)
126 |         result = []
127 |         for output_alphabet in self.alphabet:
128 |             if output_alphabet in self.base:
129 |                 output_alphabet = self.base
130 |             if output_alphabet not in graph.node or 'parsed' not in graph.node[output_alphabet]:
131 |                 raise Exception("alphabet not initialized:%s" % output_alphabet)
132 |             for token in graph.node[output_alphabet]['parsed']:
133 |                 result.append(PositionToken(str(token), output_alphabet, token.left, token.right))
134 | 
135 |         result = sorted(result, key=lambda x: x.left)
136 |         result = remove_subsets(result)
137 |         result = remove_duplicates(result)
138 |         return [Token(x.content, x.gd) for x in result]
139 | 
140 | 
141 | def is_subset(a, b):
142 |     """Excluding same size"""
143 |     return b.left <= a.left and b.right > a.right or b.left < a.left and b.right >= a.right 
144 | 
145 | def remove_subsets(ptoken_list):
146 |     result = []
147 |     for ptoken in ptoken_list:
148 |         if not any((is_subset(ptoken, x) for x in ptoken_list)):
149 |             result.append(ptoken)
150 |     return result
151 | 
152 | def remove_duplicates(ptoken_list):
153 |     result = []
154 |     for x in ptoken_list:
155 |         for y in result:
156 |             if x.content == y.content and x.left == y.left and x.right == y.right: #ignores GD
157 |                 break
158 |         else:
159 |             result.append(x)
160 |     return result
161 | 
162 | def my_call_back(graph, element):
163 |     gne = graph.node[element]
164 |     if 'parsed' in gne:
165 |         return  # Already parsed
166 |     flat_list = []
167 |     for successor in graph.successors(element):
168 |         if successor not in graph.node or 'parsed' not in graph.node[successor]:
169 |             my_call_back(graph, successor)
170 |         for token in graph.node[successor]['parsed']:
171 |             flat_list.append(token)
172 |     sorted_flat_list = remove_subsets(flat_list)
173 |     lexed_list = []
174 |     prev_right = 0
175 |     for token in sorted_flat_list:
176 |         if prev_right != token.left:
177 |             raise Exception("Non contiguous parsing from sucessors")
178 |         prev_right = token.right
179 |         lexed_list.append(token)
180 |     from pydsl.extract import extract
181 |     gne['parsed'] = extract(element, lexed_list)
182 | 
183 | 
184 | 
185 | def digraph_walker_backwards(graph, element, call_back):
186 |     """Visits every element guaranteeing that the previous elements have been visited before"""
187 |     call_back(graph, element)
188 |     for predecessor in graph.predecessors(element):
189 |         call_back(graph, predecessor)
190 |     for predecessor in graph.predecessors(element):
191 |         digraph_walker_backwards(graph, predecessor, call_back)
192 | 
193 | 
194 | 
195 | class ChoiceLexer(object):
196 |     """Lexer receives an Alphabet in the initialization (A1).
197 |     Receives an input that belongs to A1 and generates a list of tokens in a different Alphabet A2
198 |     It is always described with a regular grammar"""
199 | 
200 |     def __init__(self, alphabet):
201 |         self.load(None)
202 |         self.alphabet = alphabet
203 | 
204 |     def load(self, string):
205 |         self.string = string
206 |         self.index = 0
207 | 
208 |     def __call__(self, string):
209 |         """Tokenizes input, generating a list of tokens"""
210 |         self.load(str(string))
211 |         result = []
212 |         while True:
213 |             try:
214 |                 result.append(self.nextToken())
215 |             except:
216 |                 break
217 |         return result
218 | 
219 |     def nextToken(self):
220 |         best_right = 0
221 |         best_gd = None
222 |         matches = []
223 |         for gd in self.alphabet:
224 |             checker = checker_factory(gd)
225 |             if checker.check(self.string[self.index:self.index+1]):
226 |                 matches.append(gd)
227 |         if not matches:
228 |             raise Exception("Unmatched Token")
229 |         elif len(matches) > 1:
230 |             raise Exception("Too many matches")
231 |         gd = matches[0]
232 |         result = Token(self.string[self.index:self.index+1], gd)
233 |         self.index += 1
234 |         return result
235 | 
236 | 
237 | class ChoiceBruteForceLexer(object):
238 |     """Attempts to generate the smallest token sequence by evaluating every accepted sequence"""
239 | 
240 |     def __init__(self, alphabet):
241 |         self.alphabet = alphabet
242 | 
243 |     def __call__(self, string):  # -> "TokenList":
244 |         """Tokenizes input, generating a list of tokens"""
245 |         self.string = string
246 |         return [x for x in self.nextToken()]
247 | 
248 |     def nextToken(self):
249 |         tree = PositionResultList()  # This is the extract algorithm
250 |         valid_alternatives = []
251 |         for gd in self.alphabet:
252 |             checker = checker_factory(gd)
253 |             for left in range(0, len(self.string)):
254 |                 if getattr(gd, 'maxsize', None):
255 |                     max_right = left + gd.maxsize + 1
256 |                 else:
257 |                     max_right = len(self.string) +1 
258 |                 for right in range(left +1, min(max_right, len(self.string) +1)):
259 |                     slice = self.string[left:right]
260 |                     if checker.check(slice):
261 |                         tree.append(left, right, slice, gd, check_position=False)
262 |         if not tree:
263 |             raise Exception("Nothing consumed")
264 | 
265 |         right_length_seq = []
266 |         valid_sequences = tree.valid_sequences()
267 |         for x in valid_sequences:
268 |             my_list = list(x)
269 |             if my_list[-1]['right'] == len(self.string):
270 |                 right_length_seq.append(my_list)
271 |         if not right_length_seq:
272 |             raise Exception("No sequence found for input %s alphabet %s" % (self.string,self.alphabet))
273 |         for y in sorted(right_length_seq, key=lambda x:len(x))[0]: #Always gets the match with less tokens
274 |             yield Token(y['content'], y.get('gd'))
275 | 
276 | def lexer_factory(alphabet, base, force_lexer = None):
277 |     if force_lexer is None:
278 |         if alphabet == ascii_encoding:
279 |             force_lexer = "lexer"
280 |         elif isinstance(alphabet, Choice) and alphabet.alphabet == base:
281 |             force_lexer = "brute_force"
282 |         else:
283 |             force_lexer = "general"
284 |         
285 |     if force_lexer == "lexer":
286 |         return ChoiceLexer(alphabet)
287 |     elif force_lexer == "brute_force":
288 |         return ChoiceBruteForceLexer(alphabet)
289 |     elif force_lexer == "general":
290 |         return GeneralLexer(alphabet, base)
291 |     else:
292 |         raise ValueError
293 | 
294 | def lex(alphabet, base, data, force_lexer = None):
295 |     return lexer_factory(alphabet, base, force_lexer)(data)
296 | 


--------------------------------------------------------------------------------
/pydsl/parser/LL.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | """LL family parsers"""
19 | 
20 | __author__ = "Nestor Arocha"
21 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
22 | __email__ = "nesaro@gmail.com"
23 | from pydsl.check import check
24 | from pydsl.parser.parser import TopDownParser
25 | from pydsl.tree import ParseTree
26 | from pydsl.exceptions import ParseError
27 | import logging
28 | LOG = logging.getLogger(__name__)
29 | 
30 | 
31 | 
32 | class LL1RecursiveDescentParser(TopDownParser):
33 |     def get_trees(self, data, showerrors = False): # -> list:
34 |         """ returns a list of trees with valid guesses """
35 |         if showerrors:
36 |             raise NotImplementedError("This parser doesn't implement errors")
37 |         self.data = data
38 |         self.index = 0
39 |         try:
40 |             return [self.__aux_parser(self._productionset.initialsymbol)]
41 |         except (IndexError, ParseError):
42 |             return []
43 | 
44 |     def __aux_parser(self, symbol):
45 |         from pydsl.grammar.symbol import TerminalSymbol
46 |         if isinstance(symbol, TerminalSymbol):
47 |             LOG.debug("matching symbol %s, data:%s, index:%s" % (symbol,self.data,self.index ))
48 |             result= self.match(symbol)
49 |             LOG.debug("symbol matched %s" % result)
50 |             return result
51 |         productions = self._productionset.getProductionsBySide(symbol)
52 |         valid_firsts = []
53 |         for production in productions:
54 |             first_of_production = self._productionset.first_lookup(production.rightside[0])
55 |             if check(first_of_production, [self.current]):
56 |                 valid_firsts.append(production)
57 |         if len(valid_firsts) != 1:
58 |             raise ParseError("Expected only one valid production, found %s" % len(valid_firsts), 0)
59 |         childlist = [self.__aux_parser(x) for x in valid_firsts[0].rightside]
60 |         left = childlist[0].left
61 |         right = childlist[-1].right
62 |         content = [x.content for x in childlist]
63 |         return ParseTree(left, right, symbol, content, childlist=childlist)
64 | 
65 | 
66 |     def consume(self):
67 |         self.index +=1
68 |         if self.index > len(self.data):
69 |             raise IndexError("Attempted to consume index %s of data %s" % (self.index, self.data))
70 | 
71 |     @property
72 |     def current(self):
73 |         return self.data[self.index]
74 | 
75 |     def match(self, symbol):
76 |         if symbol.check([self.current]):
77 |             current = self.current
78 |             self.consume()
79 |             return ParseTree(self.index-1, self.index, symbol, current)
80 |         raise Exception("Not matched")
81 | 


--------------------------------------------------------------------------------
/pydsl/parser/LR0.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | """SLR0 implementation"""
 19 | 
 20 | __author__ = "Nestor Arocha"
 21 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 22 | __email__ = "nesaro@gmail.com"
 23 | 
 24 | import logging
 25 | LOG = logging.getLogger(__name__)
 26 | from pydsl.parser.parser import BottomUpParser
 27 | from pydsl.grammar.symbol import NonTerminalSymbol, TerminalSymbol, EndSymbol, Symbol
 28 | from pydsl.grammar.BNF import Production
 29 | from pydsl.grammar.definition import Grammar
 30 | from collections import Iterable, defaultdict
 31 | 
 32 | Extended_S = NonTerminalSymbol("EI")
 33 | 
 34 | def _build_item_closure(itemset, productionset):
 35 |     """Build input itemset closure """
 36 |     #For every item inside current itemset, if we have the following rule:
 37 |     #  xxx <cursor><nonterminalSymbol> xxx  append every rule from self._productionruleset that begins with that NonTerminalSymbol
 38 |     if not isinstance(itemset, LR0ItemSet):
 39 |         raise TypeError
 40 |     import copy
 41 |     resultset = copy.copy(itemset)
 42 |     changed = True
 43 |     while changed:
 44 |         changed = False
 45 |         for currentitem in resultset.itemlist:
 46 |             nextsymbol = currentitem.next_symbol()
 47 |             if nextsymbol is None:
 48 |                 break
 49 |             for rule in productionset.productions:
 50 |                 newitem = LR0Item(rule)
 51 |                 if rule.leftside[0] == nextsymbol and newitem not in resultset.itemlist:
 52 |                     resultset.append_item(newitem)
 53 |                     changed = True
 54 |     return resultset
 55 | 
 56 | def item_set_goto(itemset, inputsymbol, productionset):
 57 |     """returns an itemset
 58 |     locate inside itemset every element with inputsymbol following cursor
 59 |     for every located item, append its itemclosure"""
 60 |     resultset = LR0ItemSet()
 61 |     for item in itemset.itemlist:
 62 |         if item.next_symbol() == inputsymbol:
 63 |             newitem = LR0Item(item.rule, item.position + 1)
 64 |             resultset.append_item(newitem)
 65 |     return _build_item_closure(resultset, productionset)
 66 | 
 67 | def build_states_sets(productionset):
 68 |     symbollist = productionset.getSymbols() + [EndSymbol()]
 69 |     mainproductionrule =  Production([Extended_S] , [productionset.initialsymbol, EndSymbol()])
 70 |     mainproductionruleitem = LR0Item(mainproductionrule)
 71 |     mainproductionruleitemset = LR0ItemSet()
 72 |     mainproductionruleitemset.append_item(mainproductionruleitem)
 73 |     index0 = _build_item_closure(mainproductionruleitemset, productionset)
 74 |     LOG.debug("buildStatesSets: mainsymbol closure: " + str(index0))
 75 |     result = [index0]
 76 |     changed = True
 77 |     #returns a set of itemsets
 78 |     while changed:
 79 |         changed = False
 80 |         for itemset in result[:]:
 81 |             for symbol in symbollist:
 82 |                 if itemset.has_transition(symbol): #FIXME a symbol in a LR0item list?
 83 |                     continue
 84 |                 newitemset = item_set_goto(itemset, symbol, productionset)
 85 |                 if newitemset in result and itemset.has_transition(symbol) and itemset.get_transition(symbol) != newitemset:
 86 |                     changed = True
 87 |                     itemset.append_transition(symbol, newitemset)
 88 |                 elif newitemset in result and not itemset.has_transition(symbol):
 89 |                     changed = True
 90 |                     itemset.append_transition(symbol, newitemset)
 91 |                 elif newitemset and newitemset not in result: #avoid adding a duplicated entry
 92 |                     changed = True
 93 |                     result.append(newitemset)
 94 |                     itemset.append_transition(symbol, newitemset)
 95 |     return result
 96 | 
 97 | def _slr_build_parser_table(productionset):
 98 |     """SLR method to build parser table"""
 99 |     result = ParserTable()
100 |     statesset = build_states_sets(productionset)
101 |     for itemindex, itemset in enumerate(statesset):
102 |         LOG.debug("_slr_build_parser_table: Evaluating itemset:" + str(itemset))
103 |         for symbol in productionset.getSymbols() + [EndSymbol()]:
104 |             numberoptions = 0
105 |             for lritem in itemset.itemlist:
106 |                 #if cursor is before a terminal, and there is a transition to another itemset with the following terminal, append shift rule
107 |                 if isinstance(symbol, TerminalSymbol) and lritem.next_symbol() == symbol and itemset.has_transition(symbol):
108 |                     destinationstate = statesset.index(itemset.get_transition(symbol))
109 |                     result.append(itemindex, symbol, "Shift", destinationstate)
110 |                     numberoptions += 1
111 |                 if isinstance(symbol, NonTerminalSymbol) and lritem.next_symbol() == symbol and itemset.has_transition(symbol):
112 |                     destinationstate = statesset.index(itemset.get_transition(symbol))
113 |                     result.append_goto(itemindex, symbol, destinationstate)
114 |                 #if cursor is at the end of the rule, then append reduce rule and go transition
115 |                 if lritem.previous_symbol() == symbol and lritem.is_last_position() and symbol != Extended_S:
116 |                     for x in productionset.next_lookup(symbol):
117 |                         if isinstance(x, Grammar):
118 |                             result.append(itemindex, TerminalSymbol(x), "Reduce", None, lritem.rule)
119 |                         elif isinstance(x, Symbol):
120 |                             result.append(itemindex, x, "Reduce", None, lritem.rule)
121 |                         else:
122 |                             raise TypeError(x)
123 |                     numberoptions += 1
124 |                 #if cursor is at the end of main rule, and current symbol is end, then append accept rule
125 |                 if symbol == EndSymbol() and lritem.previous_symbol() == productionset.initialsymbol and lritem.next_symbol() == EndSymbol():
126 |                     result.append(itemindex, symbol, "Accept", None)
127 |                     numberoptions += 1
128 |             if not numberoptions:
129 |                 LOG.info("No rule found to generate a new parsertable entry ")
130 |                 LOG.debug("symbol: " + str(symbol))
131 |                 LOG.debug("itemset: " + str(itemset))
132 |             elif numberoptions > 1: #FIXME can it count duplicated entries?
133 |                 raise Exception("LR Conflict %s" % symbol)
134 |     return result
135 |     
136 | class ParserTable(defaultdict):
137 |     """ Stores a state/symbol/action/new state relation """
138 |     def __init__(self):
139 |         defaultdict.__init__(self, lambda: defaultdict(lambda:{"action":"Fail"}))
140 | 
141 |     def append(self, state, symbol, action, destinationstate, production = None):
142 |         """Appends a new rule"""
143 |         if action not in (None, "Accept", "Shift", "Reduce"):
144 |             raise TypeError
145 |         rule = {"action":action, "dest":destinationstate}
146 |         if action == "Reduce":
147 |             if rule is None:
148 |                 raise TypeError("Expected production parameter")
149 |             rule["rule"] = production
150 |         while isinstance(symbol, TerminalSymbol) and isinstance(symbol.gd, Iterable) and len(symbol.gd) == 1 and isinstance(list(symbol.gd)[0], Grammar):
151 |             symbol = TerminalSymbol(list(symbol.gd)[0]) #Reduces symbol if its gd is a Sequence/Choice of 1 element
152 |         if not isinstance(symbol, Symbol):
153 |             raise TypeError("Expected symbol, got %s" % symbol)
154 |         self[state][symbol] = rule
155 | 
156 |     def append_goto(self, state, symbol, destinationstate):
157 |         if symbol in self[state] and self[state][symbol] != destinationstate:
158 |             raise Exception
159 |         self[state][symbol] = destinationstate
160 | 
161 |     def goto(self, state, symbol):
162 |         return self[state][symbol]
163 | 
164 |     def insert(self, state, token):
165 |         """change internal state, return action"""
166 |         if token == EndSymbol():
167 |             return self[state][EndSymbol()]
168 |         from pydsl.check import check
169 |         symbol_list = [x for x in self[state] if isinstance(x, TerminalSymbol) and check(x.gd, [token])]
170 |         if not symbol_list:
171 |             return {"action":"Fail"}
172 |         if len(symbol_list) > 1:
173 |             raise Exception("Multiple symbols matches input")
174 |         symbol = symbol_list[0]
175 |         return self[state][symbol]
176 | 
177 | 
178 | 
179 | class LR0Item(object):
180 |     """LR0 table item"""
181 |     def __init__(self, rule, position = 0):
182 |         if not isinstance(rule, Production):
183 |             raise TypeError
184 |         if position > len(rule.rightside):
185 |             raise ValueError("Position is outside the rule")
186 |         self.rule = rule
187 |         self.position = position
188 | 
189 |     def __str__(self):
190 |         rscopy = [str(x) for x in self.rule.rightside]
191 |         rscopy.insert(self.position, ".")
192 |         return str([str(x) for x in self.rule.leftside]) + ": " + str(rscopy) 
193 | 
194 |     def __eq__(self, other):
195 |         if not isinstance(other, LR0Item):
196 |             return False
197 |         return self.position == other.position and self.rule == other.rule
198 | 
199 |     def previous_symbol(self):
200 |         """returns cursor's previous symbol"""
201 |         if self.position == 0:
202 |             return None
203 |         return self.rule.rightside[self.position-1]
204 | 
205 |     def next_symbol(self):
206 |         """returns the symbol located after cursor"""
207 |         try:
208 |             return self.rule.rightside[self.position]
209 |         except IndexError:
210 |             return None
211 | 
212 |     def is_last_position(self):
213 |         """Returns true if cursor if after last element"""
214 |         return self.position >= len(self.rule.rightside)
215 | 
216 | class LR0ItemSet(object):
217 |     """Stores LR0Items, and a dic with symbols and destination states"""
218 |     def __init__(self):
219 |         self.itemlist = []
220 |         self.transitions = {}
221 | 
222 |     def __str__(self):
223 |         result = "<LR0ItemSet: \n"
224 |         for item in self.itemlist:
225 |             result += str(item) + ","
226 |         if self.transitions:
227 |             result += "transitions:" + str([str(x) + str(y) for (x,y) in self.transitions.items()])
228 |         result += ">"
229 |         return result
230 | 
231 |     def __bool__(self):
232 |         return bool(self.itemlist)
233 | 
234 |     def __nonzero__(self):
235 |         return self.__bool__()
236 | 
237 |     def __eq__(self, anotherset):
238 |         """Tests on itemlist equality"""
239 |         if not isinstance(anotherset, LR0ItemSet):
240 |             raise TypeError
241 |         if len(self.itemlist) != len(anotherset.itemlist):
242 |             return False
243 |         for element in self.itemlist:
244 |             if element not in anotherset.itemlist:
245 |                 return False
246 |         return True
247 | 
248 |     def append_item(self, item):
249 |         """Append new item to set"""
250 |         if not isinstance(item, LR0Item):
251 |             raise TypeError
252 |         self.itemlist.append(item)
253 | 
254 |     def append_transition(self, symbol, targetset):
255 |         """Appends a transition"""
256 |         if symbol in self.transitions:
257 |             return
258 |         self.transitions[symbol] = targetset
259 | 
260 |     def has_transition(self, symbol):
261 |         return symbol in self.transitions
262 | 
263 |     def get_transition(self, symbol):
264 |         """gets a transition"""
265 |         return self.transitions[symbol]
266 | 
267 | class LR0Parser(BottomUpParser):
268 |     """LR0 bottomup parser. Not finished"""
269 |     def __init__(self, productionset):
270 |         #TODO: Build extended productionset before calling parent constructor
271 |         BottomUpParser.__init__(self, productionset)
272 |         #Add main item to itemsclosure with cursor at 0 position
273 |         self.__parsertable = _slr_build_parser_table(productionset)
274 |         #build GoTo and Action Table from ProductionRuleSet
275 | 
276 |     def get_trees(self, tokenlist):
277 |         return self.__parse(tokenlist)
278 | 
279 |     def __parse(self, tokenlist):
280 |         """see parent docstring"""
281 |         #empty stack
282 |         #iterate over symbollist
283 |         tokenlist = [x for x in tokenlist]
284 |         if not isinstance(tokenlist, list):
285 |             raise TypeError("Expected list, got %s" % tokenlist.__class__.__name__)
286 |         LOG.debug("get_trees: checking list: " + str(tokenlist))
287 |         stack = [(0, Extended_S)]
288 |         while True:
289 |             state = stack[-1][0]
290 |             if len(tokenlist):#FIXME: tokenlist with one element is reported as false
291 |                 token = tokenlist[0]
292 |             else:
293 |                 token = EndSymbol()
294 |             newdic = self.__parsertable.insert(state, token)
295 |             action = newdic["action"]
296 |             if action == "Fail":
297 |                 return False
298 |             elif action == "Accept":
299 |                 return True
300 |             if action == "Reduce":
301 |                 reductionrule = newdic["rule"]
302 |                 #TODO extract len(right side) of the rule and insert left side
303 |                 for rsymbol in reversed(reductionrule.rightside):
304 |                     state, symbol = stack.pop() # TODO: check
305 |                 state = stack[-1][0]
306 |                 state = self.__parsertable.goto(state,reductionrule.leftside[0])
307 |                 stack.append((state, reductionrule.leftside[0]))
308 |             elif action == "Shift":
309 |                 stack.append((newdic['dest'], tokenlist.pop(0)))
310 |             else:
311 |                 raise ValueError("Unknown action")
312 |         return False
313 | 
314 | 


--------------------------------------------------------------------------------
/pydsl/parser/PEG.py:
--------------------------------------------------------------------------------
 1 | 
 2 | #!/usr/bin/python
 3 | # -*- coding: utf-8 -*-
 4 | #This file is part of pydsl.
 5 | #
 6 | #pydsl is free software: you can redistribute it and/or modify
 7 | #it under the terms of the GNU General Public License as published by
 8 | #the Free Software Foundation, either version 3 of the License, or
 9 | #(at your option) any later version.
10 | #
11 | #pydsl is distributed in the hope that it will be useful,
12 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | #GNU General Public License for more details.
15 | #
16 | #You should have received a copy of the GNU General Public License
17 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
18 | 
19 | from pydsl.parser.parser import Parser
20 | 
21 | class PEGParser(Parser):
22 |     def __init__(self, gd):
23 |         self.gd = gd
24 | 
25 |     def get_trees(self, data):
26 |         pass
27 | 


--------------------------------------------------------------------------------
/pydsl/parser/README.md:
--------------------------------------------------------------------------------
1 | #summary descent parser
2 | 
3 | A RecursiveDescentParser is a top-down parser built from a set of mutually-recursive procedures (or a non-recursive equivalent) where each such procedure usually implements one of the production rules of the grammar. Thus the structure of the resulting program closely mirrors that of the grammar it recognizes.
4 | 
5 | A PredictiveDescentParser is a recursive descent parser that does not require backtracking. Predictive parsing is possible only for the class of LL(k) grammars, which are the context-free grammars for which there exists some positive integer k that allows a recursive descent parser to decide which production to use by examining only the next k tokens of input. (The LL(k) grammars therefore exclude all ambiguous grammars, as well as all grammars that contain left recursion. Any context-free grammar can be transformed into an equivalent grammar that has no left recursion, but removal of left recursion does not always yield an LL(k) grammar.) A predictive parser runs in linear time.
6 | 


--------------------------------------------------------------------------------
/pydsl/parser/__init__.py:
--------------------------------------------------------------------------------
1 | from .parser import parse
2 | 


--------------------------------------------------------------------------------
/pydsl/parser/backtracing.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | """Recursive descent parser"""
 19 | 
 20 | __author__ = "Nestor Arocha"
 21 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 22 | __email__ = "nesaro@gmail.com"
 23 | 
 24 | import logging
 25 | LOG = logging.getLogger(__name__)
 26 | from .parser import TopDownParser
 27 | from pydsl.tree import ParseTree, PositionResultList
 28 | from pydsl.check import check
 29 | 
 30 | 
 31 | class BacktracingErrorRecursiveDescentParser(TopDownParser):
 32 |     """Recursive descent parser implementation. Backtracing. Null support. Error support"""
 33 |     def get_trees(self, data, showerrors = False): # -> list:
 34 |         """ returns a list of trees with valid guesses """
 35 |         if not all(check(self._productionset.alphabet, [x]) for x in data):
 36 |             raise ValueError("Unknown element in {}, alphabet:{}".format(str(data), self.productionset.alphabet))
 37 |         result = self.__recursive_parser(self._productionset.initialsymbol, data, self._productionset.main_production, showerrors)
 38 |         finalresult = []
 39 |         for eresult in result:
 40 |             if eresult.left == 0 and eresult.right == len(data) and eresult not in finalresult:
 41 |                 finalresult.append(eresult)        
 42 |         return finalresult
 43 | 
 44 |     def __recursive_parser(self, onlysymbol, data, production, showerrors = False):
 45 |         """ Aux function. helps check_word"""
 46 |         LOG.debug("__recursive_parser: Begin ")
 47 |         if not data:
 48 |             return []
 49 |         from pydsl.grammar.symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
 50 |         if isinstance(onlysymbol, TerminalSymbol):
 51 |             LOG.debug("Iteration: terminalsymbol")
 52 |             return self._reduce_terminal(onlysymbol,data[0], showerrors)
 53 |         elif isinstance(onlysymbol, NullSymbol):
 54 |             return [ParseTree(0, 0, onlysymbol, "")]
 55 |         elif isinstance(onlysymbol, NonTerminalSymbol):
 56 |             validstack = []
 57 |             invalidstack = []
 58 |             for alternative in self._productionset.getProductionsBySide(onlysymbol): #Alternative
 59 |                 alternativetree = PositionResultList()
 60 |                 alternativeinvalidstack = []
 61 |                 for symbol in alternative.rightside: # Symbol
 62 |                     symbol_success = False
 63 |                     for totalpos in alternativetree.right_limit_list(): # Right limit
 64 |                         if totalpos >= len(data):
 65 |                             continue
 66 |                         thisresult =  self.__recursive_parser(symbol, data[totalpos:], alternative, showerrors)
 67 |                         if not (thisresult and all(thisresult)):
 68 |                             alternativeinvalidstack += [x for x in thisresult if not x]
 69 |                             continue
 70 |                         symbol_success = True
 71 |                         for x in thisresult:
 72 |                             x.shift(totalpos)
 73 |                             success = alternativetree.append(x.left, x.right, x)
 74 |                             if not success:
 75 |                                 #TODO: Add as an error to the tree or to another place
 76 |                                 LOG.debug("Discarded symbol :" + str(symbol) + " position:" + str(totalpos))
 77 |                             else:
 78 |                                 LOG.debug("Added symbol :" + str(symbol) + " position:" + str(totalpos))
 79 |                     if not symbol_success:
 80 |                         LOG.debug("Symbol doesn't work" + str(symbol))
 81 |                         break #Try next alternative
 82 |                 else: # Alternative success (no break happened)
 83 |                     invalidstack += alternativeinvalidstack
 84 |                 for x in alternativetree.valid_sequences():
 85 |                     validstack.append(x)
 86 |             result = []
 87 | 
 88 |             LOG.debug("iteration result collection finished:" + str(validstack))
 89 |             for alternative in self._productionset.getProductionsBySide(onlysymbol):
 90 |                 nullcount = alternative.rightside.count(NullSymbol())
 91 |                 for results in validstack:
 92 |                     nnullresults = 0
 93 |                     left = results[0]['left']
 94 |                     right = results[-1]['right']
 95 |                     nnullresults = len([x for x in results if x['content'].symbol == NullSymbol()])
 96 |                     if len(results) - nnullresults != len(alternative.rightside) - nullcount:
 97 |                         LOG.debug("Discarded: incorrect number of non null symbols")
 98 |                         continue
 99 |                     if right > len(data):
100 |                         LOG.debug("Discarded: length mismatch")
101 |                         continue
102 |                     for x in range(min(len(alternative.rightside), len(results))):
103 |                         if results[x]['content'] != alternative.rightside[x]:
104 |                             LOG.debug("Discarded: rule doesn't match partial result")
105 |                             continue
106 |                     childlist = [x['content'] for x in results]
107 |                     allvalid = all([x.valid for x in childlist])
108 |                     if allvalid:
109 |                         newresult = ParseTree(0, right - left, onlysymbol,
110 |                                 data[left:right], childlist = childlist)
111 |                         newresult.valid = True
112 |                         result.append(newresult)
113 |             if showerrors and not result:
114 |                 erroresult = ParseTree(0,len(data), onlysymbol , data, valid = False)
115 |                 for invalid in invalidstack:
116 |                     if invalid.content in production.rightside:
117 |                         erroresult.append(invalid)
118 |                 return [erroresult]
119 |             return result
120 |         raise Exception("Unknown symbol:" + str(onlysymbol))
121 | 


--------------------------------------------------------------------------------
/pydsl/parser/parser.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | """Parser module"""
19 | from pydsl.lex import lexer_factory
20 | 
21 | __author__ = "Nestor Arocha"
22 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
23 | __email__ = "nesaro@gmail.com"
24 | 
25 | from pydsl.encoding import ascii_encoding
26 | import logging
27 | LOG = logging.getLogger(__name__)
28 | 
29 | 
30 | class Parser(object):
31 |     """Expands an input based on grammar rules
32 |     At this time, all parsers are tree based"""
33 |     def __init__(self, productionset):
34 |         self._productionset = productionset
35 |     def get_trees(self, word): # -> list:
36 |         """ returns a ParseTree list with all guesses """
37 |         raise NotImplementedError
38 | 
39 |     def __call__(self, word):
40 |         return self.get_trees(word)
41 | 
42 |     @property
43 |     def productionset(self):
44 |         """returns productionset"""
45 |         return self._productionset
46 | 
47 | class TopDownParser(Parser):
48 |     """Top down parser like descent parser"""
49 |     def _reduce_terminal(self, symbol, data, showerrors = False):
50 |         from pydsl.check import check
51 |         from pydsl.tree import ParseTree
52 |         result = check(symbol.gd, [data])
53 |         if result:
54 |             return [ParseTree(0,1, symbol , data)]
55 |         if showerrors and not result:
56 |             return [ParseTree(0,1, symbol , data, valid = False)]
57 |         return []
58 | 
59 | class BottomUpParser(Parser):
60 |     """ leaf to root parser"""
61 |     def __init__(self, bnfgrammar):
62 |         self._lexer = lexer_factory(bnfgrammar.alphabet, ascii_encoding)
63 |         super().__init__(bnfgrammar)
64 | 
65 | 
66 | def parser_factory(grammar, parser = None):
67 |     from pydsl.grammar.BNF import BNFGrammar
68 |     if isinstance(grammar, BNFGrammar):
69 |         if parser in ("auto" , "default" , "descent", None):
70 |             from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
71 |             return BacktracingErrorRecursiveDescentParser(grammar)
72 |         elif parser == "lr0":
73 |             from pydsl.parser.LR0 import LR0Parser
74 |             return LR0Parser(grammar)
75 |         elif parser == "ll1":
76 |             from pydsl.parser.LL import LL1RecursiveDescentParser
77 |             return LL1RecursiveDescentParser(grammar)
78 |         else:
79 |             raise Exception("Wrong parser name: " + str(parser))
80 |     else:
81 |         raise ValueError(grammar)
82 | 
83 | 
84 | def parse(definition, data, parser = "auto"):
85 |     return parser_factory(definition, parser)(data)
86 | 


--------------------------------------------------------------------------------
/pydsl/token.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | # This file is part of pydsl.
 4 | #
 5 | # pydsl is free software: you can redistribute it and/or modify
 6 | # it under the terms of the GNU General Public License as published by
 7 | # the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | # pydsl is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | """Token classes"""
19 | 
20 | __author__ = "Nestor Arocha"
21 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
22 | __email__ = "nesaro@gmail.com"
23 | 
24 | class Token:
25 |     def __init__(self, content, gd):
26 |         if not gd:
27 |             raise ValueError
28 |         if isinstance(content, str):
29 |             content = [x for x in content]
30 |         elif isinstance(content[0], Token):
31 |             content = [str(x) for x in content]
32 |         self.content = content
33 |         self.gd = gd
34 | 
35 |     def __eq__(self, other):
36 |         try:
37 |             return self.content == other.content and \
38 |                    self.gd == other.gd
39 |         except AttributeError:
40 |             return False
41 | 
42 |     def __str__(self):
43 |         return "".join(str(x) for x in self.content)
44 | 
45 | class PositionToken(Token):
46 |     def __init__(self, content, gd, left=None, right=None):
47 |         super().__init__(content, gd)
48 |         self.left = left
49 |         self.right = right
50 | 
51 |     def __eq__(self, other):
52 |         return self.content == other.content and \
53 |                self.gd == other.gd and \
54 |                self.left == other.left and \
55 |                self.right == other.right
56 |                
57 | 
58 |     def __str__(self):
59 |         return "".join(str(x) for x in self.content)
60 | 
61 | 
62 | def append_position_to_token_list(token_list):
63 |     """Converts a list of Token into a list of Token, asuming size == 1"""
64 |     return [PositionToken(value.content, value.gd, index, index+1) for (index, value) in enumerate(token_list)]
65 | 
66 | 
67 | def tokenize_string(string):
68 |     from .encoding import ascii_encoding
69 |     return [Token(x, ascii_encoding) for x in string]
70 | 
71 | 


--------------------------------------------------------------------------------
/pydsl/translator.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | """Python Translators"""
19 | 
20 | __author__ = "Nestor Arocha"
21 | __copyright__ = "Copyright 2008-2015, Nestor Arocha"
22 | __email__ = "nesaro@gmail.com"
23 | 
24 | import logging
25 | LOG = logging.getLogger(__name__)
26 | 
27 | class PythonTranslator(object):
28 |     """ Python function based translator """
29 |     def __init__(self, function):
30 |         self._function = function
31 | 
32 |     def __call__(self, *args, **kwargs):
33 |         return self._function(*args, **kwargs)
34 | 
35 | class PLYTranslator(object):
36 |     def __init__(self, grammardefinition):
37 |         self.module = grammardefinition.module
38 | 
39 |     def __call__(self, input):
40 |         from ply import yacc, lex
41 |         lexer = lex.lex(self.module)
42 |         parser = yacc.yacc(debug=0, module = self.module)
43 |         return parser.parse(input, lexer = lexer)
44 | 
45 | class PyParsingTranslator(object):
46 |     def __init__(self, root_symbol):
47 |         self.root_symbol = root_symbol
48 | 
49 |     def __call__(self, input):
50 |         return self.root_symbol.parseString(input)
51 | 
52 | class ParsleyTranslator(object):
53 |     def __init__(self, grammar):
54 |         self.gd=grammar
55 | 
56 |     def __call__(self, input):
57 |         return getattr(self.gd.grammar(input), self.gd.root_rule)() #call grammar(data).root_rule()
58 | 
59 | 
60 | def translator_factory(function):
61 |     from pydsl.grammar.definition import PLYGrammar
62 |     from pydsl.grammar.parsley import ParsleyGrammar
63 |     if isinstance(function, PLYGrammar):
64 |         return PLYTranslator(function)
65 |     if isinstance(function, ParsleyGrammar):
66 |         return ParsleyTranslator(function)
67 |     if isinstance(function, dict):
68 |         return PythonTranslator(**function)
69 |     from pyparsing import OneOrMore
70 |     if isinstance(function, OneOrMore):
71 |         return PyParsingTranslator(function)
72 |     if isinstance(function, PythonTranslator):
73 |         return function
74 |     raise ValueError(function)
75 | 
76 | def translate(definition, data):
77 |     return translator_factory(definition)(**data)
78 | 


--------------------------------------------------------------------------------
/pydsl/tree.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | # This file is part of pydsl.
  4 | #
  5 | # pydsl is free software: you can redistribute it and/or modify
  6 | # it under the terms of the GNU General Public License as published by
  7 | # the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | # pydsl is distributed in the hope that it will be useful,
 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | # GNU General Public License for more details.
 14 | #
 15 | # You should have received a copy of the GNU General Public License
 16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | """Tree class for tree based parsers"""
 19 | 
 20 | __author__ = "Nestor Arocha"
 21 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 22 | __email__ = "nesaro@gmail.com"
 23 | 
 24 | import logging
 25 | LOG = logging.getLogger(__name__)
 26 | 
 27 | 
 28 | class ParseTree(object):
 29 | 
 30 |     """Stores the position of the original tree"""
 31 | 
 32 |     def __init__(self, left, right, symbol, content, childlist=None, valid=True):
 33 |         self.symbol = symbol
 34 |         if not isinstance(left, int) and left is not None:
 35 |             raise TypeError
 36 |         if not isinstance(right, int) and right is not None:
 37 |             raise TypeError
 38 |         self.childlist = childlist or []
 39 |         self.left = left
 40 |         self.right = right
 41 |         self.content = content
 42 |         self.valid = valid
 43 | 
 44 |     def __eq__(self, other):
 45 |         try:
 46 |             return self.left == other.left and self.right == other.right and self.valid == other.valid and self.content == other.content 
 47 |         except AttributeError:
 48 |             return False
 49 | 
 50 |     def __bool__(self):
 51 |         """checks if it is a null result"""
 52 |         return self.valid
 53 | 
 54 |     def __nonzero__(self):
 55 |         return self.__bool__()
 56 | 
 57 |     def shift(self, amount):
 58 |         """ shifts position """
 59 |         if self.left is not None:
 60 |             self.left += amount
 61 |         if self.left is not None:
 62 |             self.right += amount
 63 | 
 64 |     def __len__(self):
 65 |         if self.right is None and self.left is None:
 66 |             return 0
 67 |         return self.right - self.left
 68 | 
 69 |     def append(self, dpr):
 70 |         """appends dpr to childlist"""
 71 |         self.childlist.append(dpr)
 72 | 
 73 | 
 74 | class PositionResultList(object):
 75 |     """Contains a list of results"""
 76 |     def __init__(self):
 77 |         self.possible_items = []
 78 | 
 79 |     @property
 80 |     def current_right(self):
 81 |         if not self.possible_items:
 82 |             return set([0])
 83 |         return set(x['right'] for x in self.possible_items)
 84 | 
 85 |     def append(self, left, right, content, gd = None, check_position=True):
 86 |         if left > right:
 87 |             raise ValueError('Attempted to add negative length alement')
 88 |         if check_position and left and left not in self.current_right:
 89 |             raise ValueError("Unable to add element")
 90 |         result = {'left':left, 'right':right, 'content':content}
 91 |         if gd:
 92 |             result['gd'] = gd
 93 |         self.possible_items.append(result)
 94 | 
 95 |     def valid_sequences(self):
 96 |         """Returns list"""
 97 |         valid_sets = [[x] for x in self.possible_items if x['left'] == 0]
 98 |         change = True
 99 |         niter = 200
100 |         while change and niter > 0:
101 |             change = False
102 |             niter -=1
103 |             for possible in sorted(self.possible_items, key=lambda x:x['left']):
104 |                 for current_valid in valid_sets[:]:
105 |                     if possible['left'] == current_valid[-1]['right']:
106 |                         if current_valid + [possible] not in valid_sets:
107 |                             if current_valid[-1]['left'] != current_valid[-1]['right'] or possible['left'] != possible['right']: #avoids Null insertion twice
108 |                                 valid_sets.append(current_valid + [possible])
109 |                                 change = True
110 |         if not niter:
111 |             raise Exception('too many iterations')
112 |         return valid_sets
113 | 
114 |     def right_limit_list(self):
115 |         if not self.possible_items:
116 |             return [0]
117 |         return list(set([x[-1]['right'] for x in self.valid_sequences()]))
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | jsonschema
2 | ply
3 | pyparsing
4 | networkx==1.11
5 | git+https://github.com/python-parsley/parsley.git
6 | pylint
7 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | setup(name='pydsl',
 3 |       version='0.5.4',
 4 |       description='Python Domain Specific Language Tools',
 5 |       author='Nestor Arocha',
 6 |       author_email='n@nestorarocha.com',
 7 |       url='https://github.com/nesaro/pydsl',
 8 |       packages = find_packages(exclude=['tests.*']),
 9 |       install_requires=['ply', 'jsonschema'],
10 |       package_dir={'pydsl.contrib': 'pydsl/contrib'},
11 |       package_data={'pydsl.contrib': ['grammar/*.re','grammar/*.bnf','grammar/*.parsley','dict/*.dict']},
12 |       classifiers=[
13 |           'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
14 |           'Programming Language :: Python :: 3',
15 |           ],
16 |      )
17 | 
18 | 


--------------------------------------------------------------------------------
/tests/FOL.g:
--------------------------------------------------------------------------------
 1 | grammar FOL;
 2 | 
 3 | options{
 4 | 	language=Java;
 5 | 	output=AST;
 6 | 	ASTLabelType = CommonTree;
 7 | }
 8 | 
 9 | tokens{
10 | 	PREDICATE;
11 | 	FUNCTION;
12 | }
13 | 
14 | /*------------------------------------------------------------------
15 |  * PARSER RULES
16 |  *------------------------------------------------------------------*/
17 | 
18 | condition: formula EOF! ;
19 | 
20 | formula	
21 | 	:	((FORALL^ | EXISTS^) VARIABLE)? disjunction ;
22 | 
23 | disjunction
24 | 	:	conjunction (OR^ conjunction)* ;
25 | 
26 | conjunction
27 | 	:	negation (AND^ negation)* ;
28 | 
29 | negation 
30 | 	:	NOT^? (predicate | LPAREN! formula RPAREN!) ;
31 | 
32 | predicate 
33 | 	:	PREPOSITION predicateTuple -> ^(PREDICATE PREPOSITION predicateTuple)
34 | 	| 	PREPOSITION ;
35 | 
36 | predicateTuple
37 | 	:	LPAREN! term (','! term)* RPAREN! ;
38 | 
39 | term	:	function | VARIABLE ;
40 | 
41 | function:	CONSTANT functionTuple -> ^(FUNCTION CONSTANT functionTuple)
42 | 	|	CONSTANT;
43 | 
44 | functionTuple
45 | 	:	LPAREN! (CONSTANT | VARIABLE) (','! (CONSTANT | VARIABLE) )* RPAREN!;
46 | 
47 | /*------------------------------------------------------------------
48 |  * LEXER RULES
49 |  *------------------------------------------------------------------*/
50 | 
51 | LPAREN : '(' ;
52 | RPAREN :  ')' ;
53 | AND :  '&' ;
54 | OR :  '|' ;
55 | NOT :  '!' ;
56 | FORALL :  'Forall' ;
57 | EXISTS :  'Exists' ;
58 | 
59 | VARIABLE: '?' (('a'..'z') | ('0'..'9')) CHARACTER* ;
60 | 
61 | CONSTANT: (('a'..'z') | ('0'..'9')) CHARACTER* ;
62 | 
63 | PREPOSITION: ('A'..'Z') CHARACTER* ;
64 | 
65 | fragment CHARACTER: ('0'..'9' | 'a'..'z' | 'A'..'Z' | '_') ;
66 | 
67 | WS : (' ' | '\t' | '\r' | '\n')+ {$channel = HIDDEN ;} ;
68 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/tests/__init__.py


--------------------------------------------------------------------------------
/tests/functional/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/tests/functional/__init__.py


--------------------------------------------------------------------------------
/tests/functional/test_Binary.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import unittest
23 | from pydsl.grammar import String
24 | from pydsl.grammar.parsley import ParsleyGrammar
25 | from pydsl.grammar.PEG import OneOrMore, Choice
26 | from pydsl.translator import ParsleyTranslator
27 | 
28 | 
29 | class TestBinaryAlphabet(unittest.TestCase):
30 |     def test_binaryAlphabet(self):
31 |         binary_alphabet = Choice([String('0'), String('1')])
32 |         binary_number = OneOrMore(binary_alphabet)
33 |         parsley_grammar = ParsleyGrammar("""digit = anything:x ?(x in '01')
34 | number = <digit+>:ds -> int(ds)
35 | expr = number:left ( '+' number:right -> left + right 
36 |                    | -> left)""", "expr")
37 |         binary_addition = ParsleyTranslator(parsley_grammar)
38 |         self.assertEqual(binary_addition('01+10'), 11)
39 | 
40 | 


--------------------------------------------------------------------------------
/tests/functional/test_Case.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | import unittest
 23 | from pydsl.encoding import ascii_encoding
 24 | from pydsl.lex import lexer_factory
 25 | from pydsl.parser.LL import LL1RecursiveDescentParser
 26 | 
 27 | class TestCase(unittest.TestCase):
 28 |     def test_main_case(self):
 29 |         input_data = "1+2"
 30 |         ascii_lexer = lexer_factory(ascii_encoding, None)
 31 |         ascii_tokens = [x for x in ascii_lexer(input_data)]
 32 |         self.assertListEqual([str(x) for x in ascii_tokens], ['1', '+', '2'])
 33 | 
 34 |         def concept_translator_fun(inputtokens):
 35 |             result = []
 36 |             for x in inputtokens:
 37 |                 if str(x) == "1":
 38 |                     result.append("one")
 39 |                 elif str(x) == "2":
 40 |                     result.append("two")
 41 |                 elif str(x) == "+":
 42 |                     result.append("addition")
 43 |                 else:
 44 |                     raise Exception(x.__class__.__name__)
 45 | 
 46 |             return result
 47 |         def to_number(number):
 48 |             if number == "one":
 49 |                 return 1
 50 |             if number == "two":
 51 |                 return 2
 52 |  
 53 |         math_expression_concepts = concept_translator_fun(ascii_tokens)
 54 |         self.assertListEqual(math_expression_concepts, ['one', 'addition', 'two'])
 55 |         grammar_def = [
 56 |                 "S ::= E",
 57 |                 "E ::= one addition two",
 58 |                 "one := String,one",
 59 |                 "two := String,two",
 60 |                 "addition := String,addition",
 61 |                 ]
 62 |         from pydsl.file.BNF import strlist_to_production_set
 63 |         production_set = strlist_to_production_set(grammar_def, {})
 64 |         from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
 65 |         rdp = BacktracingErrorRecursiveDescentParser(production_set)
 66 |         parse_tree = rdp(math_expression_concepts)
 67 |         from pydsl.grammar.symbol import NonTerminalSymbol
 68 |         def parse_tree_walker(tree):
 69 |             if tree.symbol == NonTerminalSymbol("S"):
 70 |                 return parse_tree_walker(tree.childlist[0])
 71 |             if tree.symbol == NonTerminalSymbol("E"):
 72 |                 return to_number(tree.childlist[0].symbol.gd) + to_number(tree.childlist[2].symbol.gd)
 73 |             raise Exception
 74 |             
 75 |         result = parse_tree_walker(parse_tree[0])
 76 |         self.assertEqual(result, 3)
 77 | 
 78 | 
 79 |     def test_calculator_simple(self):
 80 |         grammar_def = [
 81 |                 "S ::= E",
 82 |                 "E ::= number operator number",
 83 |                 "number := Word,integer,max",
 84 |                 "operator := String,+",
 85 |                 ]
 86 |         from pydsl.file.BNF import strlist_to_production_set
 87 |         from pydsl.grammar import RegularExpression
 88 |         repository = {'integer':RegularExpression("^[0123456789]*$")}
 89 |         production_set = strlist_to_production_set(grammar_def, repository)
 90 |         rdp = LL1RecursiveDescentParser(production_set)
 91 |         parse_tree = rdp("1+2")
 92 | 
 93 |         def parse_tree_walker(tree):
 94 |             from pydsl.grammar.symbol import NonTerminalSymbol
 95 |             if tree.symbol == NonTerminalSymbol("S"):
 96 |                 return parse_tree_walker(tree.childlist[0])
 97 |             if tree.symbol == NonTerminalSymbol("E"):
 98 |                 return int(str(tree.childlist[0].content)) + int(str(tree.childlist[2].content))
 99 |             else:
100 |                 raise Exception
101 |             
102 |         result = parse_tree_walker(parse_tree[0])
103 |         self.assertEqual(result, 3)
104 |         from pydsl.grammar.PEG import Choice
105 |         from pydsl.grammar.definition import String, RegularExpression
106 |         from pydsl.encoding import ascii_encoding
107 |         math_alphabet = Choice([RegularExpression("^[0123456789]*$"),Choice([String('+')])])
108 |         from pydsl.lex import lex
109 |         tokens = [x for x in lex(math_alphabet, ascii_encoding, "11+2")]
110 |         parse_tree = rdp(tokens)
111 |         result = parse_tree_walker(parse_tree[0])
112 |         self.assertEqual(result, 13)
113 | 
114 | 


--------------------------------------------------------------------------------
/tests/functional/test_LogicGrammars.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | __author__ = "Nestor Arocha"
 5 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 6 | __email__ = "nesaro@gmail.com"
 7 | 
 8 | import unittest
 9 | from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
10 | from pydsl.file.BNF import load_bnf_file
11 | from pydsl.lex import lex
12 | from pydsl.grammar import RegularExpression, String
13 | from pydsl.encoding import ascii_encoding
14 | from pydsl.check import checker_factory
15 | from pydsl.token import Token
16 | 
17 | 
18 | class TestLogicGrammars(unittest.TestCase):
19 |     def setUp(self):
20 |         self.tokelist5 = "True"
21 | 
22 |     def testLogicalExp(self):
23 |         repository = {'TrueFalse':load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")}
24 |         productionrulesetlogical = load_bnf_file("pydsl/contrib/grammar/LogicalExpression.bnf", repository)
25 |         parser = BacktracingErrorRecursiveDescentParser(productionrulesetlogical)
26 |         tokens = [x for x in lex(repository['TrueFalse'].alphabet, ascii_encoding, self.tokelist5)]
27 |         self.assertEqual(len(tokens), 1)
28 |         #tokens = [x[0] for x in lex(productionrulesetlogical.alphabet, Encoding('ascii'), tokens)] #FIXME
29 |         tokens = [Token('True', repository['TrueFalse'])]
30 |         result = parser.get_trees(tokens)
31 |         self.assertTrue(result)
32 | 
33 |     def testTrueFalse(self):
34 |         productionrulesetlogical = load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
35 |         parser = BacktracingErrorRecursiveDescentParser(productionrulesetlogical)
36 |         tokens = [x for x in lex(productionrulesetlogical.alphabet, ascii_encoding, self.tokelist5)]
37 |         result = parser.get_trees(tokens)
38 |         self.assertTrue(result)
39 | 
40 |     @unittest.skip('overlapping input')
41 |     def testLogicalExpression(self):
42 |         repository = {'TrueFalse':load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")}
43 |         productionrulesetlogical = load_bnf_file("pydsl/contrib/grammar/LogicalExpression.bnf", repository)
44 |         parser = BacktracingErrorRecursiveDescentParser(productionrulesetlogical)
45 |         tokens = [x[0] for x in lex(productionrulesetlogical.alphabet, ascii_encoding, "True&&False")]
46 |         result = parser.get_trees(tokens)
47 |         self.assertTrue(result)
48 |         result = parser.get_trees("True&|False")
49 |         self.assertFalse(result)
50 | 
51 | 
52 | 
53 | class TestHTMLGrammars(unittest.TestCase):
54 |     def testHTMLTable(self):
55 |         repository = {'integer':RegularExpression("^[0123456789]*$")}
56 |         productionrulesetlogical = load_bnf_file("pydsl/contrib/grammar/TrueHTMLTable.bnf", repository)
57 |         parser = BacktracingErrorRecursiveDescentParser(productionrulesetlogical)
58 |         lexed = lex(productionrulesetlogical.alphabet, ascii_encoding, "<table><tr><td>1</td></tr></table>")
59 |         self.assertTrue(lexed)
60 |         result = parser.get_trees(lexed)
61 |         self.assertTrue(result)
62 |         lexed = [x for x in lex(productionrulesetlogical.alphabet, ascii_encoding, "<table><td>1</td></tr></table>")]
63 |         result = parser.get_trees(lexed)
64 |         self.assertFalse(result)
65 | 
66 | 
67 | class TestLogGrammar(unittest.TestCase):
68 |     def testLogLine(self):
69 |         repository = {'space':String(' '), 
70 |                       'integer':RegularExpression("^[0123456789]*$"),
71 |                       'ipv4':RegularExpression("^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"),
72 |                       'characters':RegularExpression("^[A-z]+$")}
73 |         grammar = load_bnf_file("pydsl/contrib/grammar/logline.bnf", repository)
74 |         checker = checker_factory(grammar)
75 |         original_string = "1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GET\" 1 1 \"referer\" \"useragent\""
76 |         tokenized = lex(grammar.alphabet, ascii_encoding, original_string, force_lexer="general")
77 |         self.assertTrue(checker.check(tokenized))
78 |         self.assertFalse(checker.check("1.2.3.4 - - [1/1/2003:11:11:11 +2] \"GOT\" 1 1 \"referer\" \"useragent\""))
79 | 


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nesaro/pydsl/04039c3a09e409c349705ac82e7a5460a60a0cae/tests/unit/__init__.py


--------------------------------------------------------------------------------
/tests/unit/test_Alphabet.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import unittest
23 | from pydsl.check import checker_factory
24 | from pydsl.lex import lexer_factory
25 | from pydsl.grammar import String, RegularExpression
26 | from pydsl.grammar.PEG import Sequence, Choice
27 | from pydsl.encoding import ascii_encoding
28 | from pydsl.file.BNF import load_bnf_file
29 | from pydsl.file.python import load_python_file
30 | from pydsl.token import Token
31 | import sys
32 | 
33 | 
34 | class TestAlphabet(unittest.TestCase):
35 |     def setUp(self):
36 |         self.integer = RegularExpression("^[0123456789]*$")
37 |         self.date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", {'integer':self.integer, 'DayOfMonth':load_python_file('pydsl/contrib/grammar/DayOfMonth.py')})
38 | 
39 |     def testChecker(self):
40 |         alphabet = Choice([self.integer,self.date])
41 |         checker = checker_factory(alphabet)
42 |         self.assertTrue(checker.check("1234"))
43 |         self.assertTrue(checker.check([x for x in "1234"]))
44 |         self.assertFalse(checker.check("11/11/1991")) #Non tokenized input
45 |         self.assertFalse(checker.check([x for x in "11/11/1991"])) #Non tokenized input
46 |         self.assertTrue(checker.check([Token(x, ascii_encoding) for x in ["11","/","11","/","1991"]]))
47 |         self.assertFalse(checker.check("bcdf"))
48 |         self.assertFalse(checker.check([x for x in "bcdf"]))
49 | 
50 |     def testEncoding(self):
51 |         alphabet = ascii_encoding
52 |         self.assertEqual(len(alphabet), 128)
53 | 


--------------------------------------------------------------------------------
/tests/unit/test_BNF.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | import unittest
19 | from pydsl.grammar.definition import String
20 | 
21 | 
22 | class TestBNF(unittest.TestCase):
23 |     def setUp(self):
24 |         from pydsl.contrib.bnfgrammar import productionset0
25 |         self.grammardef = productionset0
26 | 
27 |     @unittest.skip("Not implemented")
28 |     def testEnumerate(self):
29 |         self.assertListEqual([x for x in self.grammardef.enum()], ["SR"])
30 | 
31 |     def testFirst(self):
32 |         self.assertSetEqual(self.grammardef.first, set([String("S")]))
33 | 
34 |     @unittest.skip("Not implemented")
35 |     def testMin(self):
36 |         self.assertEqual(self.grammardef.minsize,2)
37 | 
38 |     @unittest.skip("Not implemented")
39 |     def testMax(self):
40 |         self.assertEqual(self.grammardef.maxsize,2)
41 | 
42 |     def testFirstLookup(self):
43 |         from pydsl.grammar.symbol import NonTerminalSymbol, TerminalSymbol
44 |         from pydsl.grammar.PEG import Choice
45 |         self.assertEqual(self.grammardef.first_lookup(NonTerminalSymbol("exp")),Choice([String("S")]))
46 | 
47 |     def testNextLookup(self):
48 |         from pydsl.grammar.symbol import NonTerminalSymbol, EndSymbol
49 |         self.grammardef.next_lookup(NonTerminalSymbol("exp"))[0]
50 |         self.assertListEqual(self.grammardef.next_lookup(NonTerminalSymbol("exp")),[EndSymbol()])
51 | 
52 |     def testAlphabet(self):
53 |         self.assertEqual(self.grammardef.alphabet, set([String(x) for x in ["S","R"]]))
54 | 


--------------------------------------------------------------------------------
/tests/unit/test_BNFLoad.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | #Copyright (C) 2008-2013 Nestor Arocha
 5 | 
 6 | """Test BNF file loading"""
 7 | 
 8 | import unittest
 9 | from pydsl.file.BNF import load_bnf_file
10 | from pydsl.file.python import load_python_file
11 | from pydsl.grammar.definition import RegularExpression
12 | 
13 | class TestFileLoader(unittest.TestCase):
14 |     """Loading a bnf instance from a .bnf file"""
15 |     def testFileLoader(self):
16 |         repository = {'integer':RegularExpression("^[0123456789]*$"), 
17 |                 'DayOfMonth':load_python_file('pydsl/contrib/grammar/DayOfMonth.py')}
18 |         self.assertTrue(load_bnf_file("pydsl/contrib/grammar/Date.bnf", repository))
19 | 


--------------------------------------------------------------------------------
/tests/unit/test_Checker.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #This file is part of pydsl.
  3 | #
  4 | #pydsl is free software: you can redistribute it and/or modify
  5 | #it under the terms of the GNU General Public License as published by
  6 | #the Free Software Foundation, either version 3 of the License, or
  7 | #(at your option) any later version.
  8 | #
  9 | #pydsl is distributed in the hope that it will be useful,
 10 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | #GNU General Public License for more details.
 13 | #
 14 | #You should have received a copy of the GNU General Public License
 15 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | __author__ = "Nestor Arocha"
 18 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 19 | __email__ = "nesaro@gmail.com"
 20 | 
 21 | import unittest
 22 | from pydsl.check import checker_factory
 23 | from pydsl.grammar.definition import String
 24 | from pydsl.grammar.PEG import Sequence
 25 | import sys
 26 | 
 27 | class TestBNFChecker(unittest.TestCase):
 28 |     """BNF Checker"""
 29 |     def testStringInput(self):
 30 |         """Test checker instantiation and call"""
 31 |         from pydsl.contrib.bnfgrammar import productionset0
 32 |         from pydsl.check import BNFChecker
 33 |         grammardef = productionset0
 34 |         checker = BNFChecker(grammardef)
 35 |         self.assertTrue(checker.check("SR"))
 36 |         self.assertTrue(checker.check("SR"))
 37 |         self.assertTrue(checker.check(("S","R")))
 38 |         self.assertFalse(checker.check("SL"))
 39 |         self.assertFalse(checker.check(("S","L")))
 40 |         self.assertFalse(checker.check(""))
 41 | 
 42 | class TestRegularExpressionChecker(unittest.TestCase):
 43 |     """BNF Checker"""
 44 |     def testCheck(self):
 45 |         """Test checker instantiation and call"""
 46 |         from pydsl.check import RegularExpressionChecker
 47 |         input_str = "abc"
 48 |         checker = RegularExpressionChecker(input_str)
 49 |         self.assertTrue(checker.check(input_str))
 50 |         self.assertTrue(checker.check([x for x in input_str]))
 51 |         self.assertTrue(checker.check([x for x in input_str]))
 52 |         self.assertTrue(checker.check(input_str))
 53 |         self.assertFalse(checker.check("abd"))
 54 |         self.assertFalse(checker.check(""))
 55 | 
 56 | class TestPLYChecker(unittest.TestCase):
 57 |     def testCheck(self):
 58 |         """Test checker instantiation and call"""
 59 |         from pydsl.check import PLYChecker
 60 |         from pydsl.contrib.grammar import example_ply
 61 |         from pydsl.grammar.definition import PLYGrammar
 62 |         grammardef = PLYGrammar(example_ply)
 63 |         checker = PLYChecker(grammardef)
 64 |         self.assertTrue(checker.check("O"))
 65 |         self.assertTrue(checker.check(["O"]))
 66 |         self.assertFalse(checker.check("FALSE"))
 67 |         #self.assertFalse(checker.check("")) #FIXME
 68 | 
 69 | 
 70 | 
 71 | class TestJsonSchemaChecker(unittest.TestCase):
 72 |     def testCheck(self):
 73 |         """Test checker instantiation and call"""
 74 |         from pydsl.grammar.definition import JsonSchema
 75 |         from pydsl.check import JsonSchemaChecker
 76 |         schema = {
 77 |                 "type" : "object",
 78 |                 "required":["foo"],
 79 |                 "properties" : {
 80 |                     "foo" : {"enum" : [1, 3]},
 81 |                     "bar" : {"format": "number_three"} #Ignored by jsonschema
 82 |                 }
 83 |         }
 84 |         grammardef = JsonSchema(schema)
 85 |         checker = JsonSchemaChecker(grammardef)
 86 |         self.assertFalse(checker.check("a"))
 87 |         self.assertTrue(checker.check({"foo":1}))
 88 |         self.assertFalse(checker.check({"foo":2}))
 89 |         self.assertTrue(checker.check({"foo":3}))
 90 |         self.assertFalse(checker.check([1, {"foo" : 2, "bar" : {"baz" : [1]}}, "quux"]))
 91 |         self.assertRaises(Exception, checker.check, [1, {"foo" : 2, "bar" : {"baz" : [1]}}, "quux"], raise_exceptions=True)
 92 |         number_three = checker_factory(String("3"))
 93 |         fc = {"number_three":number_three}
 94 |         grammardef = JsonSchema(schema)
 95 |         checker = JsonSchemaChecker(grammardef, fc) # Adds a format checker
 96 |         self.assertFalse(checker.check({"foo" : 1, "bar" : "123456"}))
 97 |         self.assertTrue(checker.check({"foo" : 1, "bar" : "3"}))
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | class TestChoiceChecker(unittest.TestCase):
104 |     def testCheck(self):
105 |         from pydsl.check import ChoiceChecker
106 |         from pydsl.grammar.PEG import Choice
107 |         from pydsl.grammar import RegularExpression
108 |         a = Choice([RegularExpression('^[0123456789]*$')])
109 |         checker = ChoiceChecker(a)
110 |         self.assertTrue(checker.check([x for x in '1234']))
111 |         self.assertTrue(checker.check('1234'))
112 |         self.assertFalse(checker.check('abc'))
113 |         self.assertFalse(checker.check(''))
114 | 
115 | class TestStringChecker(unittest.TestCase):
116 |     def testCheck(self):
117 |         """Test checker instantiation and call"""
118 |         from pydsl.check import StringChecker
119 |         grammarchecker = StringChecker(String("3"))
120 |         self.assertTrue(grammarchecker("3"))
121 |         self.assertTrue(grammarchecker(["3"]))
122 |         self.assertTrue(grammarchecker(("3",)))
123 |         self.assertFalse(grammarchecker(''))
124 | 
125 | class TestSequenceChecker(unittest.TestCase):
126 |     def testCheck(self):
127 |         from pydsl.grammar.PEG import Sequence
128 |         from pydsl.check import SequenceChecker
129 |         sequence = Sequence((String("a"), String("b"), String("c")))
130 |         checker = SequenceChecker(sequence)
131 |         self.assertTrue(checker.check("abc"))
132 |         self.assertTrue(checker.check([x for x in "abc"]))
133 |         self.assertFalse(checker.check("abd"))
134 |         self.assertFalse(checker.check(""))
135 | 


--------------------------------------------------------------------------------
/tests/unit/test_Diff.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import unittest
23 | from pydsl.grammar.PEG import Choice
24 | from pydsl.grammar.definition import String
25 | 
26 | class TestDiff(unittest.TestCase):
27 |     def testDiffSimple(self):
28 |         alphabet = Choice([String(x) for x in "abcde1"])
29 |         elem1 = "abcde"
30 |         elem2 = "abcd1"
31 |         from pydsl.diff import diff
32 |         self.assertEqual(diff(alphabet, elem1, elem2)[0].a, 0)
33 |         self.assertEqual(diff(alphabet, elem1, elem2)[0].b, 0)
34 |         self.assertEqual(diff(alphabet, elem1, elem2)[0].size, 4)
35 | 


--------------------------------------------------------------------------------
/tests/unit/test_Equal.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #This file is part of pydsl.
 3 | #
 4 | #pydsl is free software: you can redistribute it and/or modify
 5 | #it under the terms of the GNU General Public License as published by
 6 | #the Free Software Foundation, either version 3 of the License, or
 7 | #(at your option) any later version.
 8 | #
 9 | #pydsl is distributed in the hope that it will be useful,
10 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | #GNU General Public License for more details.
13 | #
14 | #You should have received a copy of the GNU General Public License
15 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | __author__ = "Nestor Arocha"
18 | __copyright__ = "Copyright 2008-2020, Nestor Arocha"
19 | __email__ = "nesaro@gmail.com"
20 | 
21 | import unittest
22 | from pydsl.equal import equal_factory, equal
23 | from pydsl.grammar.definition import String
24 | 
25 | class TestStringEqual(unittest.TestCase):
26 |     """BNF Checker"""
27 |     def testBasic(self):
28 |         self.assertTrue(equal(String('a'), 'a', 'a'))
29 |         self.assertRaises(ValueError, equal, String('a'), 'b', 'a')
30 | 
31 | 


--------------------------------------------------------------------------------
/tests/unit/test_Extract.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | import unittest
 23 | from pydsl.extract import extract, extract_alphabet, match, search, filter_subsets
 24 | from pydsl.grammar import RegularExpression, String
 25 | from pydsl.grammar.PEG import Choice
 26 | from pydsl.encoding import ascii_encoding
 27 | from pydsl.token import Token, PositionToken
 28 | import sys
 29 | 
 30 | 
 31 | class TestFilterSubsets(unittest.TestCase):
 32 |     def testNoInputs(self):
 33 |         self.assertFalse(filter_subsets([]))
 34 | 
 35 |     def testNonOverlap(self):
 36 |         non_overlapping_sequence = [(0, 1, None, None), (1, 2, None, None)]
 37 |         self.assertListEqual(filter_subsets(non_overlapping_sequence), non_overlapping_sequence)
 38 | 
 39 | 
 40 | class TestGrammarExtract(unittest.TestCase):
 41 | 
 42 |     def testRegularExpressionExtract(self):
 43 |         self.maxDiff = None
 44 |         gd = RegularExpression('^[0123456789]*$')
 45 |         expected_result = [
 46 |                 PositionToken(content='1', gd=gd, left=3, right=4),
 47 |                 PositionToken(content='12', gd=gd, left=3, right=5),
 48 |                 PositionToken(content='123', gd=gd, left=3, right=6),
 49 |                 PositionToken(content='1234', gd=gd, left=3, right=7),
 50 |                 PositionToken(content='2', gd=gd, left=4, right=5),
 51 |                 PositionToken(content='23', gd=gd, left=4, right=6),
 52 |                 PositionToken(content='234', gd=gd, left=4, right=7),
 53 |                 PositionToken(content='3', gd=gd, left=5, right=6),
 54 |                 PositionToken(content='34', gd=gd, left=5, right=7),
 55 |                 PositionToken(content='4', gd=gd, left=6, right=7)]
 56 |         self.assertListEqual(extract(gd,'abc1234abc'), expected_result)
 57 |         expected_result = [
 58 |                 PositionToken(content=['1'], gd=gd, left=3, right=4),
 59 |                 PositionToken(content=['1','2'], gd=gd, left=3, right=5),
 60 |                 PositionToken(content=['1','2','3'], gd=gd, left=3, right=6),
 61 |                 PositionToken(content=['1','2','3','4'], gd=gd, left=3, right=7),
 62 |                 PositionToken(content=['2'], gd=gd, left=4, right=5),
 63 |                 PositionToken(content=['2','3'], gd=gd, left=4, right=6),
 64 |                 PositionToken(content=['2','3','4'], gd=gd, left=4, right=7),
 65 |                 PositionToken(content=['3'], gd=gd, left=5, right=6),
 66 |                 PositionToken(content=['3','4'], gd=gd, left=5, right=7),
 67 |                 PositionToken(content=['4'], gd=gd, left=6, right=7)]
 68 |         self.assertListEqual(extract(gd,[Token(x, gd) for x in 'abc1234abc']), expected_result)
 69 |         self.assertListEqual(extract(gd,[x for x in 'abc1234abc']), expected_result)
 70 |         self.assertRaises(Exception, extract, None)
 71 |         self.assertListEqual(extract(gd,''), []) #Empty input
 72 | 
 73 |     def testRegularExpressionSearch(self):
 74 |         gd = RegularExpression('^[0123456789]*$')
 75 |         expected_result = PositionToken(content='1', gd=gd, left=3, right=4)
 76 |         self.assertEqual(search(gd,'abc1234abc'), expected_result)
 77 |         expected_result = PositionToken(content='1', gd=gd, left=3, right=4)
 78 |         self.assertEqual(search(gd,[Token(x, ascii_encoding) for x in 'abc1234abc']), expected_result)
 79 |         self.assertEqual(search(gd,[x for x in 'abc1234abc']), expected_result)
 80 |         self.assertRaises(Exception, search, None)
 81 |         self.assertListEqual(search(gd,''), []) #Empty input
 82 | 
 83 |     def testRegularExpressionMatch(self):
 84 |         gd = RegularExpression('^[0123456789]*$')
 85 |         expected_result = []
 86 |         self.assertEqual(match(gd,'abc1234abc'), expected_result)
 87 |         self.assertEqual(match(gd,[Token(x, ascii_encoding) for x in 'abc1234abc']), expected_result)
 88 |         self.assertEqual(match(gd,[x for x in 'abc1234abc']), expected_result)
 89 |         self.assertRaises(Exception, match, None)
 90 |         self.assertListEqual(match(gd,''), []) #Empty input
 91 | 
 92 | 
 93 | 
 94 | class TestAlphabetExtract(unittest.TestCase):
 95 | 
 96 |     def testEncoding(self):
 97 |         ad = ascii_encoding
 98 |         self.assertListEqual(extract(ad,''), [])
 99 |         self.assertListEqual(extract(ad,'a£'), [PositionToken('a', ad, 0,1)])
100 |         self.assertListEqual(extract(ad,['a','£']), [PositionToken(['a'], ad, 0,1)])
101 |         self.assertRaises(Exception, extract, None)
102 | 
103 |     def testChoices(self):
104 |         gd = Choice([String('a'), String('b'), String('c')])
105 |         self.assertListEqual(extract_alphabet(gd, 'axbycz'), [PositionToken('a', String('a'),0,1), PositionToken('b', String('b'), 2,3), PositionToken('c', String('c'), 4,5)])
106 |         self.assertListEqual(extract_alphabet(gd,'xyzabcxyz'), [PositionToken('a', String('a'),3,4), PositionToken('b', String('b'), 4,5), PositionToken('c', String('c'), 5,6)])
107 |         first_three = [PositionToken('a', String('a'),0,1), PositionToken('b', String('b'), 1,2), PositionToken('c', String('c'), 2,3)]
108 |         self.assertListEqual(extract_alphabet(gd,'abcxyz'), first_three)
109 |         first_three_list = [PositionToken(['a'], String('a'),0,1), PositionToken(['b'], String('b'), 1,2), PositionToken(['c'], String('c'), 2,3)]
110 |         self.assertListEqual(extract_alphabet(gd,[Token(x, ascii_encoding) for x in 'abcxyz']), first_three_list)
111 |         self.assertListEqual(extract_alphabet(gd,'abc'), first_three)
112 |         self.assertListEqual(extract_alphabet(gd,''), [])
113 | 


--------------------------------------------------------------------------------
/tests/unit/test_GrammarDefinition.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | 
19 | """Tests the Grammar definition instances"""
20 | 
21 | 
22 | __author__ = "Nestor Arocha"
23 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
24 | __email__ = "nesaro@gmail.com"
25 | 
26 | import unittest
27 | from pydsl.grammar.definition import String
28 | from pydsl.encoding import ascii_encoding
29 | 
30 | 
31 | @unittest.skip
32 | class TestGrammarDefinitionPLY(unittest.TestCase):
33 |     def setUp(self):
34 |         import plye
35 |         from pydsl.grammar.definition import PLYGrammar
36 |         self.grammardef = PLYGrammar(plye)
37 | 
38 |     @unittest.skip
39 |     def testEnumerate(self):
40 |         self.grammardef.enum()
41 | 
42 |     @unittest.skip
43 |     def testFirst(self):
44 |         self.grammardef.first
45 | 
46 |     @unittest.skip
47 |     def testMin(self):
48 |         self.grammardef.minsize
49 | 
50 |     @unittest.skip
51 |     def testMax(self):
52 |         self.grammardef.maxsize
53 | 
54 |     def testAlphabet(self):
55 |         self.assertListEqual(self.grammardef.alphabet, frozenset)
56 | 
57 | class TestGrammarDefinitionString(unittest.TestCase):
58 |     def setUp(self):
59 |         self.grammardef = String('abc')
60 | 
61 |     def testEnumerate(self):
62 |         self.assertListEqual(['abc'], [x for x in self.grammardef.enum()])
63 | 
64 |     def testFirst(self):
65 |         self.assertEqual(self.grammardef.first, String('a'))
66 | 
67 |     def testMin(self):
68 |         self.assertEqual(self.grammardef.minsize, 3)
69 | 
70 |     def testMax(self):
71 |         self.assertEqual(self.grammardef.maxsize, 3)
72 | 
73 |     def testAlphabet(self):
74 |         self.assertSetEqual(self.grammardef.alphabet, ascii_encoding)
75 | 
76 | 
77 | class TestGrammarDefinitionJson(unittest.TestCase):
78 |     def setUp(self):
79 |         from pydsl.grammar.definition import JsonSchema
80 |         self.grammardef = JsonSchema({})
81 | 
82 |     def testEnumerate(self):
83 |         self.assertRaises(NotImplementedError, self.grammardef.enum)
84 | 
85 |     def testFirst(self):
86 |         self.assertSetEqual(self.grammardef.first, ascii_encoding)
87 | 
88 |     def testMin(self):
89 |         self.grammardef.minsize
90 | 
91 |     def testMax(self):
92 |         self.grammardef.maxsize
93 | 
94 |     def testAlphabet(self):
95 |         self.assertSetEqual(self.grammardef.alphabet, ascii_encoding)
96 | 
97 | 


--------------------------------------------------------------------------------
/tests/unit/test_Guess.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import unittest
23 | from pydsl.grammar import RegularExpression
24 | from pydsl.guess import Guesser
25 | 
26 | class TestGuesser(unittest.TestCase):
27 |     def testGuesser(self):
28 |         cstring = RegularExpression('.*')
29 |         g1234 = RegularExpression('1234')
30 |         memorylist = [cstring, g1234 ]
31 |         guesser = Guesser(memorylist)
32 |         self.assertListEqual(guesser('1234'), [cstring, g1234])
33 |         self.assertListEqual(guesser([x for x in '1234']), [cstring, g1234])
34 |         self.assertListEqual(guesser('134'), [cstring])
35 | 
36 | 


--------------------------------------------------------------------------------
/tests/unit/test_Lexer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2017, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | import unittest
 23 | from pydsl.lex import DummyLexer, lexer_factory, graph_from_alphabet
 24 | from pydsl.contrib.bnfgrammar import *
 25 | from pydsl.grammar.definition import String
 26 | from pydsl.grammar.PEG import Sequence, Choice
 27 | from pydsl.file.BNF import load_bnf_file
 28 | from pydsl.token import Token, PositionToken
 29 | from pydsl.encoding import ascii_encoding
 30 | 
 31 | 
 32 | class TestEncodingLexer(unittest.TestCase):
 33 |     def testLexer(self):
 34 |         """Lexer call"""
 35 |         lexer = lexer_factory(productionset1.alphabet, ascii_encoding)
 36 |         result = list(lexer(string1))
 37 |         self.assertTrue(result)
 38 | 
 39 |     def testencodingLexer(self):
 40 |         lexer = DummyLexer()
 41 |         result = list(lexer("abcde"))
 42 |         self.assertTrue([str(x) for x in result])
 43 |         result = list(lexer([x for x in "abcde"]))
 44 |         self.assertTrue([str(x) for x in result])
 45 | 
 46 | class TestChoiceBruteForceLexer(unittest.TestCase):
 47 |     def testEmptyInput(self):
 48 |         integer = RegularExpression("^[0123456789]*$")
 49 |         date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", {'integer':integer, 'DayOfMonth':load_python_file('pydsl/contrib/grammar/DayOfMonth.py')})
 50 |         mydef = Choice([integer, date])
 51 |         lexer = lexer_factory(mydef, ascii_encoding)
 52 |         self.assertFalse(lexer(""))
 53 | 
 54 |     def testSimpleLexing(self):
 55 |         """Test checker instantiation and call"""
 56 |         integer = RegularExpression("^[0123456789]*$")
 57 |         date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", {'integer':integer, 'DayOfMonth':load_python_file('pydsl/contrib/grammar/DayOfMonth.py')})
 58 |         mydef = Choice([integer, date])
 59 |         lexer = lexer_factory(mydef, ascii_encoding)
 60 |         self.assertListEqual(lexer("1234"), [Token("1234", integer)])
 61 |         self.assertListEqual(lexer([Token(x, ascii_encoding) for x in "1234"]), [Token("1234", integer)])
 62 | 
 63 |     @unittest.skip('FIXME:  Non contiguous parsing from sucessors')
 64 |     def testOverlappingLexing(self):
 65 |         integer = RegularExpression("^[0123456789]*$")
 66 |         date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", {'integer':integer, 'DayOfMonth':load_python_file('pydsl/contrib/grammar/DayOfMonth.py')})
 67 |         mydef = Choice([integer,date])
 68 |         lexer = lexer_factory(mydef, ascii_encoding)
 69 |         self.assertListEqual(lexer("123411/11/2001"), [("1234", integer),("11/11/2001", date)])
 70 |         self.assertListEqual(lexer([x for x in "123411/11/2001"]), [("1234", integer),("11/11/2001", date)])
 71 | 
 72 |     def testSecondLevelGrammar(self):
 73 |         a = String("a")
 74 |         b = String("b")
 75 |         c = String("c")
 76 |         x = String("x")
 77 |         y = String("y")
 78 |         z = String("z")
 79 |         first_level = Choice([a,b,c])
 80 |         first_levelb = Choice([x,y,z])
 81 |         second_level = Sequence([a,b], base_alphabet=first_level)
 82 |         from pydsl.check import checker_factory
 83 |         checker = checker_factory(second_level)
 84 |         self.assertTrue(checker([a,b]))
 85 |         second_level_alphabet = Choice([first_level, first_levelb]) 
 86 |         lexer = lexer_factory(second_level_alphabet, base=first_level+first_levelb)
 87 |         self.assertListEqual(lexer("ab"), [Token("a",first_level),Token("b",first_level)])
 88 | 
 89 | 
 90 | class TestChoiceLexer(unittest.TestCase):
 91 |     def setUp(self):
 92 |         self.maxDiff = None
 93 | 
 94 |     def testSimpleChoiceLexer(self):
 95 |         a1 = Choice([String('a'), String('b'), String('c')])
 96 |         from pydsl.lex import ChoiceLexer
 97 |         lexer = ChoiceLexer(a1)
 98 |         self.assertListEqual(lexer("abc"), [Token("a", String('a')), Token("b", String("b")), Token("c", String("c"))])
 99 | 
100 | class TestPythonLexer(unittest.TestCase):
101 |     def test_Concept(self):
102 |         red = String("red")
103 |         green = String("green")
104 |         blue = String("blue")
105 |         alphabet = Choice([red, green, blue], ascii_encoding)
106 |         lexer = lexer_factory(alphabet, ascii_encoding)
107 | 
108 |         def concept_translator_fun(inputtokens):
109 |             result = []
110 |             for token in inputtokens:
111 |                 x = str(token)
112 |                 if x == "red":
113 |                     result.append("color red")
114 |                 elif x == "green":
115 |                     result.append("color green")
116 |                 elif x == "blue":
117 |                     result.append("color blue")
118 |                 else:
119 |                     raise Exception("%s,%s" % (x, x.__class__.__name__))
120 | 
121 |             return result
122 | 
123 |         ct = concept_translator_fun
124 | 
125 | 
126 |         self.assertListEqual(ct(lexer("red")), ["color red"])
127 |         red_list = [PositionToken(content=character, gd=ascii_encoding, left=i, right=i+1) for i, character in enumerate("red")]
128 |         self.assertListEqual(ct(lexer(red_list)), ["color red"])
129 | 
130 | class TestGraphFromAlphabet(unittest.TestCase):
131 |     def test_simple(self):
132 |         alphabet = Choice([String('a')])
133 |         base = Choice([String('a')])
134 |         graph = graph_from_alphabet(alphabet, base)
135 |         self.assertEqual(len(graph.node), 2)
136 | 


--------------------------------------------------------------------------------
/tests/unit/test_PEG.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | """Tests PEG grammars"""
19 | 
20 | __author__ = "Nestor Arocha"
21 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
22 | __email__ = "nesaro@gmail.com"
23 | 
24 | import unittest
25 | from pydsl.grammar.definition import String, Grammar
26 | from pydsl.grammar.PEG import ZeroOrMore, OneOrMore, Not, Sequence, Choice
27 | from pydsl.check import check
28 | 
29 | class TestPEG(unittest.TestCase):
30 |     def testOneOrMore(self):
31 |         mygrammar = OneOrMore(String("a"))
32 |         self.assertTrue(isinstance(mygrammar, Grammar))
33 |         self.assertEqual(mygrammar.first(), Choice([String("a")]))
34 |         self.assertTrue(check(mygrammar, "a"))
35 |         self.assertTrue(check(mygrammar, "aa"))
36 |         self.assertTrue(check(mygrammar, "aaaa"))
37 |         self.assertFalse(check(mygrammar, ""))
38 |         self.assertFalse(check(mygrammar, "b"))
39 | 
40 |     def testZeroOrMore(self):
41 |         mygrammar = ZeroOrMore(String("a"))
42 |         self.assertTrue(isinstance(mygrammar, Grammar))
43 |         self.assertEqual(mygrammar.first(), Choice([String("a")]))
44 |         self.assertTrue(check(mygrammar, "a"))
45 |         self.assertTrue(check(mygrammar, "aa"))
46 |         self.assertTrue(check(mygrammar, "aaaa"))
47 |         self.assertTrue(check(mygrammar, ""))
48 |         self.assertFalse(check(mygrammar, "b"))
49 | 
50 |     def testChoice(self):
51 |         mygrammar = Choice((String("a"), String("b")))
52 |         self.assertTrue(check(mygrammar, "a"))
53 |         self.assertTrue(check(mygrammar, "b"))
54 |         self.assertFalse(check(mygrammar, "c"))
55 | 
56 |     def testNot(self):
57 |         mygrammar = Not(String("a"))
58 |         self.assertTrue(isinstance(mygrammar, Not))
59 | 
60 |     def testSequence(self):
61 |         mygrammar = Sequence((String("a"), String("b")))
62 |         self.assertTrue(isinstance(mygrammar, Grammar))
63 | 


--------------------------------------------------------------------------------
/tests/unit/test_Parser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | # -*- coding: utf-8 -*-
  3 | #This file is part of pydsl.
  4 | #
  5 | #pydsl is free software: you can redistribute it and/or modify
  6 | #it under the terms of the GNU General Public License as published by
  7 | #the Free Software Foundation, either version 3 of the License, or
  8 | #(at your option) any later version.
  9 | #
 10 | #pydsl is distributed in the hope that it will be useful,
 11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | #GNU General Public License for more details.
 14 | #
 15 | #You should have received a copy of the GNU General Public License
 16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | __author__ = "Nestor Arocha"
 19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
 20 | __email__ = "nesaro@gmail.com"
 21 | 
 22 | 
 23 | 
 24 | from pydsl.contrib.bnfgrammar import *
 25 | from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
 26 | from pydsl.parser.LR0 import LR0Parser
 27 | from pydsl.lex import DummyLexer, lex
 28 | from pydsl.parser.LL import LL1RecursiveDescentParser
 29 | import unittest
 30 | 
 31 | class TestBacktracingRecursiveDescentParser(unittest.TestCase):
 32 |     def testRecursiveLeftRecursion(self):
 33 |         descentparser = BacktracingErrorRecursiveDescentParser(productionsetlr)
 34 |         self.assertRaises(RuntimeError, descentparser, dots)
 35 | 
 36 |     def testRightRecursion(self):
 37 |         descentparser = BacktracingErrorRecursiveDescentParser(productionsetrr)
 38 |         result = descentparser(dots)
 39 |         self.assertTrue(result)
 40 |         result = descentparser(list(dots))
 41 |         self.assertTrue(result)
 42 | 
 43 |     def testCenterRecursion(self):
 44 |         descentparser = BacktracingErrorRecursiveDescentParser(productionsetcr)
 45 |         result = descentparser(dots)
 46 |         self.assertTrue(result)
 47 |         result = descentparser(list(dots))
 48 |         self.assertTrue(result)
 49 | 
 50 |     def testRecursiveDescentParserStore(self):
 51 |         descentparser = BacktracingErrorRecursiveDescentParser(productionset1)
 52 |         result = descentparser(string1)
 53 |         self.assertTrue(result)
 54 |         result = descentparser(list(string1))
 55 |         self.assertTrue(result)
 56 | 
 57 |     def testRecursiveDescentParserBad(self):
 58 |         descentparser = BacktracingErrorRecursiveDescentParser(productionset1)
 59 |         result = descentparser(string2)
 60 |         self.assertFalse(result)
 61 |         result = descentparser(list(string2))
 62 |         self.assertFalse(result)
 63 | 
 64 | 
 65 |     def testRecursiveDescentParserNull(self):
 66 |         descentparser = BacktracingErrorRecursiveDescentParser(productionset2)
 67 |         result = descentparser(string3)
 68 |         self.assertTrue(result)
 69 |         result = descentparser(list(string3))
 70 |         self.assertTrue(result)
 71 | 
 72 |     def testRecursiveDescentParserNullBad(self):
 73 |         descentparser = BacktracingErrorRecursiveDescentParser(productionset2)
 74 |         from pydsl.encoding import ascii_encoding
 75 |         ascii_encoding = ascii_encoding
 76 |         lexed_string4 = lex(productionset2.alphabet, ascii_encoding, string4)
 77 |         result = descentparser(lexed_string4)
 78 |         self.assertFalse(result)
 79 |         result = descentparser(list(string4))
 80 |         self.assertFalse(result)
 81 | 
 82 | 
 83 | class TestLR0Parser(unittest.TestCase):
 84 |     def testLR0ParseTable(self):
 85 |         """Tests the lr0 table generation"""
 86 |         from pydsl.parser.LR0 import _slr_build_parser_table, build_states_sets
 87 |         state_sets = build_states_sets(productionset0)
 88 |         self.assertEqual(len(state_sets), 5)
 89 |         #0 . EI: : . exp $ , 
 90 |         #   exp : .SR
 91 |         #       transitions: S -> 2,
 92 |         #       goto: exp -> 1
 93 |         #1 EI:  exp . $ ,
 94 |         #       transitions: $ -> 3
 95 |         #2 exp:  S . R,
 96 |         #       transitions: R -> 4
 97 |         #3 EI: exp $ .
 98 |         #4 exp:  S R .
 99 |         #       reduce
100 | 
101 |         parsetable = _slr_build_parser_table(productionset0)
102 |         self.assertEqual(len(parsetable), 4)
103 | 
104 | 
105 |     def testLR0ParserStore(self):
106 |         parser = LR0Parser(productionset0)
107 |         tokelist = [x for x in DummyLexer()(p0good)]
108 |         result = parser(tokelist)
109 |         self.assertTrue(result)
110 | 
111 |     def testLR0ParserBad(self):
112 |         parser = LR0Parser(productionset1)
113 |         result = parser(string2)
114 |         self.assertFalse(result)
115 |         result = parser(list(string2))
116 |         self.assertFalse(result)
117 | 
118 |     def testCenterRecursion(self):
119 |         self.assertRaises(Exception, LR0Parser, productionsetcr)
120 | 
121 |     def testArithmetic(self):
122 |         parser = LR0Parser(productionset_arithmetic)
123 |         self.assertFalse(parser('1'))
124 |         self.assertTrue(parser(['123']))
125 |         self.assertTrue(parser(['123','+','123']))
126 |         self.assertTrue(parser(['123','*','123']))
127 |         self.assertFalse(parser(['123a','+','123']))
128 |         self.assertFalse(parser(['123','+','+']))
129 | 
130 | 
131 | class TestLL1RecursiveDescentParser(unittest.TestCase):
132 |     @unittest.skip
133 |     def testRecursiveLeftRecursion(self):
134 |         descentparser = LL1RecursiveDescentParser(productionsetlr)
135 |         result = descentparser(dots)
136 |         self.assertTrue(result)
137 | 
138 |     def testRightRecursion(self):
139 |         descentparser = LL1RecursiveDescentParser(productionsetrr)
140 |         self.assertFalse(descentparser(dots)) #Ambiguous grammar
141 | 
142 |     def testCenterRecursion(self):
143 |         descentparser = LL1RecursiveDescentParser(productionsetcr)
144 |         self.assertFalse(descentparser(dots)) #Ambiguous grammar
145 | 
146 |     def testLL1RecursiveDescentParserStore(self):
147 |         descentparser = LL1RecursiveDescentParser(productionset1)
148 |         result = descentparser(string1)
149 |         self.assertTrue(result)
150 |         result = descentparser(list(string1))
151 |         self.assertTrue(result)
152 | 
153 |     def testLL1RecursiveDescentParserBad(self):
154 |         descentparser = LL1RecursiveDescentParser(productionset1)
155 |         result = descentparser(string2)
156 |         self.assertFalse(result)
157 |         result = descentparser(list(string2))
158 |         self.assertFalse(result)
159 | 
160 | @unittest.skip
161 | class TestPEGParser(unittest.TestCase):
162 |     def testBasicChoice(self):
163 |         from pydsl.grammar.PEG import Choice
164 |         from pydsl.tree import ParseTree
165 |         from pydsl.parser.PEG import PEGParser
166 |         gd = Choice([String('a'), String('b')])
167 |         parser = PEGParser(gd)
168 |         result = parser('a')
169 |         self.assertTrue(isinstance(result, ParseTree))
170 | 
171 | 
172 | 
173 | class TestParse(unittest.TestCase):
174 |     def testverb(self):
175 |         """Tests the lr0 table generation"""
176 |         from pydsl.parser.parser import parse, parser_factory
177 |         tokelist = [x for x in DummyLexer()(p0good)]
178 |         self.assertTrue(parse(productionset0, tokelist , "default"))
179 |         self.assertTrue(parse(productionset0, tokelist , "lr0"))
180 |         self.assertTrue(parse(productionset0, tokelist , "ll1"))
181 |         tokelist = [x for x in DummyLexer()(p0bad)]
182 |         self.assertFalse(parse(productionset0, tokelist , "default"))
183 |         self.assertFalse(parse(productionset0, tokelist , "lr0"))
184 |         self.assertFalse(parse(productionset0, tokelist , "ll1"))
185 | 


--------------------------------------------------------------------------------
/tests/unit/test_Parsley.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | # This file is part of pydsl.
 4 | #
 5 | # pydsl is free software: you can redistribute it and/or modify
 6 | # it under the terms of the GNU General Public License as published by
 7 | # the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | # pydsl is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | # GNU General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU General Public License
16 | # along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | import unittest
19 | from pydsl.translator import translator_factory
20 | from pydsl.check import checker_factory
21 | from pydsl.file.python import load_python_file
22 | import parsley
23 | import sys
24 | 
25 | __author__ = "Ptolom"
26 | __copyright__ = "Copyright 2014, Ptolom"
27 | __email__ = "ptolom@hexifact.co.uk"
28 | 
29 | class TestParsley(unittest.TestCase):
30 |     def testDate(self):
31 |         from pydsl.file.parsley import load_parsley_grammar_file
32 |         repository = {'DayOfMonth':load_python_file('pydsl/contrib/grammar/DayOfMonth.py')} #DayOfMonth loaded as checker
33 |         G=load_parsley_grammar_file("pydsl/contrib/grammar/Date.parsley", "expr", repository)
34 |         C=checker_factory(G)
35 |         T=translator_factory(G)
36 |         self.assertTrue(C("2/4/12"))
37 |         self.assertEqual(T("2/4/12"),(2,4,12))
38 |         self.assertRaises(parsley.ParseError,T, "40/4/12")
39 |         
40 |     def testCalculator(self):
41 |         G=load_python_file("pydsl/contrib/translator/calculator.py")
42 |         T=translator_factory(G)
43 |         self.assertEqual(T("1+1"),2)
44 |         
45 | 
46 | if __name__ == '__main__':
47 |         unittest.main()
48 | 


--------------------------------------------------------------------------------
/tests/unit/test_RegularExpression.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | 
19 | __author__ = "Nestor Arocha"
20 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
21 | __email__ = "nesaro@gmail.com"
22 | 
23 | import unittest
24 | from pydsl.grammar.definition import RegularExpression
25 | import re
26 | 
27 | class TestRegularExpression(unittest.TestCase):
28 |     """Regular expression method tests"""
29 |     def testInstantiation(self):
30 |         re1 = RegularExpression('^a$')
31 |         re2 = RegularExpression(re.compile('^a$'))
32 |         self.assertEqual(str(re1), str(re2)) #FIXME python3 default flag value is 32
33 | 
34 |     def testEnumerate(self):
35 |         re1 = RegularExpression(re.compile('^a$'))
36 |         self.assertRaises(NotImplementedError, re1.enum)
37 | 
38 |     def testFirst(self):
39 |         re1 = RegularExpression(re.compile('^a$'))
40 |         self.assertEqual(len(re1.first),1)
41 |         from pydsl.grammar.definition import String
42 |         self.assertIn(String('a'), re1.first)
43 | 
44 |     def testMin(self):
45 |         re1 = RegularExpression(re.compile('^a$'))
46 |         re1.minsize
47 | 
48 |     def testMax(self):
49 |         re1 = RegularExpression(re.compile('^a$'))
50 |         re1.maxsize
51 | 
52 |     def testAlphabet(self):
53 |         from pydsl.encoding import ascii_encoding
54 |         re1 = RegularExpression(re.compile('^a$'))
55 |         self.assertEqual(re1.alphabet, ascii_encoding)
56 | 
57 | 


--------------------------------------------------------------------------------
/tests/unit/test_Translate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2015, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import unittest
23 | 
24 | class TestTranslate(unittest.TestCase):
25 |     def testEcho(self):
26 |         from pydsl.translator import translate, PythonTranslator
27 |         from pydsl.grammar.definition import RegularExpression
28 |         from pydsl.check import checker_factory
29 |         cstring = checker_factory(RegularExpression('.*'))
30 |         def function(my_input):
31 |             return my_input
32 |         pt = PythonTranslator(function)
33 |         self.assertEqual(translate(pt,{'my_input':"1234"}),"1234")
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/unit/test_Tree.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | #This file is part of pydsl.
 4 | #
 5 | #pydsl is free software: you can redistribute it and/or modify
 6 | #it under the terms of the GNU General Public License as published by
 7 | #the Free Software Foundation, either version 3 of the License, or
 8 | #(at your option) any later version.
 9 | #
10 | #pydsl is distributed in the hope that it will be useful,
11 | #but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | #GNU General Public License for more details.
14 | #
15 | #You should have received a copy of the GNU General Public License
16 | #along with pydsl.  If not, see <http://www.gnu.org/licenses/>.
17 | 
18 | __author__ = "Nestor Arocha"
19 | __copyright__ = "Copyright 2008-2014, Nestor Arocha"
20 | __email__ = "nesaro@gmail.com"
21 | 
22 | import unittest
23 | 
24 | class TestTrees(unittest.TestCase):
25 |     def setUp(self):
26 |         from pydsl.tree import ParseTree
27 |         a = ParseTree(0,6, None, "abcdef")
28 |         self.firstleaf1 = ParseTree(0,1, None, "a")
29 |         a.append(self.firstleaf1)
30 |         b = ParseTree(1,3,None, "bc")
31 |         a.append(b)
32 |         b.append(ParseTree(1,2,None, "b"))
33 |         b.append(ParseTree(2,3,None, "c"))
34 |         a.append(ParseTree(3,4,None, "d"))
35 |         a.append(ParseTree(4,5,None, "e"))
36 |         a.append(ParseTree(5,6,None, "f"))
37 |         self.tree1 = a
38 |         c = ParseTree(0,6, None, "abcdef")
39 |         self.firstleaf2 = ParseTree(0,1, None, "a")
40 |         c.append(self.firstleaf2)
41 |         b = ParseTree(1,3, None, "bc")
42 |         c.append(b)
43 |         b.append(ParseTree(1,2, None, "b"))
44 |         b.append(ParseTree(2,3, None, "j"))
45 |         c.append(ParseTree(3,4, None, "d"))
46 |         c.append(ParseTree(4,5, None, "e"))
47 |         c.append(ParseTree(5,6, None, "f"))
48 |         self.tree2 = c
49 | 
50 |     def testBasics(self):
51 |         self.assertTrue(len(self.tree1) == 6)
52 | 
53 | 
54 | class TestPositionResultList(unittest.TestCase):
55 |     def testMain(self):
56 |         from pydsl.tree import PositionResultList
57 |         seq = PositionResultList()
58 |         seq.append(0,1,".")
59 |         seq.append(1,2,".")
60 |         seq.append(2,3,".")
61 |         seq.append(3,4,".")
62 |         seq.append(4,5,".")
63 |         self.assertEqual(len(seq.valid_sequences()[-1]), 5)
64 | 
65 | 


--------------------------------------------------------------------------------