├── .gitignore ├── .travis.yml ├── CONTRIBUTIONS ├── LICENSE ├── MANIFEST.in ├── README.md ├── documentation └── example.md ├── pydsl ├── __init__.py ├── check.py ├── contrib │ ├── __init__.py │ ├── alphabet │ │ ├── __init__.py │ │ └── test_alphabet.py │ ├── bnfgrammar.py │ ├── grammar │ │ ├── Date.bnf │ │ ├── Date.parsley │ │ ├── DayOfMonth.py │ │ ├── Grammar2RecursiveDescentParserRecognizer.py │ │ ├── HTMLTable.bnf │ │ ├── ImageFile.py │ │ ├── LogicalExpression.bnf │ │ ├── MimeType.py │ │ ├── SpanishID.py │ │ ├── TrueFalse.bnf │ │ ├── TrueHTMLTable.bnf │ │ ├── __init__.py │ │ ├── calc_ply.py │ │ ├── cstring.py │ │ ├── example_ply.py │ │ ├── integerop.py │ │ ├── logline.bnf │ │ ├── mongoquery.bnf │ │ └── protocol.py │ ├── mongogrammar.py │ ├── regexps.py │ ├── spark │ │ ├── spark_example.py │ │ ├── spark_parse_example.py │ │ └── spark_scan_example.py │ └── translator │ │ ├── calculator.py │ │ ├── calculator_bnf.py │ │ ├── chemicalFormulas.py │ │ └── echo.py ├── diff.py ├── encoding.py ├── equal.py ├── exceptions.py ├── external │ ├── __init__.py │ └── spark.py ├── extract.py ├── file │ ├── BNF.py │ ├── __init__.py │ ├── parsley.py │ ├── python.py │ └── regexp.py ├── grammar │ ├── BNF.py │ ├── PEG.py │ ├── __init__.py │ ├── definition.py │ ├── parsley.py │ └── symbol.py ├── guess.py ├── lex.py ├── parser │ ├── LL.py │ ├── LR0.py │ ├── PEG.py │ ├── README.md │ ├── __init__.py │ ├── backtracing.py │ └── parser.py ├── token.py ├── translator.py └── tree.py ├── requirements.txt ├── setup.py └── tests ├── FOL.g ├── __init__.py ├── functional ├── __init__.py ├── test_Binary.py ├── test_Case.py └── test_LogicGrammars.py └── unit ├── __init__.py ├── test_Alphabet.py ├── test_BNF.py ├── test_BNFLoad.py ├── test_Checker.py ├── test_Diff.py ├── test_Equal.py ├── test_Extract.py ├── test_GrammarDefinition.py ├── test_Guess.py ├── test_Lexer.py ├── test_PEG.py ├── test_Parser.py ├── test_Parsley.py ├── test_RegularExpression.py ├── test_Translate.py └── test_Tree.py 
/.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | tags 3 | pylintrc 4 | .*.swp 5 | *.png 6 | *~ 7 | build 8 | .coverage 9 | htmlcov 10 | comp 11 | dist 12 | *.egg-info 13 | *.out 14 | *.dictc 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | - "3.5" 5 | # command to install dependencies 6 | install: 7 | - "pip install ." 8 | - "pip install -r requirements.txt" 9 | # command to run tests 10 | script: 11 | - nosetests tests 12 | - pylint -E pydsl/ 13 | -------------------------------------------------------------------------------- /CONTRIBUTIONS: -------------------------------------------------------------------------------- 1 | * check existing issues: https://github.com/nesaro/pydsl 2 | * read the project's blog: http://pydsl.blogspot.co.uk 3 | * pull requests :) 4 | 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pydsl/contrib/dict *.dict 2 | recursive-include pydsl/contrib/grammar *.bnf *.re *.parsley 3 | exclude tests 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DESCRIPTION 2 | =========== 3 | 4 | pydsl is a language library written in python. It provides some verbs for Grammars. 
5 | 6 | from pydsl.file.BNF import load_bnf_file 7 | grammardefinition = load_bnf_file('myfile.bnf') 8 | grammardefinition.alphabet # Returns the alphabet used by this grammar 9 | grammardefinition.minsize 10 | grammardefinition.maxsize 11 | grammardefinition.enumerate # Returns a generator that generates every accepted word 12 | 13 | from pydsl.check import check 14 | check(grammardefinition,'mystring') # returns True or False 15 | 16 | from pydsl.parser import parse 17 | parse(grammardefinition,'mystring') # returns a ParseTree 18 | 19 | from pydsl.extract import extract 20 | extract(grammardefinition,'abcmystringabc') # returns ('mystring',3,11) 21 | 22 | FORMATS 23 | ======= 24 | 25 | Functions 26 | --------- 27 | 28 | | Format |Check|Match|Search|Split|Extract|Translate|Validate|Diff| 29 | | ------- |:---:|-----|------|-----|:-----:|:-------:|:------:|----| 30 | | BNF | V | | | |V | Parse | V | | 31 | | regexp | V | | | |V | X | | | 32 | | ply | V | | | |V | V | | | 33 | | parsley | V | | | |V | V | | | 34 | |pyparsing| V | | | |V | | | | 35 | 36 | Properties 37 | ---------- 38 | 39 | | Format |First|Min|Max|Enumerate| 40 | | ------- |:---:|---|---|---------| 41 | | BNF | V | | | | 42 | | regexp | | | | | 43 | | ply | | | | | 44 | | parsley | | | | | 45 | |pyparsing| | | | | 46 | 47 | 48 | INSTALLATION 49 | ============ 50 | * disttools: 51 | * python3 setup.py install 52 | * pip: 53 | * pip install pydsl 54 | 55 | CONTRIBUTIONS 56 | ============= 57 | * check existing issues: https://github.com/nesaro/pydsl 58 | * read the project's blog: http://pydsl.blogspot.co.uk 59 | * pull requests :) 60 | 61 | 62 | REQUIREMENTS 63 | ============ 64 | * python >= 3.4 65 | * optional: ply library ( http://www.dabeaz.com/ply/ ) 66 | 67 | ## Example 68 | 69 | See [examples](documentation/example.md) 70 | 71 | ABOUT 72 | ===== 73 | Copyright (C) 2008-2015 Nestor Arocha (nesaro@gmail.com) 74 | 75 | 76 | 
-------------------------------------------------------------------------------- /documentation/example.md: -------------------------------------------------------------------------------- 1 | ```python 2 | from pydsl.file.BNF import load_bnf_file 3 | from pydsl.file.python import load_python_file 4 | from pydsl.translator import translator_factory 5 | truefalse = load_bnf_file('pydsl/contrib/grammar/TrueFalse.bnf') 6 | grammardefinition = load_bnf_file('pydsl/contrib/grammar/LogicalExpression.bnf', {'TrueFalse':truefalse}) 7 | grammardefinition.alphabet # Returns the alphabet used by this grammar 8 | grammardefinition.minsize 9 | grammardefinition.maxsize 10 | from pydsl.check import check 11 | check(grammardefinition,['(']) # returns True or False 12 | from pydsl.parser import parse 13 | parse(grammardefinition,[')']) # returns a ParseTree 14 | from pydsl.extract import extract 15 | extract(grammardefinition,'abc()abc') # returns ('False',3,11) 16 | solver = translator_factory(load_python_file('pydsl/contrib/translator/echo.py')) 17 | mystring = "True||False" 18 | result = solver(mystring) 19 | print(result) 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /pydsl/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0,5,3) 2 | -------------------------------------------------------------------------------- /pydsl/check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | #This file is part of pydsl. 4 | # 5 | #pydsl is free software: you can redistribute it and/or modify 6 | #it under the terms of the GNU General Public License as published by 7 | #the Free Software Foundation, either version 3 of the License, or 8 | #(at your option) any later version. 
#
#pydsl is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with pydsl.  If not, see <http://www.gnu.org/licenses/>.

"""Checkers: callables that decide whether data conforms to a grammar.

``check(definition, data)`` is the entry point; ``checker_factory`` maps a
grammar definition to the ``Checker`` subclass that knows how to test it.
"""

__author__ = "Nestor Arocha"
__copyright__ = "Copyright 2008-2017, Nestor Arocha"
__email__ = "nesaro@gmail.com"

import logging
import re
# The ABC aliases in ``collections`` were removed in Python 3.10; the
# canonical location (available since 3.3) is ``collections.abc``.
from collections.abc import Iterable

try:
    from jsonschema import FormatChecker
except ImportError:
    # jsonschema is only needed for JsonSchemaChecker.  Like ply and parsley
    # (imported lazily below) its absence must not break the other checkers.
    FormatChecker = None

LOG = logging.getLogger(__name__)


def check(definition, data, *args, **kwargs):
    """Return True when *data* follows *definition*.

    Extra positional and keyword arguments are forwarded to the checker.
    """
    return checker_factory(definition)(data, *args, **kwargs)


def checker_factory(grammar):
    """Build the Checker instance that matches the type of *grammar*.

    The order of the tests below is significant and is kept as is: the
    generic ``Iterable`` fallback must come last, after every specific
    grammar type.

    :raises TypeError: if *grammar* is a plain ``str`` (not a ``String``)
    :raises ValueError: if no checker is known for *grammar*
    """
    from pydsl.grammar.BNF import BNFGrammar
    from pydsl.grammar.PEG import Sequence, Choice, OneOrMore, ZeroOrMore
    from pydsl.grammar.definition import PLYGrammar, RegularExpression, String, PythonGrammar, JsonSchema
    from pydsl.grammar.parsley import ParsleyGrammar
    if isinstance(grammar, str) and not isinstance(grammar, String):
        raise TypeError(grammar)
    if isinstance(grammar, BNFGrammar):
        return BNFChecker(grammar)
    elif isinstance(grammar, JsonSchema):
        return JsonSchemaChecker(grammar)
    elif isinstance(grammar, RegularExpression):
        return RegularExpressionChecker(grammar)
    elif isinstance(grammar, PythonGrammar) or (
            isinstance(grammar, dict) and "matchFun" in grammar):
        return PythonChecker(grammar)
    elif isinstance(grammar, PLYGrammar):
        return PLYChecker(grammar)
    elif isinstance(grammar, Choice):
        return ChoiceChecker(grammar)
    elif isinstance(grammar, ParsleyGrammar):
        return ParsleyChecker(grammar)
    elif isinstance(grammar, String):
        return StringChecker(grammar)
    elif isinstance(grammar, Sequence):
        return SequenceChecker(grammar)
    elif isinstance(grammar, OneOrMore):
        return OneOrMoreChecker(grammar)
    elif isinstance(grammar, ZeroOrMore):
        return ZeroOrMoreChecker(grammar)
    elif isinstance(grammar, Iterable):
        # Any other iterable is treated as a set of alternatives.
        return ChoiceChecker(grammar)
    else:
        raise ValueError(grammar)


class Checker(object):
    """ Ensures that input follows a rule, protocol, grammar alphabet..."""
    def __init__(self):
        pass

    def __call__(self, *args, **kwargs):
        """Calling a checker is the same as calling its ``check`` method."""
        return self.check(*args, **kwargs)

    def check(self, value):# -> bool:
        """Return True when *value* is accepted; subclasses must override."""
        raise NotImplementedError


class RegularExpressionChecker(Checker):
    """Checks data against a regular expression (anchored at the start)."""

    def __init__(self, regexp, flags = ""):
        """
        :param regexp: a pattern string, or an object exposing ``match``
        :param flags: string of flag letters; only ``"i"`` (ignore case) is
            recognised, and only when *regexp* is a string
        """
        Checker.__init__(self)
        self.__regexpstr = regexp
        myflags = 0
        if "i" in flags:
            myflags |= re.I
        if isinstance(regexp, str):
            self.__regexp = re.compile(regexp, myflags)
        else:
            self.__regexp = regexp

    def check(self, data):
        """Return True when the (stringified) data is non-empty and matches."""
        if isinstance(data, Iterable):
            # Token lists are flattened to a single string before matching.
            data = "".join(str(x) for x in data)
        try:
            data = str(data)
        except UnicodeDecodeError:
            return False
        return bool(data and self.__regexp.match(data))


class BNFChecker(Checker):
    """Checks data by parsing it with a recursive descent parser built from
    the BNF grammar."""

    def __init__(self, bnf, parser = None):
        """
        :param bnf: the BNF grammar; ``bnf.options["parser"]`` overrides *parser*
        :param parser: one of "descent", "auto", "default" or None
        :raises ValueError: for any other parser name
        """
        Checker.__init__(self)
        self.gd = bnf
        parser = bnf.options.get("parser", parser)
        if parser in ("descent", "auto", "default", None):
            from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
            self.__parser = BacktracingErrorRecursiveDescentParser(bnf)
        else:
            # %-formatting keeps the ValueError even for a non-str value.
            raise ValueError("Unknown parser : %s" % (parser,))

    def check(self, data):
        """Return True when at least one parse tree exists for *data*.

        :raises TypeError: if *data* is not iterable
        """
        if isinstance(data, str):
            from pydsl.token import PositionToken
            from pydsl.encoding import ascii_encoding
            data = [PositionToken(x, ascii_encoding, i, i+1) for i,x in enumerate(data)]
        if not isinstance(data, Iterable):
            raise TypeError(data)
        # Every token must belong to the grammar's alphabet before parsing.
        if not all(check(self.gd.alphabet, [x]) for x in data):
            LOG.warning("Invalid input: %s,%s", self.gd.alphabet, data)
            return False
        try:
            return len(self.__parser.get_trees(data)) > 0
        except IndexError:
            return False


class ParsleyChecker(Checker):
    """Checks data by running the root rule of a parsley grammar."""

    def __init__(self, grammar):
        Checker.__init__(self)
        self.gd=grammar

    def check(self, data):
        """Return True unless parsley raises ``ParseError`` on *data*."""
        from parsley import ParseError
        try:
            getattr(self.gd.grammar(data), self.gd.root_rule)() #call grammar(data).root_rule()
            return True
        except ParseError:
            return False


class PythonChecker(Checker):
    """Delegates the decision to the ``matchFun`` entry of a mapping."""

    def __init__(self, module):
        Checker.__init__(self)
        self._matchFun = module["matchFun"]

    def check(self, data):
        """Return whatever ``matchFun(data)`` returns."""
        return self._matchFun(data)


class PLYChecker(Checker):
    """Checks data by parsing it with a ply lexer/parser pair."""

    def __init__(self, gd):
        Checker.__init__(self)
        self.module = gd.module

    def check(self, data):
        """Return True unless parsing raises pydsl's ``ParseError``.

        The lexer and parser are rebuilt from the module on every call.
        """
        if isinstance(data, Iterable):
            data = "".join(str(x) for x in data)
        from ply import yacc, lex
        lexer = lex.lex(self.module)
        parser = yacc.yacc(module = self.module)
        from pydsl.exceptions import ParseError
        try:
            parser.parse(data, lexer = lexer)
        except ParseError:
            return False
        return True


class StringChecker(Checker):
    """Checks that data is exactly equal to a fixed string."""

    def __init__(self, gd):
        Checker.__init__(self)
        self.gd = gd

    def check(self, data):
        """Return True when *data* (joined if it is a token list) equals the
        string.

        :raises TypeError: if *data* is neither a string nor an iterable
        """
        if isinstance(data, Iterable) and not isinstance(data, str):
            data = "".join(str(x) for x in data)
        if not isinstance(data, str):
            raise TypeError(data.__class__.__name__)
        return self.gd == str(data)


def formatchecker_factory(**checkerdict):
    """Converts a dictionary of strings:checkers into a formatchecker object

    :raises ImportError: if the jsonschema package is not installed
    """
    if FormatChecker is None:
        raise ImportError("jsonschema is required for JSON schema checking")
    fc = FormatChecker()
    for format_name, checker in checkerdict.items():
        fc.checks(format_name)(checker)
    return fc


class JsonSchemaChecker(Checker):
    """Validates data against a JSON schema using jsonschema."""

    def __init__(self, gd, formatdict = None):
        """
        :param gd: the schema
        :param formatdict: optional mapping of format name -> checker callable
        """
        Checker.__init__(self)
        self.gd = gd
        formatdict = formatdict or {}
        self.formatchecker = formatchecker_factory(**formatdict)

    def check(self, data, raise_exceptions = False):
        """Return True when *data* validates.

        With ``raise_exceptions`` the ``ValidationError`` is re-raised
        instead of being turned into False.
        """
        from jsonschema import validate, ValidationError
        try:
            validate(data, self.gd, format_checker = self.formatchecker)
        except ValidationError:
            if raise_exceptions:
                raise
            return False
        return True


class ChoiceChecker(Checker):
    """Accepts data accepted by at least one of the alternative grammars."""

    def __init__(self, gd):
        Checker.__init__(self)
        self.gd = gd
        self.checkerinstances = [checker_factory(x) for x in self.gd]

    def check(self, data):
        """
        :raises TypeError: if *data* is not iterable
        """
        if not isinstance(data, Iterable):
            raise TypeError(data.__class__.__name__)
        return any(x.check(data) for x in self.checkerinstances)


class SequenceChecker(Checker):
    """Accepts data whose i-th element is accepted by the i-th grammar."""

    def __init__(self, sequence):
        """
        :raises TypeError: if any member of *sequence* is not a Grammar
        """
        Checker.__init__(self)
        from pydsl.grammar import Grammar
        for x in sequence:
            if not isinstance(x, Grammar):
                raise TypeError("Expected grammar, got %s" % (x.__class__.__name__,))
        self.sequence = sequence

    def check(self, data):
        """
        :raises TypeError: if *data* is not iterable
        """
        if not isinstance(data, Iterable):
            raise TypeError(data.__class__.__name__)
        if len(self.sequence) != len(data):
            return False
        # Each element is checked on its own, wrapped as a one-item list.
        return all(check(grammar, [item])
                   for grammar, item in zip(self.sequence, data))


class OneOrMoreChecker(Checker):
    """Accepts non-empty data whose every element matches the inner grammar."""

    def __init__(self, element):
        Checker.__init__(self)
        self.element = element

    def check(self, data):
        return bool(data) and all(check(self.element.element, x) for x in data)


class ZeroOrMoreChecker(Checker):
    """Accepts data (possibly empty) whose every element matches the inner
    grammar."""

    def __init__(self, element):
        Checker.__init__(self)
        self.element = element

    def check(self, data):
        return all(check(self.element.element, x) for x in data)
NullSymbol 4 | from pydsl.grammar.BNF import Production, BNFGrammar 5 | from pydsl.file.BNF import strlist_to_production_set 6 | from pydsl.file.python import load_python_file 7 | from pydsl.grammar.definition import String, RegularExpression 8 | 9 | leftrecursive=["S ::= E","E ::= E dot | dot","dot := String,."] 10 | rightrecursive=["S ::= E","E ::= dot E | dot","dot := String,."] 11 | centerrecursive=["S ::= E","E ::= dot E dot | dot","dot := String,."] 12 | 13 | #productionset0 definition 14 | 15 | symbol1 = TerminalSymbol(String("S")) 16 | symbol2 = TerminalSymbol(String("R")) 17 | final1 = NonTerminalSymbol("exp") 18 | rule1 = Production([final1], (symbol1, symbol2)) 19 | productionset0 = BNFGrammar(final1, (rule1,symbol1,symbol2)) 20 | p0good = "SR" 21 | p0bad = "RS" 22 | 23 | 24 | #productionset1 definition 25 | symbol1 = TerminalSymbol(String("S")) 26 | symbol2 = TerminalSymbol(String("R")) 27 | symbol3 = TerminalSymbol(String(":")) 28 | symbol4 = TerminalSymbol(RegularExpression("^[0123456789]*$")) 29 | symbol5 = TerminalSymbol(load_python_file('pydsl/contrib/grammar/cstring.py')) 30 | final1 = NonTerminalSymbol("storeexp") 31 | final2 = NonTerminalSymbol("retrieveexp") 32 | final3 = NonTerminalSymbol("exp") 33 | rule1 = Production([final1], (symbol1, symbol3, symbol5)) 34 | rule2 = Production([final2], (symbol2, symbol3, symbol4)) 35 | rule3 = Production([final3], [final1]) 36 | rule4 = Production([final3], [final2]) 37 | rulelist = (rule1, rule2, rule3, rule4, symbol1, symbol2, symbol3, symbol4, symbol5) 38 | productionset1 = BNFGrammar(final3, rulelist) 39 | 40 | #productionset2 definition 41 | symbola = TerminalSymbol(String("A")) 42 | symbolb = TerminalSymbol(String("B")) 43 | nonterminal = NonTerminalSymbol("res") 44 | rulea = Production ((nonterminal,), (symbola, NullSymbol(), symbolb)) 45 | productionset2 = BNFGrammar(nonterminal, (rulea, symbola, symbolb)) 46 | productionsetlr = strlist_to_production_set(leftrecursive) 47 | productionsetrr = 
strlist_to_production_set(rightrecursive) 48 | productionsetcr = strlist_to_production_set(centerrecursive) 49 | 50 | #arithmetic 51 | 52 | 53 | arithmetic=["E ::= E plus T | T", "T ::= T times F | F" ,"F ::= open_parenthesis E close_parenthesis | id", "id := String,123" , "plus := String,+", "times := String,*", "open_parenthesis := String,(","close_parenthesis := String,)"] 54 | productionset_arithmetic = strlist_to_production_set(arithmetic, start_symbol= "E") 55 | 56 | addition=["S ::= E","E ::= E plus F | F" ,"F ::= open_parenthesis E close_parenthesis | id", "id := String,123" , "plus := String,+", "open_parenthesis := String,(","close_parenthesis := String,)"] 57 | productionset_addition = strlist_to_production_set(addition) 58 | #tokenlist definition 59 | string1 = "S:a" 60 | string2 = "S:" 61 | string3 = "AB" 62 | string4 = "AAB" 63 | string5 = "ACB" 64 | dots = "....." 65 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/Date.bnf: -------------------------------------------------------------------------------- 1 | //Date 2 | #parser=descent 3 | 4 | S ::= dayofmonth separator number separator number 5 | separator := String,/ 6 | number := Word,integer,max,1 7 | dayofmonth := Word,DayOfMonth,max,1 8 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/Date.parsley: -------------------------------------------------------------------------------- 1 | separator = '/' 2 | number = digit{1, 2}:n -> int(''.join(n)) 3 | dayofmonth = number:n ?(DayOfMonth(n)) ->n 4 | expr=dayofmonth:d separator number:m separator number:y ->(d,m,y) 5 | 6 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/DayOfMonth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | #Copyright (C) 2008-2014 Nestor Arocha 5 | 6 | 7 | 8 | def 
def matchFun(inputstr):
    """Recognize a nested name list such as ``[a,[bc,d]]``.

    Grammar (first recipe of the book "Language implementation patterns")::

        list     : '[' elements ']' ;         // match bracketed list
        elements : element (',' element)* ;   // match comma-separated list
        element  : NAME | list ;              // element is name or nested list
        NAME     : ('a'..'z' |'A'..'Z' )+ ;   // NAME is sequence of >=1 letter

    The start rule is ``element``, so a bare name is accepted as well.

    :param inputstr: iterable of single-character strings, typically a str
    :returns: True when the whole input is exactly one valid element; False
        otherwise, including for empty or truncated input
    """
    import re
    from collections import deque

    letter = re.compile("[a-zA-Z]")
    # deque gives O(1) consumption from the front (list.pop(0) is O(n)).
    tokens = deque(inputstr)

    def peek():
        # None stands for "end of input", so running out of tokens never
        # raises IndexError.
        return tokens[0] if tokens else None

    def accept(literal):
        # Consume the next token only if it is exactly *literal*.
        if peek() != literal:
            return False
        tokens.popleft()
        return True

    def name():
        if not tokens or not letter.match(tokens[0]):
            return False
        while tokens and letter.match(tokens[0]):
            tokens.popleft()
        return True

    def element():
        if peek() == "[":
            return mlist()
        return name()

    def elements():
        if not element():
            return False
        while accept(","):
            if not element():
                return False
        return True

    def mlist():
        return accept("[") and elements() and accept("]")

    # Valid only if one element was parsed and nothing is left over.
    return element() and not tokens


iclass = "PythonGrammar"
8 | btr := String, 9 | etr := String, 10 | btd := String, 11 | etd := String, 12 | // identifier := Word,Integer,max,1 13 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/ImageFile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | #copyright (c) 2008-2013 Nestor Arocha 5 | 6 | """Image file recognizer""" 7 | 8 | def matchFun(input): 9 | content = input #assuming bytes 10 | import imghdr 11 | try: 12 | return bool(imghdr.what(None, content)) 13 | except: 14 | return False 15 | 16 | 17 | iclass = "PythonGrammar" 18 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/LogicalExpression.bnf: -------------------------------------------------------------------------------- 1 | //Logical Expression 2 | 3 | S ::= Expression 4 | Expression ::= identifier | op Expression cp | OperatorExpression 5 | OperatorExpression ::= not Expression | identifier RestExpression 6 | RestExpression ::= and Expression | or Expression 7 | op := String,( 8 | cp := String,) 9 | and := String,&& 10 | or := String,|| 11 | not := String,! 
12 | identifier := Word,TrueFalse,min,1 13 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/MimeType.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | #copyright (c) 2008-2013 Nestor Arocha 5 | 6 | """Mime Type recognizer""" 7 | 8 | _mimelist = ["applicaiton/x-bytecode.python", 9 | "application/acad", 10 | "application/arj", 11 | "application/base64", 12 | "application/binhex", 13 | "application/binhex4", 14 | "application/book", 15 | "application/cdf", 16 | "application/clariscad", 17 | "application/commonground", 18 | "application/drafting", 19 | "application/dsptype", 20 | "application/dxf", 21 | "application/envoy", 22 | "application/excel", 23 | "application/fractals", 24 | "application/freeloader", 25 | "application/futuresplash", 26 | "application/gnutar", 27 | "application/groupwise", 28 | "application/hlp", 29 | "application/hta", 30 | "application/i-deas", 31 | "application/iges", 32 | "application/inf", 33 | "application/java", 34 | "application/java-byte-code", 35 | "application/lha", 36 | "application/lzx", 37 | "application/mac-binary", 38 | "application/macbinary", 39 | "application/mac-binhex", 40 | "application/mac-binhex40", 41 | "application/mac-compactpro", 42 | "application/marc", 43 | "application/mbedlet", 44 | "application/mcad", 45 | "application/mime", 46 | "application/mspowerpoint", 47 | "application/msword", 48 | "application/mswrite", 49 | "application/netmc", 50 | "application/octet-stream", 51 | "application/oda", 52 | "application/pdf", 53 | "application/pkcs10", 54 | "application/pkcs-12", 55 | "application/pkcs7-mime", 56 | "application/pkcs7-signature", 57 | "application/pkcs-crl", 58 | "application/pkix-cert", 59 | "application/pkix-crl", 60 | "application/plain", 61 | "application/postscript", 62 | "application/powerpoint", 63 | "application/pro_eng", 64 | 
"application/ringing-tones", 65 | "application/rtf", 66 | "application/sdp", 67 | "application/sea", 68 | "application/set", 69 | "application/sla", 70 | "application/smil", 71 | "application/solids", 72 | "application/sounder", 73 | "application/step", 74 | "application/streamingmedia", 75 | "application/toolbook", 76 | "application/vda", 77 | "application/vnd.fdf", 78 | "application/vnd.hp-hpgl", 79 | "application/vnd.hp-pcl", 80 | "application/vnd.ms-excel", 81 | "application/vnd.ms-pki.certstore", 82 | "application/vnd.ms-pki.pko", 83 | "application/vnd.ms-pki.seccat", 84 | "application/vnd.ms-pki.stl", 85 | "application/vnd.ms-powerpoint", 86 | "application/vnd.ms-project", 87 | "application/vnd.nokia.configuration-message", 88 | "application/vnd.nokia.ringing-tone", 89 | "application/vnd.rn-realmedia", 90 | "application/vnd.rn-realplayer", 91 | "application/vnd.wap.wmlc", 92 | "application/vnd.wap.wmlscriptc", 93 | "application/vnd.xara", 94 | "application/vocaltec-media-desc", 95 | "application/vocaltec-media-file", 96 | "application/wordperfect", 97 | "application/wordperfect6.0", 98 | "application/wordperfect6.1", 99 | "application/x-123", 100 | "application/x-aim", 101 | "application/x-authorware-bin", 102 | "application/x-authorware-map", 103 | "application/x-authorware-seg", 104 | "application/x-bcpio", 105 | "application/x-binary", 106 | "application/x-binhex40", 107 | "application/x-bsh", 108 | "application/x-bytecode.elisp (compiled elisp)", 109 | "application/x-bzip", 110 | "application/x-bzip2", 111 | "application/x-cdf", 112 | "application/x-cdlink", 113 | "application/x-chat", 114 | "application/x-cmu-raster", 115 | "application/x-cocoa", 116 | "application/x-compactpro", 117 | "application/x-compress", 118 | "application/x-compressed", 119 | "application/x-conference", 120 | "application/x-cpio", 121 | "application/x-cpt", 122 | "application/x-csh", 123 | "application/x-deepv", 124 | "application/x-director", 125 | "application/x-dvi", 126 | 
"application/x-elc", 127 | "application/x-envoy", 128 | "application/x-esrehber", 129 | "application/x-excel", 130 | "application/x-frame", 131 | "application/x-freelance", 132 | "application/x-gsp", 133 | "application/x-gss", 134 | "application/x-gtar", 135 | "application/x-gzip", 136 | "application/x-hdf", 137 | "application/x-helpfile", 138 | "application/x-httpd-imap", 139 | "application/x-ima", 140 | "application/x-internett-signup", 141 | "application/x-inventor", 142 | "application/x-ip2", 143 | "application/x-java-class", 144 | "application/x-java-commerce", 145 | "application/x-javascript", 146 | "application/x-koan", 147 | "application/x-ksh", 148 | "application/x-latex", 149 | "application/x-lha", 150 | "application/x-lisp", 151 | "application/x-livescreen", 152 | "application/x-lotus", 153 | "application/x-lotusscreencam", 154 | "application/x-lzh", 155 | "application/x-lzx", 156 | "application/x-macbinary", 157 | "application/x-mac-binhex40", 158 | "application/x-magic-cap-package-1.0", 159 | "application/x-mathcad", 160 | "application/x-meme", 161 | "application/x-midi", 162 | "application/x-mif", 163 | "application/x-mix-transfer", 164 | "application/xml", 165 | "application/x-mplayer2", 166 | "application/x-msexcel", 167 | "application/x-mspowerpoint", 168 | "application/x-navi-animation", 169 | "application/x-navidoc", 170 | "application/x-navimap", 171 | "application/x-navistyle", 172 | "application/x-netcdf", 173 | "application/x-newton-compatible-pkg", 174 | "application/x-nokia-9000-communicator-add-on-software", 175 | "application/x-omc", 176 | "application/x-omcdatamaker", 177 | "application/x-omcregerator", 178 | "application/x-pagemaker", 179 | "application/x-pcl", 180 | "application/x-pixclscript", 181 | "application/x-pkcs10", 182 | "application/x-pkcs12", 183 | "application/x-pkcs7-certificates", 184 | "application/x-pkcs7-certreqresp", 185 | "application/x-pkcs7-mime", 186 | "application/x-pkcs7-signature", 187 | 
"application/x-pointplus", 188 | "application/x-portable-anymap", 189 | "application/x-project", 190 | "application/x-qpro", 191 | "application/x-rtf", 192 | "application/x-sdp", 193 | "application/x-sea", 194 | "application/x-seelogo", 195 | "application/x-sh", 196 | "application/x-shar", 197 | "application/x-shockwave-flash", 198 | "application/x-sit", 199 | "application/x-sprite", 200 | "application/x-stuffit", 201 | "application/x-sv4cpio", 202 | "application/x-sv4crc", 203 | "application/x-tar", 204 | "application/x-tbook", 205 | "application/x-tcl", 206 | "application/x-tex", 207 | "application/x-texinfo", 208 | "application/x-troff", 209 | "application/x-troff-man", 210 | "application/x-troff-me", 211 | "application/x-troff-ms", 212 | "application/x-troff-msvideo", 213 | "application/x-ustar", 214 | "application/x-visio", 215 | "application/x-vnd.audioexplosion.mzz", 216 | "application/x-vnd.ls-xpix", 217 | "application/x-vrml", 218 | "application/x-wais-source", 219 | "application/x-winhelp", 220 | "application/x-wintalk", 221 | "application/x-world", 222 | "application/x-wpwin", 223 | "application/x-wri", 224 | "application/x-x509-ca-cert", 225 | "application/x-x509-user-cert", 226 | "application/x-zip-compressed", 227 | "application/zip", 228 | "audio/aiff", 229 | "audio/basic", 230 | "audio/it", 231 | "audio/make", 232 | "audio/make.my.funk", 233 | "audio/mid", 234 | "audio/midi", 235 | "audio/mod", 236 | "audio/mpeg", 237 | "audio/mpeg3", 238 | "audio/nspaudio", 239 | "audio/s3m", 240 | "audio/tsp-audio", 241 | "audio/tsplayer", 242 | "audio/vnd.qcelp", 243 | "audio/voc", 244 | "audio/voxware", 245 | "audio/wav", 246 | "audio/x-adpcm", 247 | "audio/x-aiff", 248 | "audio/x-au", 249 | "audio/x-gsm", 250 | "audio/x-jam", 251 | "audio/x-liveaudio", 252 | "audio/xm", 253 | "audio/x-mid", 254 | "audio/x-midi", 255 | "audio/x-mod", 256 | "audio/x-mpeg", 257 | "audio/x-mpeg-3", 258 | "audio/x-mpequrl", 259 | "audio/x-nspaudio", 260 | "audio/x-pn-realaudio", 261 
| "audio/x-pn-realaudio-plugin", 262 | "audio/x-psid", 263 | "audio/x-realaudio", 264 | "audio/x-twinvq", 265 | "audio/x-twinvq-plugin", 266 | "audio/x-vnd.audioexplosion.mjuicemediafile", 267 | "audio/x-voc", 268 | "audio/x-wav", 269 | "chemical/x-pdb", 270 | "drawing/x-dwf (old)", 271 | "image/bmp", 272 | "image/cmu-raster", 273 | "image/fif", 274 | "image/florian", 275 | "image/g3fax", 276 | "image/gif", 277 | "image/ief", 278 | "image/jpeg", 279 | "image/jutvision", 280 | "image/naplps", 281 | "image/pict", 282 | "image/pjpeg", 283 | "image/png", 284 | "image/tiff", 285 | "image/vasa", 286 | "image/vnd.dwg", 287 | "image/vnd.fpx", 288 | "image/vnd.net-fpx", 289 | "image/vnd.rn-realflash", 290 | "image/vnd.rn-realpix", 291 | "image/vnd.wap.wbmp", 292 | "image/vnd.xiff", 293 | "image/xbm", 294 | "image/x-cmu-raster", 295 | "image/x-dwg", 296 | "image/x-icon", 297 | "image/x-jg", 298 | "image/x-jps", 299 | "image/x-niff", 300 | "image/x-pcx", 301 | "image/x-pict", 302 | "image/xpm", 303 | "image/x-portable-anymap", 304 | "image/x-portable-bitmap", 305 | "image/x-portable-graymap", 306 | "image/x-portable-greymap", 307 | "image/x-portable-pixmap", 308 | "image/x-quicktime", 309 | "image/x-rgb", 310 | "image/x-tiff", 311 | "image/x-windows-bmp", 312 | "image/x-xbitmap", 313 | "image/x-xbm", 314 | "image/x-xpixmap", 315 | "image/x-xwd", 316 | "image/x-xwindowdump", 317 | "i-world/i-vrml", 318 | "message/rfc822", 319 | "model/iges", 320 | "model/vnd.dwf", 321 | "model/vrml", 322 | "model/x-pov", 323 | "multipart/x-gzip", 324 | "multipart/x-ustar", 325 | "multipart/x-zip", 326 | "music/crescendo", 327 | "music/x-karaoke", 328 | "paleovu/x-pv", 329 | "text/asp", 330 | "text/css", 331 | "text/html", 332 | "text/mcf", 333 | "text/pascal", 334 | "text/plain", 335 | "text/richtext", 336 | "text/scriplet", 337 | "text/sgml", 338 | "text/tab-separated-values", 339 | "text/uri-list", 340 | "text/vnd.abc", 341 | "text/vnd.fmi.flexstor", 342 | "text/vnd.rn-realtext", 343 | 
"text/vnd.wap.wml", 344 | "text/vnd.wap.wmlscript", 345 | "text/webviewhtml", 346 | "text/x-asm", 347 | "text/x-audiosoft-intra", 348 | "text/x-c", 349 | "text/x-component", 350 | "text/x-fortran", 351 | "text/x-h", 352 | "text/x-java-source", 353 | "text/x-la-asf", 354 | "text/x-m", 355 | "text/xml", 356 | "text/x-pascal", 357 | "text/x-script", 358 | "text/x-script.csh", 359 | "text/x-script.elisp", 360 | "text/x-script.guile", 361 | "text/x-script.ksh", 362 | "text/x-script.lisp", 363 | "text/x-script.perl", 364 | "text/x-script.perl-module", 365 | "text/x-script.phyton", 366 | "text/x-script.rexx", 367 | "text/x-script.scheme", 368 | "text/x-script.sh", 369 | "text/x-script.tcl", 370 | "text/x-script.tcsh", 371 | "text/x-script.zsh", 372 | "text/x-server-parsed-html", 373 | "text/x-setext", 374 | "text/x-sgml", 375 | "text/x-speech", 376 | "text/x-uil", 377 | "text/x-uuencode", 378 | "text/x-vcalendar", 379 | "video/animaflex", 380 | "video/avi", 381 | "video/avs-video", 382 | "video/dl", 383 | "video/fli", 384 | "video/gl", 385 | "video/mpeg", 386 | "video/msvideo", 387 | "video/quicktime", 388 | "video/vdo", 389 | "video/vivo", 390 | "video/vnd.rn-realvideo", 391 | "video/vnd.vivo", 392 | "video/vosaic", 393 | "video/x-amt-demorun", 394 | "video/x-amt-showrun", 395 | "video/x-atomic3d-feature", 396 | "video/x-dl", 397 | "video/x-dv", 398 | "video/x-fli", 399 | "video/x-gl", 400 | "video/x-isvideo", 401 | "video/x-motion-jpeg", 402 | "video/x-mpeg", 403 | "video/x-mpeq2a", 404 | "video/x-ms-asf", 405 | "video/x-ms-asf-plugin", 406 | "video/x-msvideo", 407 | "video/x-qtc", 408 | "video/x-scm", 409 | "video/x-sgi-movie", 410 | "windows/metafile", 411 | "www/mime", 412 | "x-conference/x-cooltalk", 413 | "xgl/drawing", 414 | "xgl/movie", 415 | "x-music/x-midi", 416 | "x-world/x-3dmf", 417 | "x-world/x-svr", 418 | "x-world/x-vrml", 419 | "x-world/x-vrt"] 420 | 421 | 422 | def matchFun(input): 423 | content = str(input) 424 | return content in _mimelist 425 | 426 | 
"""spanish id number grammar"""

# Check letters of the Spanish ID, indexed by (8-digit number % 23).
_CHECK_LETTERS = 'TRWAGMYFPDXBNJZSQVHLCKE'
_DECIMAL_DIGITS = frozenset('0123456789')


def matchFun(inputstr):
    """Return True when *inputstr* is a well-formed Spanish ID (DNI).

    The input is converted with ``str()``. A valid value is exactly nine
    characters long: eight decimal digits followed by a check letter. The
    expected letter is looked up in a fixed 23-letter table using the
    number modulo 23, and the comparison ignores case.

    Anything that does not have that shape yields False; the function
    never raises for malformed text.
    """
    dni = str(inputstr)
    if len(dni) != 9:
        return False
    number = dni[:8]
    # int() raises ValueError on letters and silently accepts signs,
    # surrounding spaces and underscores, so insist on plain digits first.
    if not _DECIMAL_DIGITS.issuperset(number):
        return False
    expected = _CHECK_LETTERS[int(number) % 23]
    return dni[-1].lower() == expected.lower()


def propFun(inputstr, propertyname):
    """Return one component of a DNI.

    *propertyname* selects the component:

    * ``"number"`` -- the first eight characters.
    * ``"letter"`` -- the final (check) character.

    Any other property name returns False.
    """
    dni = inputstr
    if propertyname == "number":
        return dni[:8]
    if propertyname == "letter":
        # The check letter is the LAST character. The previous slice,
        # dni[:-1], returned everything except the letter.
        return dni[-1]
    return False


iclass = "PythonGrammar"
# -----------------------------------------------------------------------------
# calc.py
#
# A simple calculator with variables -- all in one file.
#
# NOTE: PLY reads the docstring of every t_* / p_* function below as the
# token regex or grammar production it implements. Those strings are part
# of the program; explanatory notes therefore live in comments only.
# -----------------------------------------------------------------------------

tokens = (
    'NAME','NUMBER',
    'PLUS','MINUS','TIMES','DIVIDE','EQUALS',
    'LPAREN','RPAREN',
    )

# Tokens

t_PLUS    = r'\+'
t_MINUS   = r'-'
t_TIMES   = r'\*'
t_DIVIDE  = r'/'
t_EQUALS  = r'='
t_LPAREN  = r'\('
t_RPAREN  = r'\)'
t_NAME    = r'[a-zA-Z_][a-zA-Z0-9_]*'

def t_NUMBER(t):
    r'\d+'
    # Convert the matched digits to an int; fall back to 0 if int() refuses.
    try:
        t.value = int(t.value)
    except ValueError:
        # t.value is still the matched text here, hence %s. The message was
        # previously passed to print() as two arguments, so the placeholder
        # was printed literally instead of being filled in.
        print("Integer value too large %s" % t.value)
        t.value = 0
    return t

# Ignored characters
t_ignore = " \t"

def t_newline(t):
    r'\n+'
    # Keep the lexer's line counter in sync; no token is returned.
    t.lexer.lineno += t.value.count("\n")

def t_error(t):
    # Lexing aborts on the first unknown character. The print/skip recovery
    # that used to follow this raise could never run and was removed.
    raise ParseError("unknown character", t.lexpos)

# Parsing rules

precedence = (
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE'),
    ('right','UMINUS'),
    )

# dictionary of names
names = { }

def p_statement_assign(t):
    'statement : NAME EQUALS expression'
    # Store the value under the variable name; the statement has no result.
    names[t[1]] = t[3]

def p_statement_expr(t):
    'statement : expression'
    t[0] = t[1]

def p_expression_binop(t):
    '''expression : expression PLUS expression
                  | expression MINUS expression
                  | expression TIMES expression
                  | expression DIVIDE expression'''
    # t[2] is the operator text matched by the lexer.
    if t[2] == '+'  : t[0] = t[1] + t[3]
    elif t[2] == '-': t[0] = t[1] - t[3]
    elif t[2] == '*': t[0] = t[1] * t[3]
    elif t[2] == '/': t[0] = t[1] / t[3]

def p_expression_uminus(t):
    'expression : MINUS expression %prec UMINUS'
    t[0] = -t[2]

def p_expression_group(t):
    'expression : LPAREN expression RPAREN'
    t[0] = t[2]

def p_expression_number(t):
    'expression : NUMBER'
    t[0] = t[1]

def p_expression_name(t):
    'expression : NAME'
    # Unknown variables are a parse error. The "t[0] = 0" fallback that
    # used to follow the raise was unreachable and was removed.
    try:
        t[0] = names[t[1]]
    except LookupError:
        raise ParseError("Undefined name",0)

def p_error(t):
    # PLY calls p_error(None) when the input ends in the middle of a
    # production (e.g. "1 +"); there is no token to inspect in that case.
    if t is None:
        # No position is available at end of input; 0 is the same
        # placeholder this module already uses for undefined names.
        raise ParseError("Syntax error at EOF", 0)
    raise ParseError("Syntax error at", t.value)

iclass="PLY"
### Define the lexer

# Token names exported to PLY.
tokens = ("ATOM", "DIGITS")

# Weight used for each supported element symbol.
# NOTE(review): the standard atomic weight of carbon is 12.011; the 12.001
# below is kept as-is -- confirm whether it is intentional.
mw_table = dict(
    H=1.00794,
    C=12.001,
    Cl=35.453,
    O=15.999,
    S=32.06,
)


def _atom_sort_key(symbol):
    # Order symbols for the lexer alternation:
    #   1. by first character, so the result is easy for a human to scan;
    #   2. longer symbols first (negated length), because Python's regex
    #      alternation takes the first match rather than the longest one --
    #      "C|Cl" would only ever match the "C" of "Cl";
    #   3. by the full symbol, to make the order deterministic.
    return (symbol[0], -len(symbol), symbol)


# The element symbols are taken from the table rather than listed twice.
# (More machinery than five elements need; it shows how the approach
# scales to all 100+ named elements.)
atom_names = sorted(mw_table, key=_atom_sort_key)

# Creates a pattern like: Cl|C|H|O|S
atom_pattern = "|".join(atom_names)
##### Define the grammar
#
# PLY treats the docstring of each p_* function as the production it
# implements, so those strings are reproduced exactly. p[0] receives the
# value of the left-hand side; p[1], p[2], ... are the right-hand symbols.

# The molecular weight of "" is 0.0
def p_mw_empty(p):
    "mw : "
    empty_weight = 0.0
    p[0] = empty_weight

# A non-empty formula: its weight is the weight of the whole input.
def p_mw_formula(p):
    "mw : formula"
    formula_weight = p[1]
    p[0] = formula_weight


# A formula made of a single species.
def p_first_species_term(p):
    "formula : species"
    species_weight = p[1]
    p[0] = species_weight

# A formula followed by one more species: add the new species on.
def p_species_list(p):
    "formula : formula species"
    running_total, species_weight = p[1], p[2]
    p[0] = running_total + species_weight

# An atom with an explicit count, e.g. the "H2" in "H2SO4".
def p_species(p):
    "species : ATOM DIGITS"
    atom_weight, count = p[1], p[2]
    p[0] = atom_weight * count

# An atom without a count stands for exactly one atom.
def p_species_default(p):
    "species : ATOM"
    atom_weight = p[1]
    p[0] = atom_weight

# Report the offending token's position to the caller.
def p_error(p):
    position = p.lexpos
    raise ParseError("unexpected character", position)

iclass="PLY"
| return False 29 | return True 30 | 31 | auxdic = {"integer":"integer"} 32 | iclass = "PythonGrammar" 33 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/logline.bnf: -------------------------------------------------------------------------------- 1 | S ::= ip space hyphen space hyphen space Datetime space quotation Command quotation space int space int space quotation Referer quotation space quotation Useragent quotation 2 | Datetime ::= opensquare DayOfMonth slash Month slash Year colon Hour colon Minute colon Second space Plusminus int closesquare 3 | Command ::= get | post | hyphen 4 | Plusminus ::= plus | hyphen 5 | Referer ::= string 6 | Useragent ::= string 7 | DayOfMonth ::= int 8 | Month ::= int 9 | Year ::= int 10 | Hour ::= int 11 | Minute ::= int 12 | Second ::= int 13 | 14 | space := Word,space,max 15 | plus := String,+ 16 | quotation := String," 17 | colon := String,: 18 | hyphen := String,- 19 | get := String,GET 20 | slash := String,/ 21 | post := String,POST 22 | int := Word,integer,max 23 | ip := Word,ipv4,max 24 | opensquare := String,[ 25 | closesquare := String,] 26 | string := Word,characters,max 27 | -------------------------------------------------------------------------------- /pydsl/contrib/grammar/mongoquery.bnf: -------------------------------------------------------------------------------- 1 | S :: Query 2 | Query : { identifier : subexpression } | { $or : querylist } 3 | querylist: [ Query *(,Query) ] 4 | valuelist: [ value *(,value) ] 5 | sublist: [ subexpression *(,subexpression) ] 6 | subexpression: inexpression | orexpression | ninexpression | elemMatchexpression | modexpression | existsexpression | typeexpression | query | value 7 | existsexpression: { $exists: bool } 8 | inexpression: { $in: valuelist} 9 | ninexpression: { $nin: valuelist } 10 | orexpression: { $or: sublist} 11 | elemMatchexpression: { $elemMatch : { identifier:value * (, identifier:value) } } 12 | 
"""Protocols"""

def matchFun(inputd):
    """Return True when ``str(inputd)`` contains a ``://`` separator."""
    return "://" in str(inputd)

def propFun(inputd, propertyname):
    """Return one component of a ``protocol://path?options`` string.

    *propertyname* selects the component:

    * ``"protocol"`` -- the text before the first ``://``.
    * ``"path"``     -- the text after the first ``://``, up to the first
      ``?`` if there is one.
    * ``"options"``  -- the text after the first ``?``; None when the
      string has no ``?``.

    Any other property name returns None.

    Raises:
        ValueError: if the string contains no ``://`` at all.
    """
    inputs = str(inputd)
    # Split on the FIRST separator only. Unpacking the result of a plain
    # split() failed with "too many values to unpack" whenever a second
    # "://" or "?" appeared later, e.g. a URL passed inside the options.
    protocol, separator, rest = inputs.partition("://")
    if not separator:
        raise ValueError("missing '://' in %r" % inputs)
    if propertyname == "protocol":
        return protocol
    path, question_mark, options = rest.partition("?")
    if propertyname == "path":
        return path
    if propertyname == "options" and question_mark:
        return options
    return None

iclass = "PythonGrammar"
"austrian_vat":{"regexp":"^(AT){0,1}[U]{0,1}[0-9]{8}$"}, 7 | "belgian_vat":{"regexp":"^(BE)[0-1]{1}[0-9]{9}$|^((BE)|(BE ))[0-1]{1}(\d{3})([.]{1})(\d{3})([.]{1})(\d{3})"}, 8 | "bic":{"regexp":"^([a-zA-Z]){4}(AF|AX|AL|DZ|AS|AD|AO|AI|AQ|AG|AR|AM|AW|AU|AZ|BS|BH|BD|BB|BY|BE|BZ|BJ|BM|BT|BO|BA|BW|BV|BR|IO|BN|BG|BF|BI|KH|CM|CA|CV|KY|CF|TD|CL|CN|CX|CC|CO|KM|CG|CD|CK|CR|CI|HR|CU|CY|CZ|DK|DJ|DM|DO|EC|EG|SV|GQ|ER|EE|ET|FK|FO|FJ|FI|FR|GF|PF|TF|GA|GM|GE|DE|GH|GI|GR|GL|GD|GP|GU|GT|GG|GN|GW|GY|HT|HM|VA|HN|HK|HU|IS|IN|ID|IR|IQ|IE|IM|IL|IT|JM|JP|JE|JO|KZ|KE|KI|KP|KR|KW|KG|LA|LV|LB|LS|LR|LY|LI|LT|LU|MO|MK|MG|MW|MY|MV|ML|MT|MH|MQ|MR|MU|YT|MX|FM|MD|MC|MC|MN|ME|MS|MA|MZ|MM|MA|NR|NP|NL|AN|NC|NZ|NI|NE|NG|NU|NF|MP|NO|OM|PK|PW|PS|PA|PG|PY|PE|PH|PN|PL|PT|PR|QA|RE|RO|RU|RW|SH|KN|LC|PM|VC|WS|SM|ST|SA|SN|RS|SC|SL|SG|SK|SI|SB|SO|ZA|GS|ES|LK|SD|SR|SJ|SZ|SE|CH|SY|TW|TJ|TZ|TH|TL|TG|TK|TO|TT|TN|TR|TM|TC|TV|UG|UA|AE|GB|US|UM|UY|UZ|VU|VE|VN|VG|VI|WF|EH|YE|ZM|ZW)([0-9a-zA-Z]){2}([0-9a-zA-Z]{3})$"}, 9 | "binary":{"regexp":"^[01]*$"}, 10 | "brainfuck":{"regexp":"^(-|<|>|\.|,|\+|\[|\])+$"}, 11 | "bulgarian_vat":{"regexp":"^(BG){0,1}([0-9]{9}|[0-9]{10})$"}, 12 | "camelcase":{"regexp":"^[A-Z][a-z]+([A-Z][a-z]+)+$"}, 13 | "canadian_postcode":{"regexp":"^[ABCEGHJKLMNPRSTVXYabceghjklmnprstvxy]{1}\d{1}[A-Za-z]{1}\d{1}[A-Za-z]{1}\d{1}$"}, 14 | "characters":{"regexp":"^[A-z]+$"}, 15 | "color_code":{"regexp":"^#(\d{6})|^#([A-F]{6})|^#([A-F]|[0-9]){6}"}, 16 | "coordinate":{"regexp":"^\d{1,2}(\.\d*)?[NS] 1?\d{1,2}(\.\d*)?[EW]$"}, 17 | "credit_card":{"regexp":"^(\d{4}-){3}\d{4}$|^(\d{4} ){3}\d{4}$|^\d{16}$"}, 18 | "dms_coordinate":{"regexp":"[0-9]{1,2}[:|°][0-9]{1,2}[:|'](?:\b[0-9]+(?:\.[0-9]*)?|\.[0-9]+\b)\"?[N|S|E|W]"}, 19 | "dutch_postcode":{"regexp":"^[1-9]{1}[0-9]{3}\s?[a-zA-Z]{2}$"}, 20 | "email":{"regexp":"^(?P[A-Z0-9._%+-]+)@(?P[A-Z0-9.-]+\.[A-Z]{2,4})$","flags":"i"}, 21 | "FileFilter":{"regexp":"^([A-z]|[*?.])+$"}, 22 | "float":{"regexp":"^[123456789][01234567890]*\.[0123456789]*$"}, 23 | 
"fqdn":{"regexp":"^(?=^.{1,254}$)(^(?:(?!\.|-)([a-z0-9\-\*]{1,63}|([a-z0-9\-]{1,62}[a-z0-9]))\.)+(?:[a-z]{2,})$)$"}, 24 | "german_postcode":{"regexp":"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$"}, 25 | "hex":{"regexp":"^[0-9a-fA-F]*$"}, 26 | "Identifier":{"regexp":"^[A-Za-z][_A-Za-z0-9]*$"}, 27 | "indian_mobile_2":{"regexp":"^((\+){0,1}91(\s){0,1}(\-){0,1}(\s){0,1}){0,1}9[0-9](\s){0,1}(\-){0,1}(\s){0,1}[1-9]{1}[0-9]{7}$"}, 28 | "indian_mobile":{"regexp":"^[89][0-9]{9}"}, 29 | "indian_postcode":{"regexp":"^[1-9]{3}\s{0,1}[0-9]{3}$"}, 30 | "integer":{"regexp":"^[0123456789]*$"}, 31 | "ipv4_2":{"regexp":"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"}, 32 | "ipv4":{"regexp":"^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"}, 33 | "ipv6":{"regexp":"^((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))$"}, 34 | "isbn":{"regexp":"^((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])$"}, 35 | "iso_8601":{"regexp":"^(?(?\d{4})-(?\d{2})-(?\d{2}))(?:T(?