├── src ├── GrammarNode │ ├── LeafInterface.php │ ├── NodeInterface.php │ ├── BranchInterface.php │ ├── BaseNode.php │ ├── BacktraceNode.php │ ├── BranchExtraCondition.php │ ├── TextS.php │ ├── ItemRestrictions.php │ ├── BranchDecorator.php │ ├── ParameterNode.php │ ├── BranchFactory.php │ ├── Text.php │ ├── ErrorTrackDecorator.php │ ├── Decorator.php │ ├── WhitespaceNegativeContextCheck.php │ ├── PredefinedSimpleString.php │ ├── PEGBranch.php │ ├── BranchStringCondition.php │ ├── AnyText.php │ ├── PredefinedString.php │ ├── LeafTime.php │ ├── NaiveBranch.php │ ├── WhitespaceContextCheck.php │ ├── Regex.php │ ├── ParametrizedNode.php │ ├── Choice.php │ ├── Lookahead.php │ ├── Branch.php │ ├── Series.php │ ├── Unorder.php │ └── Numeric.php ├── Exception.php ├── ParserAwareInterface.php ├── Extension │ ├── ItemRestrictions │ │ ├── ItemRestrictionInterface.php │ │ ├── ItemRestrictionNot.php │ │ ├── FollowedBy.php │ │ ├── ItemRestrictionOr.php │ │ ├── ItemRestrictionAnd.php │ │ ├── Is.php │ │ └── Contain.php │ ├── Regex.php │ ├── Time.php │ ├── Text.php │ ├── TextNode.php │ ├── StringObject.php │ ├── Base.php │ ├── RuleCondition.php │ ├── Choice.php │ ├── Unorder.php │ ├── SequenceItem.php │ ├── Lookahead.php │ ├── WhiteCharactersContext.php │ ├── Series.php │ ├── ParametrizedNode.php │ ├── ItemRestrictions.php │ ├── ExtensionInterface.php │ └── Integer.php ├── SyntaxTreeNode │ ├── LeafTime.php │ ├── Root.php │ ├── Numeric.php │ ├── PredefinedString.php │ ├── Base.php │ ├── Series.php │ └── Leaf.php ├── Examples │ ├── BooleanExpressionParser.php │ ├── CSVParser.php │ ├── ArithmeticExpressionParser.php │ ├── JSONParser.php │ ├── YamlLikeIndentationParser.php │ └── JSONFormater.php ├── ParsingException.php └── GrammarNodeCopier.php ├── phpunit.xml.dist ├── .travis.yml ├── .gitattributes ├── composer.json ├── TODO.txt ├── LICENSE ├── tests ├── ParsedNodes │ ├── SeriesTest.php │ ├── BranchTest.php │ ├── PredefinedStringTest.php │ └── ParserNodeTest.php ├── Examples │ ├── 
JSONFormaterTest.php │ ├── CSVParserTest.php │ ├── ArithmeticExpressionParserTest.php │ ├── PostfixToInfixNotationTranslatorTest.php │ ├── JSONParserTest.php │ ├── BooleanExpressionParserTest.php │ └── YamlLikeIndentationParserTest.php ├── Extension │ ├── ItemRestrictions │ │ ├── IsTest.php │ │ └── ContainTest.php │ ├── TextTest.php │ ├── PredefinedStringTest.php │ ├── WhiteCharactersTest.php │ ├── TimeTest.php │ ├── IntegerTest.php │ ├── ChoiceTest.php │ ├── UnorderTest.php │ ├── RuleConditionTest.php │ ├── SeriesTest.php │ ├── LookaheadTest.php │ └── ParametrizedNodeTest.php ├── GrammarNodes │ └── GrammarNodeNumericTest.php └── Util │ └── RegexTest.php └── .gitignore /src/GrammarNode/LeafInterface.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ./tests/ 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/GrammarNode/NodeInterface.php: -------------------------------------------------------------------------------- 1 | child = $child; 12 | } 13 | 14 | public function check($string, $fromIndex, $toIndex, $node) 15 | { 16 | return !$this->child->check($string, $fromIndex, $toIndex, $node); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/Extension/ItemRestrictions/FollowedBy.php: -------------------------------------------------------------------------------- 1 | grammarNode = $grammarNode; 12 | } 13 | 14 | public function check($string, $fromIndex, $toIndex, $node) 15 | { 16 | return (bool)$this->grammarNode->rparse($string, $toIndex, []); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj 
merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "farafiri/php-parsing-tool", 3 | "description": "library for parsing", 4 | "license": "MIT", 5 | "require" : { 6 | "php": "^8.0" 7 | }, 8 | "require-dev": { 9 | "phpunit/phpunit": "^10.0" 10 | }, 11 | "autoload": { 12 | "psr-4": { 13 | "ParserGenerator\\": "src/" 14 | } 15 | }, 16 | "autoload-dev": { 17 | "psr-4": { 18 | "ParserGenerator\\Tests\\": "tests/" 19 | } 20 | }, 21 | "authors": [ 22 | { 23 | "name": "farafiri" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /src/Extension/Regex.php: -------------------------------------------------------------------------------- 1 | node = $node; 12 | $this->tracer = $tracer; 13 | } 14 | 15 | public function rparse($string, $fromIndex, $restrictedEnd) 16 | { 17 | if ((int) $fromIndex === $this->tracer->index) { 18 | $this->tracer->addBacktrace(debug_backtrace()); 19 | }; 20 | 21 | return $this->node->rparse($string, $fromIndex, $restrictedEnd); 22 | } 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/Extension/Time.php: -------------------------------------------------------------------------------- 1 | getSubnode(1), $options['ignoreWhitespaces']); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/Extension/ItemRestrictions/ItemRestrictionOr.php: 
-------------------------------------------------------------------------------- 1 | children = $children; 12 | } 13 | 14 | public function check($string, $fromIndex, $toIndex, $node) 15 | { 16 | foreach ($this->children as $child) { 17 | if ($child->check($string, $fromIndex, $toIndex, $node)) { 18 | return true; 19 | } 20 | } 21 | 22 | return false; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/Extension/ItemRestrictions/ItemRestrictionAnd.php: -------------------------------------------------------------------------------- 1 | children = $children; 12 | } 13 | 14 | public function check($string, $fromIndex, $toIndex, $node) 15 | { 16 | foreach ($this->children as $child) { 17 | if (!$child->check($string, $fromIndex, $toIndex, $node)) { 18 | return false; 19 | } 20 | } 21 | 22 | return true; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/GrammarNode/BranchExtraCondition.php: -------------------------------------------------------------------------------- 1 | node->rparse($string, $fromIndex, $restrictedEnd)) { 10 | if ($this->check($string, $fromIndex, $newResult['offset'], $newResult['node'])) { 11 | return $newResult; 12 | } 13 | $restrictedEnd[$newResult['offset']] = $newResult['offset']; 14 | } 15 | 16 | return false; 17 | } 18 | 19 | abstract public function check($string, $fromIndex, $toIndex, $node); 20 | } -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | /** 2 | * TODO: 3 | * -error track 4 | * -comments in grammar 5 | * -think about GrammarExtensionComment 6 | * -whitespaces support : (string) $parser->parse(' x y z ') should result in ' x y z ' not 'xyz' 7 | * -template nodes: sequence :=> ?n x sequence 8 | * :=> !n '' 9 | * where x is a node and n an integer 10 | * -think about lookbehind 11 | * -predefined 
node: text 12 | * -(!! after modifications) in context of one rule back references should work eg: [:=> /./+ /./* $1] should easily get text abacab as ['ab', 'ac', 'ab'] 13 | * -variables: gnNode (grammarNode) astNode(abstract syntax tree node) 14 | * -add namespace 15 | * 16 | * DONE: 17 | * -case insensitive: option for parser. with this "x" matches "X" 18 | * -lookaround: !"abcd" "abc" 19 | */ -------------------------------------------------------------------------------- /src/GrammarNode/TextS.php: -------------------------------------------------------------------------------- 1 | str)) == $this->str || 10 | ($this->str === '' && strlen($string) === $fromIndex)) { 11 | $endPos = $fromIndex + strlen($this->str); 12 | preg_match('/\s*/', $string, $match, 0, $endPos); 13 | $endPos += strlen($match[0]); 14 | if (!isset($restrictedEnd[$endPos])) { 15 | $node = new \ParserGenerator\SyntaxTreeNode\Leaf($this->str, $match[0]); 16 | return ['node' => $node, 'offset' => $endPos]; 17 | } 18 | } 19 | 20 | return false; 21 | } 22 | } -------------------------------------------------------------------------------- /src/GrammarNode/ItemRestrictions.php: -------------------------------------------------------------------------------- 1 | condition = $condition; 13 | } 14 | 15 | public function rparse($string, $fromIndex, $restrictedEnd) 16 | { 17 | while ($newResult = $this->node->rparse($string, $fromIndex, $restrictedEnd)) { 18 | if ($this->condition->check($string, $fromIndex, $newResult['offset'], $newResult['node'])) { 19 | return $newResult; 20 | } 21 | $restrictedEnd[$newResult['offset']] = $newResult['offset']; 22 | } 23 | 24 | return false; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/SyntaxTreeNode/LeafTime.php: -------------------------------------------------------------------------------- 1 | content = $content; 19 | $this->afterContent = $afterContent; 20 | $this->timeData = $timeData; 21 | } 22 | 23 | 
public function getValue() 24 | { 25 | $result = new \DateTime(); 26 | $result->setDate($this->timeData['year'], $this->timeData['month'], $this->timeData['day']); 27 | $result->setTime($this->timeData['hour'] ?: 0, $this->timeData['minute'] ?: 0, $this->timeData['second'] ?: 0); 28 | 29 | return $result; 30 | } 31 | } -------------------------------------------------------------------------------- /src/GrammarNode/BranchDecorator.php: -------------------------------------------------------------------------------- 1 | node->setParser($parser); 10 | } 11 | 12 | public function getParser() 13 | { 14 | return $this->node->getParser(); 15 | } 16 | 17 | public function setNode($node) 18 | { 19 | return $this->node->setNode($node); 20 | } 21 | 22 | public function getNode() 23 | { 24 | return $this->node->getNode(); 25 | } 26 | 27 | public function getNodeName() 28 | { 29 | return $this->node->getNodeName(); 30 | } 31 | 32 | public function __toString() 33 | { 34 | return $this->getNodeName(); 35 | } 36 | } -------------------------------------------------------------------------------- /src/Extension/ItemRestrictions/Is.php: -------------------------------------------------------------------------------- 1 | grammarNode = $grammarNode; 12 | } 13 | 14 | public function check($string, $fromIndex, $toIndex, $node) 15 | { 16 | $restrictedEnds = []; 17 | while (true) { 18 | $parsedNode = $this->grammarNode->rparse($string, $fromIndex, $restrictedEnds); 19 | 20 | if (!$parsedNode) { 21 | return false; 22 | } elseif ($parsedNode['offset'] === $toIndex) { 23 | return true; 24 | } else { 25 | $restrictedEnds[$parsedNode['offset']] = $parsedNode['offset']; 26 | } 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/Extension/Text.php: -------------------------------------------------------------------------------- 1 | getNS(null, false)] = [ 12 | [ 13 | 'text', 14 | ], 15 | ]; 16 | 17 | return 
parent::extendGrammar($grammarGrammar); 18 | } 19 | 20 | protected function getNS($node = '', $addColon = true) 21 | { 22 | return ($addColon ? ':' : '') . static::_NAMESPACE . ($node ? '_' . $node : ''); 23 | } 24 | 25 | protected function getGrammarGrammarSequence() 26 | { 27 | return [$this->getNS('')]; 28 | } 29 | 30 | protected function _buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options) 31 | { 32 | return new \ParserGenerator\GrammarNode\AnyText($options); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/Extension/TextNode.php: -------------------------------------------------------------------------------- 1 | getSubnode(0)->getValue()); 16 | return new \ParserGenerator\GrammarNode\Regex($regex, $options['ignoreWhitespaces'], $options['caseInsensitive']); 17 | } 18 | 19 | if (!$options['ignoreWhitespaces']) { 20 | return new \ParserGenerator\GrammarNode\Text($sequenceItem->getSubnode(0)->getValue()); 21 | } 22 | 23 | return new \ParserGenerator\GrammarNode\TextS($sequenceItem->getSubnode(0)->getValue()); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/GrammarNode/ParameterNode.php: -------------------------------------------------------------------------------- 1 | index = $index; 16 | $this->branchName = $branchName; 17 | $this->parameterName = $parameterName; 18 | } 19 | 20 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 21 | { 22 | throw new Exception("this function should be never called on this node type"); 23 | } 24 | 25 | public function getIndex() 26 | { 27 | return $this->index; 28 | } 29 | 30 | public function getBranchName() 31 | { 32 | return $this->branchName; 33 | } 34 | 35 | public function getParameterName() 36 | { 37 | return $this->parameterName; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright Rafał Fabiański 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /src/GrammarNode/BranchFactory.php: -------------------------------------------------------------------------------- 1 | getGrammar(); 17 | 18 | parent::__construct($grammar, [ 19 | 'caseInsensitive' => true, 20 | 'ignoreWhitespaces' => true, 21 | ]); 22 | } 23 | 24 | public function getGrammar(): string 25 | { 26 | return <<<'GRAMMAR' 27 | start :=> exprOr. 28 | 29 | tokenKeyword :=> /[^\s"'()]+/. 30 | tokenAnd :=> 'and'. 31 | tokenOr :=> 'or'. 32 | tokenNot :=> 'not'. 33 | 34 | exprOr :=> exprAnd (tokenOr exprAnd)*. 35 | exprAnd :=> expr (tokenAnd expr)*. 36 | expr :=> tokenNot? (tokenKeyword | string) 37 | :=> tokenNot? '(' exprOr ')'. 
38 | GRAMMAR; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/SyntaxTreeNode/Root.php: -------------------------------------------------------------------------------- 1 | beforeContent = $beforeContent; 13 | } 14 | 15 | public function setBeforeContent($newValue) 16 | { 17 | $this->beforeContent = $newValue; 18 | return $this; 19 | } 20 | 21 | public function getBeforeContent() 22 | { 23 | return $this->beforeContent; 24 | } 25 | 26 | public function toString($mode = Base::TO_STRING_NO_WHITESPACES) 27 | { 28 | return ($mode == Base::TO_STRING_ORIGINAL ? $this->beforeContent : '') . parent::toString($mode); 29 | } 30 | 31 | public static function createFromPrototype(\ParserGenerator\SyntaxTreeNode\Branch $prototype) 32 | { 33 | return new self($prototype->type, $prototype->detailType, $prototype->subnodes); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/SyntaxTreeNode/Numeric.php: -------------------------------------------------------------------------------- 1 | base = $base; 13 | } 14 | 15 | public function getBase() 16 | { 17 | return $this->base; 18 | } 19 | 20 | public function getFixedCharacters() 21 | { 22 | $str = str_replace(['0x', '0b'], ['', ''], $this->content); 23 | 24 | if (substr($str, 0, 1) === '-') { 25 | $str = substr($str, 1); 26 | } 27 | 28 | if ($this->base === 8) { 29 | $str = substr($str, 1); 30 | } 31 | 32 | if (substr($str, 0, 1) === '0' && strlen($str) > 1) { 33 | return strlen($str); 34 | } else { 35 | return 0; 36 | } 37 | } 38 | 39 | public function getValue() 40 | { 41 | $str = str_replace(['0x', '0b'], ['', ''], $this->content); 42 | return intval($str, $this->base); 43 | } 44 | } -------------------------------------------------------------------------------- /src/GrammarNode/Text.php: -------------------------------------------------------------------------------- 1 | str = $str; 15 | } 16 | 17 | public function 
rparse($string, $fromIndex = 0, $restrictedEnd = []) 18 | { 19 | if (substr($string, $fromIndex, strlen($this->str)) === $this->str || 20 | ($this->str === '' && strlen($string) === $fromIndex)) { 21 | $endPos = $fromIndex + strlen($this->str); 22 | if (!isset($restrictedEnd[$endPos])) { 23 | return ['node' => new \ParserGenerator\SyntaxTreeNode\Leaf($this->str), 'offset' => $endPos]; 24 | } 25 | } 26 | 27 | return false; 28 | } 29 | 30 | public function getString() 31 | { 32 | return $this->str; 33 | } 34 | 35 | public function __toString() 36 | { 37 | return '"' . addslashes($this->str) . '"'; 38 | } 39 | } -------------------------------------------------------------------------------- /src/ParsingException.php: -------------------------------------------------------------------------------- 1 | index = $index; 24 | $this->expected = $expected; 25 | $this->parsed = $parsed; 26 | } 27 | 28 | public function getIndex(): int 29 | { 30 | return $this->index; 31 | } 32 | 33 | /** 34 | * @return ParserGenerator\GrammarNode\BaseNode[] 35 | */ 36 | public function getExpected() 37 | { 38 | return $this->expected; 39 | } 40 | 41 | public function getParsed(): string 42 | { 43 | return $this->parsed; 44 | } 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/GrammarNode/ErrorTrackDecorator.php: -------------------------------------------------------------------------------- 1 | node->rparse($string, $fromIndex, $restrictedEnd); 19 | 20 | if (!$result && $fromIndex > $this->maxCheck) { 21 | $this->maxCheck = $fromIndex; 22 | } 23 | 24 | return $result; 25 | } 26 | 27 | public function getMaxCheck() 28 | { 29 | return $this->maxCheck === -1 ? 
null : $this->maxCheck; 30 | } 31 | 32 | public function setMaxCheck($maxCheck) 33 | { 34 | $this->maxCheck = $maxCheck; 35 | } 36 | 37 | public function reset() 38 | { 39 | $this->maxCheck = -1; 40 | } 41 | 42 | public function copy($copyCallback) 43 | { 44 | $result = parent::copy($copyCallback); 45 | $result->maxCheck = -1; 46 | return $result; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/GrammarNode/Decorator.php: -------------------------------------------------------------------------------- 1 | node = $node; 12 | } 13 | 14 | public function rparse($string, $fromIndex, $restrictedEnd) 15 | { 16 | return $this->node->rparse($string, $fromIndex, $restrictedEnd); 17 | } 18 | 19 | public static function undecorate($node) 20 | { 21 | while ($node instanceof self) { 22 | $node = $node->node; 23 | } 24 | 25 | return $node; 26 | } 27 | 28 | public function __toString() 29 | { 30 | return (string)$this->node; 31 | } 32 | 33 | public function getDecoratedNode() 34 | { 35 | return $this->node; 36 | } 37 | 38 | public function copy($copyCallback) 39 | { 40 | $copy = clone $this; 41 | $copy->node = $copyCallback($this->node); 42 | return $copy; 43 | } 44 | 45 | public function getNodeName() 46 | { 47 | return $this->node->getNodeName(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/SyntaxTreeNode/PredefinedString.php: -------------------------------------------------------------------------------- 1 | escapedByRepetition = $escapedByRepetition; 13 | } 14 | 15 | public function getValue() 16 | { 17 | if ($this->escapedByRepetition) { 18 | $startChar = substr($this->content, 0, 1); 19 | return str_replace($startChar . 
$startChar, $startChar, substr($this->content, 1, -1)); 20 | } else { 21 | return stripcslashes(substr($this->content, 1, -1)); 22 | } 23 | } 24 | 25 | public function getPHPValue() 26 | { 27 | if (substr($this->content, 0, 1) === '"') { 28 | return stripcslashes(substr(str_replace("\\'", "\\\\'", $this->content), 1, -1)); 29 | } else { 30 | return str_replace(['\\\\', '\\\''], ['\\', '\''], substr($this->content, 1, -1)); 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /src/SyntaxTreeNode/Base.php: -------------------------------------------------------------------------------- 1 | getRightLeaf()->afterContent = $newValue; 32 | return $this; 33 | } 34 | 35 | public function getAfterContent() 36 | { 37 | return $this->getRightLeaf()->afterContent; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/Extension/ItemRestrictions/Contain.php: -------------------------------------------------------------------------------- 1 | grammarNode = $grammarNode; 12 | } 13 | 14 | public function check($string, $fromIndex, $toIndex, $node) 15 | { 16 | for ($currentIndex = $fromIndex; $currentIndex < $toIndex; $currentIndex++) { 17 | $restrictedEnds = []; 18 | while (true) { 19 | $parsedNode = $this->grammarNode->rparse($string, $currentIndex, $restrictedEnds); 20 | 21 | if (!$parsedNode) { 22 | break; 23 | } 24 | 25 | $offset = $parsedNode['offset'] - strlen($parsedNode['node']->getRightLeaf()->getAfterContent()); 26 | if ($offset > $toIndex) { 27 | $restrictedEnds[$offset] = $offset; 28 | } else { 29 | return true; 30 | } 31 | } 32 | } 33 | 34 | return false; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/GrammarNode/WhitespaceNegativeContextCheck.php: -------------------------------------------------------------------------------- 1 | char = $char; 15 | } 16 | 17 | public function rparse($string, $fromIndex = 0, 
$restrictedEnd = []) 18 | { 19 | if (!isset($restrictedEnd[$fromIndex])) { 20 | $index = $fromIndex; 21 | while (--$index >= 0 && in_array(substr($string, $index, 1), self::$whiteCharacters, true)) { 22 | if ($this->char === null || substr($string, $index, 1) === $this->char) { 23 | return false; 24 | } 25 | } 26 | 27 | return ['node' => new \ParserGenerator\SyntaxTreeNode\Leaf(''), 'offset' => $fromIndex]; 28 | } 29 | 30 | return false; 31 | } 32 | 33 | public function __toString() 34 | { 35 | return (string) $this->char; 36 | } 37 | } -------------------------------------------------------------------------------- /src/Extension/StringObject.php: -------------------------------------------------------------------------------- 1 | getSubnode(1) ? (string)$sequenceItem->getSubnode(1) : 'default'; 18 | 19 | switch ($type) { 20 | case "default": 21 | return new \ParserGenerator\GrammarNode\PredefinedString($options['ignoreWhitespaces'], ["'", '"']); 22 | 23 | case "apostrophe": 24 | return new \ParserGenerator\GrammarNode\PredefinedString($options['ignoreWhitespaces'], ["'"]); 25 | 26 | case "quotation": 27 | return new \ParserGenerator\GrammarNode\PredefinedString($options['ignoreWhitespaces'], ['"']); 28 | 29 | case "simple": 30 | return new \ParserGenerator\GrammarNode\PredefinedSimpleString($options['ignoreWhitespaces']); 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tests/ParsedNodes/SeriesTest.php: -------------------------------------------------------------------------------- 1 | /\d+/+",".'); 20 | //var_Dump($x->parse('8,12,4')); 21 | 22 | $seriesNode = $x->parse('8,12,4')->getSubnode(0); 23 | 24 | $this->assertTrue($seriesNode instanceof \ParserGenerator\SyntaxTreeNode\Series); 25 | $this->assertEquals([ 26 | new \ParserGenerator\SyntaxTreeNode\Leaf('8'), 27 | new \ParserGenerator\SyntaxTreeNode\Leaf('12'), 28 | new \ParserGenerator\SyntaxTreeNode\Leaf('4'), 29 | ], $seriesNode->getMainNodes()); 
30 | } 31 | 32 | public function testOrderBy() 33 | { 34 | $x = new Parser('start :=> /\d+/+",".'); 35 | $seriesNode = $x->parse('8,12,4')->getSubnode(0); 36 | 37 | $seriesNode->orderBy(); 38 | 39 | $this->assertEquals('4,8,12', (string)$seriesNode); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/Extension/Base.php: -------------------------------------------------------------------------------- 1 | parse('{"a":23,"b":false}'); 16 | $formater->setIndention($jsonTree); 17 | 18 | $nl = "\n"; 19 | $expected = '{' . $nl . 20 | ' "a": 23,' . $nl . 21 | ' "b": false' . $nl . 22 | '}'; 23 | 24 | $this->assertEquals($expected, $jsonTree->toString(Base::TO_STRING_ORIGINAL)); 25 | 26 | $jsonTree = $formater->parse('[{"a":0, "b":34}, {"x":17}]'); 27 | $formater->setIndention($jsonTree); 28 | 29 | $expected = '[' . $nl . 30 | ' {' . $nl . 31 | ' "a": 0,' . $nl . 32 | ' "b": 34' . $nl . 33 | ' },' . $nl . 34 | ' {' . $nl . 35 | ' "x": 17' . $nl . 36 | ' }' . $nl . 37 | ']'; 38 | 39 | $this->assertEquals($expected, $jsonTree->toString(Base::TO_STRING_ORIGINAL)); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/GrammarNode/PredefinedSimpleString.php: -------------------------------------------------------------------------------- 1 | eatWhiteChars = $eatWhiteChars; 17 | } 18 | 19 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 20 | { 21 | if (preg_match($this->regex, $string, $match, 0, $fromIndex)) { 22 | if (isset($match[1])) { 23 | $offset = strlen($match[$this->eatWhiteChars ? 0 : 1]) + $fromIndex; 24 | if (!isset($restrictedEnd[$offset])) { 25 | $node = new \ParserGenerator\SyntaxTreeNode\PredefinedString($match[1], '', true); 26 | $node->setAfterContent($this->eatWhiteChars ? 
substr($match[0], strlen($match[1])) : ''); 27 | return ['node' => $node, 'offset' => $offset]; 28 | } 29 | } 30 | } 31 | 32 | return false; 33 | } 34 | 35 | public function __toString() 36 | { 37 | return "string"; 38 | } 39 | } -------------------------------------------------------------------------------- /tests/Examples/CSVParserTest.php: -------------------------------------------------------------------------------- 1 | assertEquals($expected, $parser->parseCSV("r1c1,r1c2\nr2c1,r2c2")); 20 | } 21 | 22 | public function testQuoted() 23 | { 24 | $parser = new CSVParser(); 25 | 26 | $expected = [ 27 | ['r1c1', 'r1c2'], 28 | ['r2c1', 'r2c2'], 29 | ]; 30 | 31 | $this->assertEquals($expected, $parser->parseCSV("\"r1c1\" , \"r1c2\"\n\"r2c1\",\"r2c2\"")); 32 | } 33 | 34 | public function testPreserveSpaces() 35 | { 36 | $parser = new CSVParser(); 37 | 38 | $expected = [ 39 | [' c1 ', ' c2 '], 40 | ]; 41 | 42 | $this->assertEquals($expected, $parser->parseCSV(" c1 , c2 ")); 43 | } 44 | 45 | public function testProperEscaping() 46 | { 47 | $parser = new CSVParser(); 48 | 49 | $expected = [ 50 | ['text "quot"', ", \n"], 51 | ['\n', 'a'], 52 | ]; 53 | 54 | $this->assertEquals($expected, $parser->parseCSV("\"text \"\"quot\"\"\", \", \n\" \n \"\\n\",a")); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/Extension/RuleCondition.php: -------------------------------------------------------------------------------- 1 | ])+/', '?>']; 12 | 13 | return $grammarGrammar; 14 | } 15 | 16 | function modifyBranches($grammar, $parsedGrammar, $grammarParser, $options) 17 | { 18 | foreach ($parsedGrammar->findAll('grammarBranch') as $grammarBranch) { 19 | $functions = []; 20 | foreach ($grammarBranch->findAll('rule') as $ruleIndex => $rule) { 21 | $ruleName = (string)$rule->findFirst('ruleName') ?: $ruleIndex; 22 | if ($condition = $rule->findFirst('ruleCondition')) { 23 | $functions[$ruleName] = (string)$condition->getSubnode(1); 
24 | } 25 | } 26 | 27 | if (count($functions)) { 28 | $branchName = (string)$grammarBranch->findFirst('branchName'); 29 | $grammar[$branchName] = new \ParserGenerator\GrammarNode\BranchStringCondition($grammar[$branchName], 30 | $functions); 31 | } 32 | } 33 | 34 | return $grammar; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/GrammarNode/PEGBranch.php: -------------------------------------------------------------------------------- 1 | nodeName; 10 | 11 | if (!isset($this->parser->cache[$cacheStr])) { 12 | foreach ($this->node as $_optionIndex => $option) { 13 | $index = $fromIndex; 14 | $subnodes = []; 15 | 16 | foreach ($option as $sequenceItem) { 17 | $subnode = $sequenceItem->rparse($string, $index, []); 18 | if ($subnode) { 19 | $subnodes[] = $subnode['node']; 20 | $index = $subnode['offset']; 21 | } else { 22 | continue 2; 23 | } 24 | } 25 | 26 | $node = new \ParserGenerator\SyntaxTreeNode\Branch($this->nodeShortName, $_optionIndex, $subnodes); 27 | $r = ['node' => $node, 'offset' => $index]; 28 | $this->parser->cache[$cacheStr] = $r; 29 | return isset($restrictedEnd[$index]) ? 
false : $r; 30 | } 31 | 32 | $this->parser->cache[$cacheStr] = false; 33 | return false; 34 | } 35 | 36 | $r = $this->parser->cache[$cacheStr]; 37 | if ($r !== false && !isset($restrictedEnd[$r['offset']])) { 38 | return $r; 39 | } else { 40 | return false; 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tests/Examples/ArithmeticExpressionParserTest.php: -------------------------------------------------------------------------------- 1 | assertEquals(15, $parser->getValue('10 + 5')); 15 | $this->assertEquals(5, $parser->getValue('10 - 5')); 16 | $this->assertEquals(50, $parser->getValue('10 * 5')); 17 | $this->assertEquals(2, $parser->getValue('10 / 5')); 18 | 19 | $this->assertEquals(941, $parser->getValue('1 + 10 + 30 + 100 + 300 + 500')); 20 | $this->assertEquals(180, $parser->getValue('3 * 2 * 2 * 1 * 15')); 21 | $this->assertEquals(89, $parser->getValue('100 - 10 - 1')); 22 | $this->assertEquals(4, $parser->getValue('16 / 2 / 2')); 23 | } 24 | 25 | public function testMixed() 26 | { 27 | $parser = new ArithmeticExpressionParser(); 28 | 29 | $this->assertEquals(67, $parser->getValue('100 - 3 - 10 + 30 - 50')); 30 | $this->assertEquals(56, $parser->getValue('10 * 5 + 3 * 2')); 31 | $this->assertEquals(40, $parser->getValue('10 * 5 - 3 * 2 - 8 / 2')); 32 | } 33 | 34 | public function testBrackets() 35 | { 36 | $parser = new ArithmeticExpressionParser(); 37 | 38 | $this->assertEquals(91, $parser->getValue('100 - (10 - 1)')); 39 | $this->assertEquals(51, $parser->getValue('10 * 5 + 1')); 40 | $this->assertEquals(60, $parser->getValue('10 * (5 + 1)')); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/GrammarNodeCopier.php: -------------------------------------------------------------------------------- 1 | $subnode) { 31 | $result[$index] = $_copy($subnode); 32 | } 33 | return $result; 34 | } else { 35 | $x = $callback($node); 36 | if ($x === false || 
$x === null) { 37 | return $node; 38 | } elseif ($x === true) { 39 | return $node->copy($_copy); 40 | } else { 41 | return $x; 42 | } 43 | } 44 | }; 45 | 46 | return $node->copy($_copy); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /tests/Extension/ItemRestrictions/IsTest.php: -------------------------------------------------------------------------------- 1 | 'abcd' 14 | :=> 'ab'."); 15 | 16 | $contain = new Is($x->grammar['start']); 17 | 18 | $this->assertFalse($contain->check('qwerty', 0, 3, null)); 19 | $x->cache = []; 20 | $this->assertTrue($contain->check('xabcd', 1, 5, null)); 21 | $x->cache = []; 22 | $this->assertFalse($contain->check('xabcf', 1, 5, null)); 23 | $x->cache = []; 24 | $this->assertTrue($contain->check('xabcf', 1, 3, null)); 25 | $x->cache = []; 26 | $this->assertFalse($contain->check('xabcd', 1, 4, null)); 27 | $x->cache = []; 28 | $this->assertTrue($contain->check('ab', 0, 2, null)); 29 | $x->cache = []; 30 | $this->assertTrue($contain->check('ab ', 0, 2, null)); 31 | $x->cache = []; 32 | 33 | $x = new Parser("start :=> 'abcd' 34 | :=> 'ab'.", ['ignoreWhitespaces' => true]); 35 | 36 | $contain = new Is($x->grammar['start']); 37 | 38 | $this->assertTrue($contain->check('ab ', 0, 4, null)); 39 | 40 | // I dont realy know what to return in these cases 41 | //$x->cache = array(); 42 | //$this->assertTrue($contain->check('ab ', 0, 3, null)); 43 | //$x->cache = array(); 44 | //$this->assertTrue($contain->check('ab ', 0, 2, null)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/Examples/CSVParser.php: -------------------------------------------------------------------------------- 1 | getCSVDefinition()); 19 | } 20 | 21 | protected function getCSVDefinition() 22 | { 23 | return ' 24 | start: => line*lineSeparator. 25 | lineSeparator:=> /(\r\n|\n\r|\r|\n)/. 26 | line: => value*",". 
/**
 * Parses CSV text into a list of rows, each row being a list of cell values.
 *
 * @param string $string CSV text to parse
 * @return array list of rows; every row is a list of cell values
 * @throws Exception when the text does not match the CSV grammar
 */
public function parseCSV($string)
{
    $tree = $this->parse($string);

    if (!$tree) {
        throw new Exception('given string is not proper CSV format');
    }

    $rows = [];
    foreach ($tree->getSubnode(0)->getMainNodes() as $lineNode) {
        $cells = [];
        foreach ($lineNode->getSubnode(0)->getMainNodes() as $valueNode) {
            // Detail type 0 is the first `value` alternative of the grammar:
            // a `string/simple` node (subnode 1) between two blank-matching
            // regex nodes. Any other alternative is taken as raw text.
            $cells[] = ($valueNode->getDetailType() == 0)
                ? $valueNode->getSubnode(1)->getValue()
                : (string)$valueNode;
        }

        $rows[] = $cells;
    }

    return $rows;
}
/**
 * Postfix input must be rendered as infix text, with brackets added only
 * where an additive sub-expression is an operand of `*` or `/`.
 */
public function testTranslator()
{
    // postfix input => expected infix output
    $cases = [
        '2 3 + 4 +' => '2+3+4',
        '2 3 4 + +' => '2+3+4',
        '2 3 4 * +' => '2+3*4',
        '2 3 4 + *' => '2*(3+4)',
        '2 3 4 5 6 * + + *' => '2*(3+4+5*6)',
    ];

    foreach ($cases as $postfix => $infix) {
        $this->assertEquals($infix, $this->translate($postfix));
    }
}
/**
 * Stores the condition expressions and compiles each one into a closure.
 *
 * Every expression becomes the body of
 * `function ($string, $fromIndex, $toIndex, $node, $s) { return <expression>; }`.
 *
 * @param array $conditionStrings PHP expressions keyed by branch detail type
 */
public function setConditionString($conditionStrings)
{
    $this->conditionStrings = $conditionStrings;
    $this->_functions = [];

    $signature = '$string,$fromIndex,$toIndex,$node,$s';

    foreach ($conditionStrings as $type => $expression) {
        $this->_functions[$type] = $this->create_function($signature, 'return ' . $expression . ';');
    }
}

/**
 * Evaluates the condition registered for the node's detail type.
 *
 * @param string $string    text being parsed
 * @param int    $fromIndex passed through to the condition
 * @param int    $toIndex   passed through to the condition
 * @param mixed  $node      node providing getDetailType() and getSubnodes()
 * @return mixed result of the condition, or true when no condition is
 *               registered for this detail type
 */
public function check($string, $fromIndex, $toIndex, $node)
{
    $type = $node->getDetailType();

    if (!isset($this->_functions[$type])) {
        return true;
    }

    /** @var $condition \Closure */
    $condition = $this->_functions[$type];

    // The fifth argument exposes the subnodes to the expression as `$s`.
    return $condition($string, $fromIndex, $toIndex, $node, $node->getSubnodes());
}

/**
 * Emulate `create_function` (which was deprecated with PHP 7.2) tailored
 * for the Grammar parser needs.
 *
 * NOTE(review): both arguments are interpolated into code passed to eval(),
 * so they must never come from untrusted input.
 *
 * @param string $arguments parameter list of the generated closure
 * @param string $body      statements forming the closure body
 * @return \Closure
 */
protected function create_function($arguments, $body)
{
    return eval("return function ($arguments) { $body };");
}
/**
 * Builds a Choice grammar node from a parsed `( a | b | ... )` item.
 *
 * The alternatives form a right-nested list: a `nest` node holds one
 * sequence in subnode 0 and the rest of the list in subnode 2, and the
 * final `last` node holds only a sequence in subnode 0.
 *
 * @param array $grammar       grammar under construction (by reference)
 * @param mixed $sequenceItem  parsed choice item; subnode 1 is the list head
 * @param mixed $grammarParser builder used for the nested sequence items
 * @param array $options       build options; 'parser' is attached to the node
 * @return \ParserGenerator\GrammarNode\Choice
 */
protected function _buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options)
{
    $alternatives = [];
    $current = $sequenceItem->getSubnode(1);

    // Collect every alternative before the terminating `last` node ...
    for (; $current->getDetailType() !== 'last'; $current = $current->getSubnode(2)) {
        $alternatives[] = $this->buildInternalSequence(
            $grammar,
            $current->getSubnode(0),
            $grammarParser,
            $options
        );
    }

    // ... then the alternative stored in the `last` node itself.
    $alternatives[] = $this->buildInternalSequence(
        $grammar,
        $current->getSubnode(0),
        $grammarParser,
        $options
    );

    $choiceNode = new \ParserGenerator\GrammarNode\Choice($alternatives);
    $choiceNode->setParser($options['parser']);

    // The node is returned without being stored in $grammar.
    return $choiceNode;
}
$choice[0] : $choice; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tests/ParsedNodes/BranchTest.php: -------------------------------------------------------------------------------- 1 | refreshOwners(); 31 | 32 | $c = $b->copy(); 33 | 34 | $this->assertSame($b, $c->origin); 35 | $this->assertTrue($c->getSubnode(0) instanceof Leaf); 36 | $this->assertTrue($c->getSubnode(1) instanceof Branch); 37 | $this->assertTrue($c->getSubnode(2) instanceof Numeric); 38 | $this->assertSame($b->getSubnode(0), $c->getSubnode(0)->origin); 39 | $this->assertSame($b->getSubnode(1), $c->getSubnode(1)->origin); 40 | $this->assertSame($b->getSubnode(2), $c->getSubnode(2)->origin); 41 | $this->assertSame($c, $c->getSubnode(0)->owner); 42 | $this->assertSame($c, $c->getSubnode(1)->owner); 43 | $this->assertSame($c, $c->getSubnode(2)->owner); 44 | 45 | $this->assertSame($b->getSubnode(1)->getSubnode(0), $c->getSubnode(1)->getSubnode(0)->origin); 46 | $this->assertSame($b->getSubnode(1)->getSubnode(1), $c->getSubnode(1)->getSubnode(1)->origin); 47 | 48 | $this->assertSame($c->getSubnode(1), $c->getSubnode(1)->getSubnode(0)->owner); 49 | $this->assertSame($c->getSubnode(1), $c->getSubnode(1)->getSubnode(1)->owner); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /tests/Extension/TextTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 16 | } 17 | 18 | public function testBaseText() 19 | { 20 | $x = new Parser('start :=> text.', ['ignoreWhitespaces' => true]); 21 | 22 | $this->assertEquals(new Root('start', 0, [ 23 | new Leaf("Lorem ipsum dolor\nsit emet"), 24 | ]), $x->parse("Lorem ipsum dolor\nsit emet")); 25 | 26 | $this->assertEquals(new Root('start', 0, [ 27 | new Leaf(""), 28 | ], ' '), $x->parse(" ")); 29 | 30 | $this->assertEquals(new Root('start', 0, [ 31 | new Leaf(""), 32 | ]), $x->parse("")); 33 | 34 | 
$this->assertEquals(new Root('start', 0, [ 35 | new Leaf("Lorem ipsum", "\n"), 36 | ], ' '), $x->parse(" Lorem ipsum\n")); 37 | 38 | $x = new Parser('start :=> text.', ['ignoreWhitespaces' => false]); 39 | 40 | $this->assertEquals(new Root('start', 0, [ 41 | new Leaf(""), 42 | ]), $x->parse("")); 43 | 44 | $x = new Parser('start :=> text++",".', ['ignoreWhitespaces' => true]); 45 | 46 | $this->assertEquals(new Root('start', 0, [ 47 | new Series('list', 'text', [ 48 | new Leaf('some text', ' '), 49 | new Leaf(',', ' '), 50 | new Leaf('more text'), 51 | ], true), 52 | ]), $x->parse("some text , more text")); 53 | 54 | $parsed = $x->parse("a,a,a,a,a,a,a,a,a,a,a,a,a,a,a,a"); 55 | 56 | $this->assertCount(31, $parsed->getSubnode(0)->getSubnodes()); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tests/Extension/PredefinedStringTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 14 | } 15 | 16 | public function testSimple() 17 | { 18 | $x = new Parser('start :=> string.'); 19 | 20 | $this->assertObject($x->parse('"asd\\" "')); 21 | $this->assertObject($x->parse("'asd\\' '")); 22 | $this->assertFalse($x->parse('"asd"" "')); 23 | 24 | $x = new Parser('start :=> string/apostrophe.'); 25 | 26 | $this->assertObject($x->parse("'asd\\' '")); 27 | $this->assertFalse($x->parse('"asd\\" "')); 28 | $this->assertObject($x->parse("'asd\\' '")); 29 | $this->assertFalse($x->parse('"asd"" "')); 30 | 31 | $x = new Parser('start :=> string/quotation.'); 32 | 33 | $this->assertObject($x->parse('"asd\\" "')); 34 | $this->assertFalse($x->parse("'asd\\' '")); 35 | $this->assertFalse($x->parse('"asd"" "')); 36 | 37 | $x = new Parser('start :=> string/simple.'); 38 | 39 | $this->assertFalse($x->parse('"asd\\" "')); 40 | $this->assertFalse($x->parse("'asd\\' '")); 41 | $this->assertObject($x->parse('"asd"" "')); 42 | 43 | $parsingResult = $x->parse('"ab""c"'); 44 | 
$this->assertEquals('ab"c', $parsingResult->getSubnode(0)->getValue()); 45 | 46 | $parsingResult = $x->parse('"\t\n"'); 47 | $this->assertEquals('\t\n', $parsingResult->getSubnode(0)->getValue()); 48 | } 49 | 50 | public function testBugNoWhitespacesEatenBySimple() 51 | { 52 | $x = new Parser('start :=> string/simple "b".', ['ignoreWhitespaces' => true]); 53 | $this->assertObject($x->parse('"abb"b')); 54 | $this->assertObject($x->parse('"abb" b')); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/Extension/Unorder.php: -------------------------------------------------------------------------------- 1 | seqName] = [ 12 | 'nest' => [':/[?*+]?/', ':sequenceItem', ',', (':' . $this->seqName)], 13 | 'last' => [':/[?*+]?/', ':sequenceItem'], 14 | ]; 15 | 16 | return parent::extendGrammar($grammarGrammar); 17 | } 18 | 19 | protected function getGrammarGrammarSequence() 20 | { 21 | return [['unorder(', ':sequenceItem', ',', (':' . $this->seqName), ')']]; 22 | } 23 | 24 | protected function _buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options) 25 | { 26 | $separator = $this->buildInternalSequence($grammar, $sequenceItem->getSubnode(1), $grammarParser, $options); 27 | $node = new \ParserGenerator\GrammarNode\Unorder($separator); 28 | $sequenceNode = $sequenceItem->getSubnode(3); 29 | 30 | while ($sequenceNode) { 31 | $n = $this->buildInternalSequence($grammar, $sequenceNode->getSubnode(1), $grammarParser, $options); 32 | $node->addChoice($n, (string)$sequenceNode->getSubnode(0)); 33 | $sequenceNode = ($sequenceNode->getDetailType() == 'last') ? 
/**
 * Builds grammar nodes for every `sequenceItem` found in the given sequence.
 *
 * @param array $grammar       grammar under construction (by reference)
 * @param mixed $sequence      parsed node searched with findAll('sequenceItem')
 * @param mixed $grammarParser builder invoked for each item
 * @param array $options       build options forwarded to the builder
 * @return mixed the single built node when exactly one item was found,
 *               otherwise the list of built nodes
 */
private function buildInternalSequence(&$grammar, $sequence, $grammarParser, $options)
{
    $built = [];

    foreach ($sequence->findAll('sequenceItem') as $item) {
        $built[] = $grammarParser->buildSequenceItem($grammar, $item, $options);
    }

    // A lone item is unwrapped so callers receive the node itself.
    if (count($built) === 1) {
        return $built[0];
    }

    return $built;
}
/**
 * Parses an arithmetic expression and returns its value.
 *
 * @param string $inputString expression text
 * @return mixed computed value
 * @throws Exception when the text cannot be parsed
 */
public function getValue($inputString)
{
    $tree = $this->parse($inputString);

    if (!$tree) {
        throw new Exception('Cannot parse arithmetic expression.');
    }

    return $this->getExpressionValue($tree->getSubnode(0));
}

/**
 * Recursively evaluates an `expr` node according to its detail type.
 *
 * Binary alternatives keep their operands in subnodes 0 and 2, the
 * bracket alternative keeps the inner expression in subnode 1, and the
 * value alternative keeps the number in subnode 0.
 *
 * @param mixed $expr parsed `expr` node
 * @return mixed computed value; null for a detail type not listed below
 */
protected function getExpressionValue($expr)
{
    $type = $expr->getDetailType();

    if ($type == "add") {
        return $this->getExpressionValue($expr->getSubnode(0)) + $this->getExpressionValue($expr->getSubnode(2));
    }

    if ($type == "sub") {
        return $this->getExpressionValue($expr->getSubnode(0)) - $this->getExpressionValue($expr->getSubnode(2));
    }

    if ($type == "mul") {
        return $this->getExpressionValue($expr->getSubnode(0)) * $this->getExpressionValue($expr->getSubnode(2));
    }

    if ($type == "div") {
        return $this->getExpressionValue($expr->getSubnode(0)) / $this->getExpressionValue($expr->getSubnode(2));
    }

    if ($type == "bra") {
        return $this->getExpressionValue($expr->getSubnode(1));
    }

    if ($type == "val") {
        return $expr->getSubnode(0)->getValue();
    }

    // No alternative matched: null is returned.
    return null;
}
/**
 * Finds the first index at or after $fromIndex that may be used as a result.
 *
 * An index inside the string is skipped when it is a key of $restrictedEnd
 * or, with ignoreWhitespaces enabled, when the character there is listed in
 * self::$whiteChars. Scanning stops at the first index past the end of the
 * string.
 *
 * @param string $string        text being parsed
 * @param int    $fromIndex     index where scanning starts
 * @param array  $restrictedEnd indexes (as keys) that must not be returned
 * @return int|null the found index; null when the end of the string was
 *                  reached and that index is restricted
 */
protected function getNextNonrestrictedIndex($string, $fromIndex, $restrictedEnd)
{
    for ($pos = $fromIndex; ; $pos++) {
        $restricted = isset($restrictedEnd[$pos]);

        // Past the last character: only the restriction decides.
        if (!isset($string[$pos])) {
            return $restricted ? null : $pos;
        }

        $skippable = $restricted
            || ($this->ignoreWhitespaces && isset(self::$whiteChars[$string[$pos]]));

        if (!$skippable) {
            return $pos;
        }
    }
}
/**
 * Tries to read a date/time in $this->format starting at $fromIndex.
 *
 * At most $this->maxLength characters are examined. When the first attempt
 * reports errors, the candidate is cut at the key of the first reported
 * error and parsed once more; a second failure rejects the match.
 *
 * @param string $string        text being parsed
 * @param int    $fromIndex     index where the date is expected to start
 * @param array  $restrictedEnd end offsets (as keys) that are not allowed
 * @return array|false ['node' => LeafTime node, 'offset' => end index],
 *                     or false when nothing acceptable was matched
 */
public function rparse($string, $fromIndex = 0, $restrictedEnd = [])
{
    $s = substr($string, $fromIndex, $this->maxLength);
    $data = date_parse_from_format($this->format, $s);

    if (!empty($data['errors'])) {
        // The key of the first error is used as the length of the prefix to retry with.
        reset($data['errors']);
        $firstErrorPosition = key($data['errors']);

        $s = substr($s, 0, $firstErrorPosition);
        $data = date_parse_from_format($this->format, $s);
        if (!empty($data['errors'])) {
            return false;
        }
    }

    $end = $fromIndex + strlen($s);

    // Start from an empty string so $whiteChars is always defined,
    // even if preg_match() does not report a match.
    $whiteChars = '';
    if ($this->eatWhiteChars && preg_match('/\s*/', $string, $match, 0, $end)) {
        $whiteChars = $match[0];
        $end += strlen($whiteChars);
    }

    if (isset($restrictedEnd[$end])) {
        // Track the furthest start index whose match was rejected by the restriction.
        if ($this->lastNMatch < $fromIndex) {
            $this->lastNMatch = $fromIndex;
        }

        return false;
    }

    $node = new \ParserGenerator\SyntaxTreeNode\LeafTime($s, $whiteChars, $data);

    // Track the furthest start index that produced a match.
    if ($this->lastMatch < $fromIndex) {
        $this->lastMatch = $fromIndex;
    }

    return ['node' => $node, 'offset' => $end];
}
implode(',', $restrictedEnd); 10 | 11 | if (isset($this->parser->cache[$cacheStr])) { 12 | return $this->parser->cache[$cacheStr]; 13 | } 14 | $this->parser->cache[$cacheStr] = false; 15 | 16 | foreach ($this->node as $_optionIndex => $option) { 17 | $subnodes = []; 18 | $optionIndex = 0; 19 | $indexes = [-1 => $fromIndex]; 20 | $optionCount = count($option); 21 | //!!! TODO: 22 | $restrictedEnds = [[], [], [], [], [], [], [], []]; 23 | $restrictedEnds[$optionCount - 1] = $restrictedEnd; 24 | while (true) { 25 | $subNode = $option[$optionIndex]->rparse($string, $indexes[$optionIndex - 1], 26 | $restrictedEnds[$optionIndex]); 27 | if ($subNode) { 28 | $subNodeOffset = $subNode['offset']; 29 | $subnodes[$optionIndex] = $subNode['node']; 30 | $restrictedEnds[$optionIndex][$subNodeOffset] = $subNodeOffset; 31 | $indexes[$optionIndex] = $subNodeOffset; 32 | if (++$optionIndex === $optionCount) { 33 | break; 34 | }; 35 | } elseif ($optionIndex-- === 0) { 36 | continue 2; 37 | } 38 | } 39 | // match 40 | $index = $indexes[$optionCount - 1]; 41 | $node = new \ParserGenerator\SyntaxTreeNode\Branch($this->nodeShortName, $_optionIndex, $subnodes); 42 | $r = ['node' => $node, 'offset' => $index]; 43 | $this->parser->cache[$cacheStr] = $r; 44 | return $r; 45 | } 46 | return false; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/Examples/JSONParser.php: -------------------------------------------------------------------------------- 1 | getJSONDefinition(), ['ignoreWhitespaces' => true]); 12 | } 13 | 14 | protected function getJSONDefinition() 15 | { 16 | return ' 17 | start: => value. 18 | value:bool => ("true"|"false") 19 | :string => string 20 | :number => -inf..inf 21 | :array => "[" value*"," "]" 22 | :object => "{" objValue*"," "}". 23 | objValue: => string ":" value. 
/**
 * Parses JSON text and converts it to a PHP value.
 *
 * @param string $jsonString JSON text
 * @return mixed decoded value
 * @throws Exception when the text does not match the JSON grammar
 */
public function getValue($jsonString)
{
    $root = $this->parse($jsonString);

    if ($root) {
        return $this->getValueOfNode($root->getSubnode(0));
    }

    throw new Exception("Given string is not proper JSON");
}

/**
 * Converts a parsed `value` node to the PHP value it represents.
 *
 * @param \ParserGenerator\SyntaxTreeNode\Branch $node parsed `value` node
 * @return mixed bool, string/number value, or array (for both JSON arrays
 *               and objects); null for a detail type not listed below
 */
protected function getValueOfNode(\ParserGenerator\SyntaxTreeNode\Branch $node)
{
    $type = $node->getDetailType();

    if ($type == "bool") {
        return (string)$node === "true";
    }

    if ($type == "string" || $type == "number") {
        return $node->getSubnode(0)->getValue();
    }

    if ($type == "array") {
        // Subnode 1 is the `value*","` series between the brackets.
        $items = [];

        foreach ($node->getSubnode(1)->getMainNodes() as $itemNode) {
            $items[] = $this->getValueOfNode($itemNode);
        }

        return $items;
    }

    if ($type == "object") {
        // Each objValue is `string ":" value`: key in subnode 0, value in subnode 2.
        $map = [];

        foreach ($node->getSubnode(1)->getMainNodes() as $pairNode) {
            $key = $pairNode->getSubnode(0)->getValue();
            $map[$key] = $this->getValueOfNode($pairNode->getSubnode(2));
        }

        return $map;
    }

    // No alternative matched: null is returned.
    return null;
}
43 | } 44 | 45 | public function testEatExactNumberOfWhitespaces() 46 | { 47 | $x = new Parser('start :=> whiteSpace whiteSpace whiteSpace.'); 48 | 49 | $this->assertFalse($x->parse(" ")); 50 | $this->assertObject($x->parse(" ")); 51 | $this->assertFalse($x->parse(" ")); 52 | 53 | $this->assertFalse($x->parse("\r\n\r\n")); 54 | $this->assertObject($x->parse("\r\n\r\n\r\n")); 55 | $this->assertFalse($x->parse("\r\n\r\n\r\n\r\n")); 56 | } 57 | 58 | public function testNegativeLookahead() 59 | { 60 | $x = new Parser('start :=> !space whiteSpace !newLine whiteSpace.'); 61 | 62 | $this->assertObject($x->parse("\r\n ")); 63 | $this->assertObject($x->parse("\t\t")); 64 | $this->assertFalse($x->parse(" ")); 65 | $this->assertFalse($x->parse("\n\n")); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /tests/Examples/JSONParserTest.php: -------------------------------------------------------------------------------- 1 | assertTrue($jsonParser->getValue('true')); 14 | $this->assertFalse($jsonParser->getValue('false')); 15 | $this->assertEquals(23, $jsonParser->getValue('23')); 16 | $this->assertEquals(0, $jsonParser->getValue('0')); 17 | $this->assertEquals("Lorem ipsum", $jsonParser->getValue('"Lorem ipsum"')); 18 | $this->assertEquals("", $jsonParser->getValue('""')); 19 | $this->assertEquals("Lorem \n ipsum", $jsonParser->getValue('"Lorem \n ipsum"')); 20 | } 21 | 22 | public function testArray() 23 | { 24 | $jsonParser = new JSONParser(); 25 | $this->assertEquals([], $jsonParser->getValue('[]')); 26 | $this->assertEquals([true], $jsonParser->getValue('[true]')); 27 | $this->assertEquals([1, 2, 3], $jsonParser->getValue('[1, 2, 3]')); 28 | $this->assertEquals(["1, 2", "3"], $jsonParser->getValue('["1, 2", "3"]')); 29 | $this->assertEquals([[], [[]]], $jsonParser->getValue('[[],[[]]]')); 30 | $this->assertEquals([["a1", "a2"], ["b1", "b2"]], 31 | $jsonParser->getValue('[["a1", "a2"], ["b1", "b2"]]')); 32 | } 33 | 34 | public 
/**
 * Tells whether a grammar definition is a list of sequences (an array whose
 * elements are themselves arrays) rather than one flat sequence.
 *
 * Only the first element is inspected; the remaining elements are assumed
 * to have the same shape.
 *
 * @param array $arr sequence definition returned by getGrammarGrammarSequence()
 *
 * @return bool true when the first element is an array; false otherwise,
 *              including for an empty input (which previously produced an
 *              implicit null by falling off the end of the function)
 */
protected function isArrayOfSequences($arr)
{
    foreach ($arr as $arrayItem) {
        // The very first element decides the answer.
        return is_array($arrayItem);
    }

    // Nothing to inspect: an empty definition is not a list of sequences.
    return false;
}
/**
 * Extracts the index part of a sequence item's detail type.
 *
 * The item's detail type is split on $this->detailTypeSeparator. The index
 * is reported only when the first part equals this extension's own base
 * detail type (getDetailType(null)) and a second part is present.
 *
 * @param object $sequenceItem node exposing getDetailType()
 *
 * @return string|null the index part, or null when the item does not carry
 *                     this extension's detail type
 */
protected function getDetailTypeIndex($sequenceItem)
{
    $parts = explode($this->detailTypeSeparator, $sequenceItem->getDetailType());
    $belongsToThisExtension = $this->getDetailType(null) === $parts[0];

    return ($belongsToThisExtension && isset($parts[1])) ? $parts[1] : null;
}
/**
 * Converts a parsed syntax-tree branch into a plain PHP value.
 *
 * - value:string     -> subnode 1 cast to string
 * - value:object     -> value of subnode 0
 * - objValues:indent -> value of subnode 0
 * - objValues:values -> associative array built from the main nodes of
 *                       subnode 0 (key: subnode 2 as string, value:
 *                       converted subnode 5)
 *
 * @param \ParserGenerator\SyntaxTreeNode\Branch $node
 *
 * @return string|array|null null for any other type / detail type pair
 */
protected function getValueOfNode(\ParserGenerator\SyntaxTreeNode\Branch $node)
{
    $type = $node->getType();
    $detailType = $node->getDetailType();

    if ($type == 'value') {
        if ($detailType == 'string') {
            return (string)$node->getSubnode(1);
        }

        if ($detailType == 'object') {
            return $this->getValueOfNode($node->getSubnode(0));
        }

        return null;
    }

    if ($type == 'objValues') {
        if ($detailType == 'indent') {
            return $this->getValueOfNode($node->getSubnode(0));
        }

        if ($detailType == 'values') {
            $map = [];
            foreach ($node->getSubnode(0)->getMainNodes() as $entry) {
                $key = (string)$entry->getSubnode(2);
                $map[$key] = $this->getValueOfNode($entry->getSubnode(5));
            }

            return $map;
        }
    }

    return null;
}
-------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 23 | } 24 | 25 | public function testBase() 26 | { 27 | $x = new Parser('start :=> time(Y-m-d).'); 28 | 29 | $this->assertObject($x->parse('2024-10-11')); 30 | $this->assertObject($x->parse('2004-05-06')); 31 | $this->assertFalse($x->parse('2004-05-06a')); 32 | $this->assertFalse($x->parse('2004-05')); 33 | } 34 | 35 | public function testDataWithStdFormat() 36 | { 37 | $x = new Parser('start :=> "q" time(Y-m-d) text.'); 38 | 39 | $this->assertEquals(new \DateTime('2024-10-11'), $x->parse('q2024-10-11')->getSubnode(1)->getValue()); 40 | $this->assertEquals(new \DateTime('2004-05-06'), $x->parse('q2004-05-06 more text')->getSubnode(1)->getValue()); 41 | } 42 | 43 | public function testCantParse() 44 | { 45 | $x = new Parser('start :=> "q" time(Y-m-d) text.'); 46 | 47 | $this->assertFalse($x->parse('q2024-10')); 48 | $this->assertFalse($x->parse('q2004-05 more text')); 49 | } 50 | 51 | public function testDataWithOtherFormat() 52 | { 53 | $x = new Parser('start :=> "q" time(d.m.Y) text.'); 54 | 55 | $this->assertEquals(new \DateTime('2024-10-11'), $x->parse('q11.10.2024')->getSubnode(1)->getValue()); 56 | $this->assertEquals(new \DateTime('2004-05-06'), $x->parse('q06.05.2004 more text')->getSubnode(1)->getValue()); 57 | } 58 | 59 | public function testDataShouldProperlyCaptureWhitespaces() 60 | { 61 | $x = new Parser('start :=> time(Y-m-d) text.', ['ignoreWhitespaces' => true]); 62 | 63 | $timeNode = $x->parse('2014-03-08 lorem ipsum')->getSubnode(0); 64 | $this->assertEquals('2014-03-08 ', 65 | $timeNode->toString(\ParserGenerator\SyntaxTreeNode\Base::TO_STRING_ORIGINAL)); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/GrammarNode/WhitespaceContextCheck.php: -------------------------------------------------------------------------------- 1 | char = $char; 15 | } 16 | 17 | public function 
rparse($string, $fromIndex = 0, $restrictedEnd = []) 18 | { 19 | $substring = substr($string, $fromIndex, 2); 20 | if ($substring === "\r\n") { 21 | $stringChar = "\n"; 22 | $offset = 2; 23 | } else { 24 | $stringChar = substr($string, $fromIndex, 1); 25 | $substring = $stringChar; 26 | if ($stringChar === "\r") { 27 | $stringChar = "\n"; 28 | } 29 | $offset = 1; 30 | } 31 | 32 | if (($this->char === null) ? in_array($stringChar, self::$whiteCharacters, 33 | true) : ($this->char === $stringChar)) { 34 | if (!isset($restrictedEnd[$fromIndex + $offset])) { 35 | return [ 36 | 'node' => new \ParserGenerator\SyntaxTreeNode\Leaf($substring), 37 | 'offset' => $fromIndex + $offset, 38 | ]; 39 | } 40 | } 41 | 42 | if (!isset($restrictedEnd[$fromIndex])) { 43 | $index = $fromIndex; 44 | while (--$index >= 0 && in_array(substr($string, $index, 1), self::$whiteCharacters, true)) { 45 | $char = substr($string, $index, 1); 46 | if ($char === "\r") { 47 | $char = "\n"; 48 | } 49 | if ($this->char === null || $char === $this->char) { 50 | return ['node' => new \ParserGenerator\SyntaxTreeNode\Leaf(''), 'offset' => $fromIndex]; 51 | } 52 | } 53 | 54 | if ($index < 0) { 55 | return ['node' => new \ParserGenerator\SyntaxTreeNode\Leaf(''), 'offset' => $fromIndex]; 56 | } 57 | } 58 | 59 | return false; 60 | } 61 | 62 | public function __toString() 63 | { 64 | return (string)$this->char; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/Extension/WhiteCharactersContext.php: -------------------------------------------------------------------------------- 1 | eatWhiteChars = $eatWhiteChars; 20 | $this->caseInsensitive = $caseInsensitive; 21 | $this->givenRegex = $regex; 22 | if (preg_match('/\/(.*)\/([A-Za-z]*)/s', $regex, $match)) { 23 | $regexBody = $match[1]; 24 | $regexModifiers = $match[2]; 25 | if (strpos($regexModifiers, 'i') === false && $caseInsensitive) { 26 | $regexModifiers .= 'i'; 27 | } 28 | $this->regex = '/(' . 
/**
 * Tries to match the configured regex at $fromIndex.
 *
 * $this->regex has the form "/(body)?\s*/modifiers", so group 1 is present
 * in the match only when the user-supplied body itself matched. The
 * reported end offset includes the trailing whitespace when
 * $this->eatWhiteChars is set; that whitespace becomes the second argument
 * of the produced Leaf.
 *
 * $this->lastMatch is raised to $fromIndex on success and
 * $this->lastNMatch is raised to $fromIndex on failure.
 *
 * @param string $string        text being parsed
 * @param int    $fromIndex     offset at which matching starts
 * @param array  $restrictedEnd end offsets (as keys) that must be rejected
 *
 * @return array|false ['node' => Leaf, 'offset' => int] on success, false otherwise
 */
public function rparse($string, $fromIndex = 0, $restrictedEnd = [])
{
    $bodyMatched = preg_match($this->regex, $string, $groups, 0, $fromIndex)
        && isset($groups[1]);

    if ($bodyMatched) {
        $consumed = $this->eatWhiteChars ? $groups[0] : $groups[1];
        $end = strlen($consumed) + $fromIndex;
    }

    if (!$bodyMatched || isset($restrictedEnd[$end])) {
        if ($this->lastNMatch < $fromIndex) {
            $this->lastNMatch = $fromIndex;
        }

        return false;
    }

    $trailing = $this->eatWhiteChars ? substr($groups[0], strlen($groups[1])) : '';
    $leaf = new \ParserGenerator\SyntaxTreeNode\Leaf($groups[1], $trailing);

    if ($this->lastMatch < $fromIndex) {
        $this->lastMatch = $fromIndex;
    }

    return ['node' => $leaf, 'offset' => $end];
}
25 | key: => string 26 | '; 27 | } 28 | 29 | public function setObjectsPropertiesOrder($node) 30 | { 31 | $node->inPlaceTranslate('value:object', function ($node) { 32 | $node->getSubnode(1)->orderBy('key'); 33 | }); 34 | } 35 | 36 | public function setIndention($node, $indention = ' ', $start = "\n") 37 | { 38 | if ($node->getType() === 'start') { 39 | foreach ($node->getLeafs() as $leaf) { 40 | $leaf->setAfterContent((string)$leaf == ':' ? ' ' : ''); 41 | } 42 | 43 | return $this->setIndention($node->getSubnode(0), $indention, $start); 44 | } elseif ($node->getType() !== 'value') { 45 | throw new Exception('Function JSONFormater::setIndention can be used only on nodes with type start or value'); 46 | } 47 | 48 | if ($node->getDetailType() == 'array' || $node->getDetailType() == 'object') { 49 | $node->getSubnode(0)->setAfterContent($start . $indention); 50 | 51 | $collection = $node->getSubnode(1); 52 | 53 | foreach ($collection->getSeparators() as $separator) { 54 | $separator->setAfterContent($start . $indention); 55 | } 56 | 57 | $collection->setAfterContent($start); 58 | 59 | foreach ($collection->getMainNodes() as $collectionNode) { 60 | if ($node->getDetailType() === 'array') { 61 | $this->setIndention($collectionNode, $indention, $start . $indention); 62 | } else { 63 | $this->setIndention($collectionNode->getSubnode(2), $indention, $start . $indention); 64 | } 65 | } 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /tests/ParsedNodes/PredefinedStringTest.php: -------------------------------------------------------------------------------- 1 | assertEquals($apo, $x->getValue()); 18 | $this->assertEquals($slash . $apo, $x->getPHPValue()); 19 | 20 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($quot . $slash . $slash . 
$quot); 21 | $this->assertEquals($slash, $x->getValue()); 22 | $this->assertEquals($slash, $x->getPHPValue()); 23 | 24 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($quot . $slash . $quot . $quot); 25 | $this->assertEquals($quot, $x->getValue()); 26 | $this->assertEquals($quot, $x->getPHPValue()); 27 | 28 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($quot . $slash . "n" . $quot); 29 | $this->assertEquals("\n", $x->getValue()); 30 | $this->assertEquals("\n", $x->getPHPValue()); 31 | 32 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($quot . $slash . "t" . $quot); 33 | $this->assertEquals("\t", $x->getValue()); 34 | $this->assertEquals("\t", $x->getPHPValue()); 35 | 36 | 37 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($apo . $slash . $apo . $apo); 38 | $this->assertEquals($apo, $x->getValue()); 39 | $this->assertEquals($apo, $x->getPHPValue()); 40 | 41 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($apo . $slash . $slash . $apo); 42 | $this->assertEquals($slash, $x->getValue()); 43 | $this->assertEquals($slash, $x->getPHPValue()); 44 | 45 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($apo . $slash . $quot . $apo); 46 | $this->assertEquals($quot, $x->getValue()); 47 | $this->assertEquals($slash . $quot, $x->getPHPValue()); 48 | 49 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($apo . $slash . "n" . $apo); 50 | $this->assertEquals("\n", $x->getValue()); 51 | $this->assertEquals($slash . "n", $x->getPHPValue()); 52 | 53 | $x = new \ParserGenerator\SyntaxTreeNode\PredefinedString($apo . $slash . "t" . $apo); 54 | $this->assertEquals("\t", $x->getValue()); 55 | $this->assertEquals($slash . 
"t", $x->getPHPValue()); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/GrammarNode/ParametrizedNode.php: -------------------------------------------------------------------------------- 1 | abstractNode = $abstractNode; 17 | $this->params = $params; 18 | } 19 | 20 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 21 | { 22 | if (!$this->node) { 23 | $this->node = $this->createNode(); 24 | } 25 | 26 | return $this->node->rparse($string, $fromIndex, $restrictedEnd); 27 | } 28 | 29 | protected function createNode() 30 | { 31 | $params = $this->params; 32 | $parser = $this->parser; 33 | return GrammarNodeCopier::copy($this->abstractNode, function ($node) use ($params, $parser) { 34 | if ($node instanceof ErrorTrackDecorator) { 35 | $node = $node->getDecoratedNode(); 36 | } 37 | 38 | if ($node instanceof ParameterNode) { 39 | if (empty($params[$node->getIndex()])) { 40 | throw new Exception("Parameter " . $node->getParameterName() . " with index " . $node->getIndex() . " in branch " . $node->getBranchName() . " not provided"); 41 | } 42 | return $params[$node->getIndex()]; 43 | } 44 | 45 | if ($node instanceof LeafInterface) { 46 | return false; 47 | } 48 | 49 | if ($node instanceof BranchInterface) { 50 | $name = $node->getNodeName(); 51 | if (isset($parser->grammar[$name]) && $parser->grammar[$name] === $node) { 52 | return false; 53 | } 54 | } 55 | 56 | return true; 57 | }); 58 | } 59 | 60 | public function __toString() 61 | { 62 | return $this->abstractNode . '<' . implode(',', $this->params) . 
'>'; 63 | } 64 | 65 | public function setParser(\ParserGenerator\Parser $parser) 66 | { 67 | $this->parser = $parser; 68 | } 69 | 70 | public function getParser() 71 | { 72 | return $this->parser; 73 | } 74 | 75 | public function copy($callback) 76 | { 77 | $copy = new static($this->abstractNode, $callback($this->params)); 78 | $copy->setParser($this->getParser()); 79 | return $copy; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/Extension/Series.php: -------------------------------------------------------------------------------- 1 | buildSequenceItem($grammar, $sequenceItem->getSubnode(0), $options); 23 | if ($options['trackError']) { 24 | $main = new \ParserGenerator\GrammarNode\ErrorTrackDecorator($main); 25 | } 26 | 27 | if ($sequenceItem->getSubnode(4)) { 28 | $separator = $grammarParser->buildSequenceItem($grammar, $sequenceItem->getSubnode(4), $options); 29 | if ($options['trackError']) { 30 | $separator = new \ParserGenerator\GrammarNode\ErrorTrackDecorator($separator); 31 | } 32 | } else { 33 | $separator = null; 34 | } 35 | 36 | $forceGreedy = $options['defaultBranchType'] === BranchFactory::PEG; 37 | $operator = (string)$sequenceItem->getSubnode(2); 38 | switch ($operator) { 39 | case '++': 40 | case '**': 41 | case '+': 42 | case '*': 43 | $greedy = in_array($operator, ['**', '++']) || $forceGreedy; 44 | $node = new \ParserGenerator\GrammarNode\Series($main, $separator, 45 | in_array($operator, ['*', '**']), $greedy, $options['defaultBranchType']); 46 | $node->setParser($options['parser']); 47 | 48 | return $node; 49 | case '??': 50 | case '?': 51 | $toStringCallback = function($_, $choices) use ($operator) { return $choices[$operator == '??' ? 0 : 1] . $operator; }; 52 | 53 | $empty = new \ParserGenerator\GrammarNode\Text(''); 54 | $choices = ($operator == '??' || $forceGreedy) ? 
/**
 * Sorts the main nodes of the series in place, leaving separators where
 * they are.
 *
 * @param callable|string|null $callback
 *        - string: nodes are compared with strnatcmp() on the string value
 *          of findFirst($callback) of each node;
 *        - null: nodes are compared with strnatcmp() on their string value;
 *        - anything else is handed to usort() as the comparator.
 *
 * @return void
 */
public function orderBy($callback = null)
{
    if (is_string($callback)) {
        $compareBy = $callback;
        $callback = function ($a, $b) use ($compareBy) {
            return strnatcmp((string)$a->findFirst($compareBy), (string)$b->findFirst($compareBy));
        };
    } elseif ($callback === null) {
        $callback = function ($a, $b) {
            return strnatcmp((string)$a, (string)$b);
        };
    }

    $mainNodes = $this->getMainNodes();
    usort($mainNodes, $callback);

    // With separators, main nodes sit at even positions of $this->subnodes
    // (0, 2, 4, ...). Without separators every subnode is a main node, so
    // the stride must be 1. The stride was previously always 2, which
    // scattered the sorted nodes and grew the array for separator-less series.
    $stride = $this->isWithSeparator ? 2 : 1;

    foreach ($mainNodes as $index => $node) {
        $this->subnodes[$index * $stride] = $node;
    }
}
/**
 * Parses one of the alternatives by delegating to the internal branch node.
 *
 * The detail type of the node produced by the internal branch is used as
 * the key into $this->reduce. When that flag is set (the constructor sets
 * it for alternatives that were given as a single node rather than as an
 * array), the wrapper is replaced by its only subnode.
 *
 * @param string $string        text being parsed
 * @param int    $fromIndex     offset at which parsing starts
 * @param array  $restrictedEnd end offsets (as keys) that must be rejected
 *
 * @return array|false parse result with 'node' and 'offset', or false
 */
public function rparse($string, $fromIndex = 0, $restrictedEnd = [])
{
    $result = $this->grammarNode->rparse($string, $fromIndex, $restrictedEnd);

    if (!$result) {
        return false;
    }

    $wrapper = $result['node'];

    if ($this->reduce[$wrapper->getDetailType()]) {
        $result['node'] = $wrapper->getSubnode(0);
    }

    return $result;
}
/**
 * Creates a copy of this choice node with its alternatives transformed by
 * the given callback.
 *
 * The custom string-rendering callback is carried over to the copy.
 * Previously it was dropped, so a copied choice fell back to the generic
 * "(a|b)" rendering in __toString().
 *
 * @param callable $copyCallback receives the array of choices and returns
 *                               the array of choices for the copy
 *
 * @return static
 */
public function copy($copyCallback)
{
    $copy = new static($copyCallback($this->choices), $this->toStringCallback);
    $copy->setParser($this->parser);

    return $copy;
}
| $this->assertObject($x->parse('5')); 55 | 56 | $x = new Parser('start :=> 0..31 .'); 57 | $this->assertFalse($x->parse('05')); 58 | $this->assertObject($x->parse('5')); 59 | 60 | // option switcher "/" turn off all autooptions 61 | $x = new Parser('start :=> 01..31/d .'); 62 | $this->assertFalse($x->parse('05')); 63 | $this->assertObject($x->parse('5')); 64 | 65 | // "/" turn off even decimal format 66 | $x = new Parser('start :=> 0..32/h .'); 67 | $this->assertObject($x->parse('0x5')); 68 | $this->assertFalse($x->parse('0')); 69 | $this->assertFalse($x->parse('20')); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /tests/GrammarNodes/GrammarNodeNumericTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_array($node) && isset($node['node'])); 13 | $this->assertEquals($expected, (string)$node['node']); 14 | } 15 | 16 | public function testBasic() 17 | { 18 | $x = new Numeric(); 19 | 20 | $this->assertNodeEquals('5', $x->rparse('5b123vb', 0, [])); 21 | $this->assertNodeEquals('123', $x->rparse('5b123vb', 2, [])); 22 | $this->assertNodeEquals('-123', $x->rparse('5b-123vb', 2, [])); 23 | $this->assertFalse($x->rparse('ab123vb', 2, [5 => 5])); 24 | $this->assertNodeEquals('0', $x->rparse('ab0x3vb', 2, [])); 25 | $this->assertFalse($x->rparse('ab0x3vb', 2, [3 => 3])); 26 | } 27 | 28 | public function testBase() 29 | { 30 | $x = new Numeric(['formatHex' => true]); 31 | 32 | $this->assertNodeEquals('123', $x->rparse('ab123vb', 2, [])); 33 | $this->assertFalse($x->rparse('ab123vb', 2, [5 => 5])); 34 | $this->assertNodeEquals('0x3', $x->rparse('ab0x3vb', 2, [])); 35 | $this->assertNodeEquals('0', $x->rparse('ab0x3vb', 2, [5 => 5])); 36 | $this->assertFalse($x->rparse('ab0x3vb', 2, [3 => 3, 5 => 5])); 37 | } 38 | 39 | public function testMinMax() 40 | { 41 | $x = new Numeric([ 42 | 'formatHex' => true, 43 | 'formatBin' => true, 44 | 'min' => 7, 45 | 'max' => 250, 46 | 
]); 47 | 48 | $this->assertFalse($x->rparse('-8', 0, [])); 49 | $this->assertFalse($x->rparse('6', 0, [])); 50 | $this->assertFalse($x->rparse('251', 0, [])); 51 | $this->assertFalse($x->rparse('1000', 0, [])); 52 | $this->assertFalse($x->rparse('5000', 0, [])); 53 | $this->assertNodeEquals('7', $x->rparse('7', 0, [])); 54 | $this->assertNodeEquals('90', $x->rparse('90', 0, [])); 55 | $this->assertNodeEquals('57', $x->rparse('57', 0, [])); 56 | $this->assertNodeEquals('250', $x->rparse('250', 0, [])); 57 | $this->assertNodeEquals('0x7', $x->rparse('0x7', 0, [])); 58 | $this->assertNodeEquals('0xfa', $x->rparse('0xfa', 0, [])); 59 | $this->assertFalse($x->rparse('0x250', 0, [])); 60 | $this->assertFalse($x->rparse('0xfb', 0, [])); 61 | $this->assertFalse($x->rparse('0x110', 0, [])); 62 | $this->assertNodeEquals('0b111', $x->rparse('0b111', 0, [])); 63 | $this->assertNodeEquals('0b11111010', $x->rparse('0b11111010', 0, [])); 64 | $this->assertFalse($x->rparse('0b11111011', 0, [])); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /tests/Examples/BooleanExpressionParserTest.php: -------------------------------------------------------------------------------- 1 | parser = new BooleanExpressionParser(); 18 | } 19 | 20 | /** 21 | * @param string $input 22 | * @dataProvider dataForParseSuccessful 23 | */ 24 | public function testParseSuccessful(string $input) 25 | { 26 | $result = $this->parser->parse($input); 27 | 28 | $this->assertNotFalse($result); 29 | } 30 | 31 | public static function dataForParseSuccessful(): array 32 | { 33 | return [ 34 | ['foo'], 35 | ['foo or bar'], 36 | ['"foo" or \'bar\''], 37 | ['foo or (bar and not baz)'], 38 | ['foo and bar'], 39 | ['foo\nor\n\tbar'], 40 | ['not foo or bar'], 41 | ['not foo and bar'], 42 | ['not (foo or bar)'], 43 | ['(a or (b or (c or (d or (e or f) and g))))'], 44 | ['foo and not bar'], 45 | ['foo OR NOT bar'], 46 | ['foo OR "NOT bar"'], 47 | ['"foo or" and bar'], 48 | 
['"foo and (bar or baz)" or faz'], 49 | ['; or :'], 50 | ['{ or }'], 51 | ]; 52 | } 53 | 54 | /** 55 | * @param string $input 56 | * @param string $expectedError 57 | * @dataProvider dataForParseFail 58 | */ 59 | public function testParseFail(string $input, string $expectedError) 60 | { 61 | $result = $this->parser->parse($input); 62 | 63 | $this->assertFalse($result); 64 | 65 | $this->assertSame($expectedError, $this->parser->getException()->getMessage()); 66 | } 67 | 68 | public static function dataForParseFail(): array 69 | { 70 | return [ 71 | [ 72 | 'input' => 'foo or', 73 | 'error' => "line: 1, character: 7\nexpected: \"(\" or tokenNot or tokenKeyword or string\nEnd of string found.", 74 | ], 75 | [ 76 | 'input' => '"foo or', 77 | 'error' => "line: 1, character: 1\nexpected: \"(\" or tokenNot or tokenKeyword or string\nfound: \"foo or", 78 | ], 79 | [ 80 | 'input' => 'foo and (bar or baz', 81 | 'error' => "line: 1, character: 20\nexpected: \")\" or tokenAnd or tokenOr\nEnd of string found.", 82 | ], 83 | [ 84 | 'input' => 'foo not bar', 85 | 'error' => "line: 1, character: 5\nexpected: tokenAnd or tokenOr\nfound: not bar", 86 | ], 87 | ]; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/SyntaxTreeNode/Leaf.php: -------------------------------------------------------------------------------- 1 | content = $content; 13 | $this->afterContent = $afterContent; 14 | } 15 | 16 | public function getContent() 17 | { 18 | return $this->content; 19 | } 20 | 21 | public function setContent($newValue) 22 | { 23 | $this->content = $newValue; 24 | return $this; 25 | } 26 | 27 | public function dump($maxNestLevel = -1) 28 | { 29 | return $this->content; 30 | } 31 | 32 | public function toString($mode = \ParserGenerator\SyntaxTreeNode\Base::TO_STRING_NO_WHITESPACES) 33 | { 34 | switch ($mode) { 35 | case \ParserGenerator\SyntaxTreeNode\Base::TO_STRING_NO_WHITESPACES: 36 | return $this->content; 37 | case 
/**
 * Reports the difference between this leaf and another node.
 *
 * Two leaves are equal when their content is identical. A node that is not
 * a Leaf is always reported as different; its "content" property is no
 * longer read blindly, matching the instanceof guard used by compare().
 *
 * @param mixed $anotherNode  node to compare with
 * @param bool  $returnAsPair when true, each difference is reported as
 *                            [thisNode, anotherNode]; otherwise only this
 *                            node is reported
 *
 * @return array empty when equal, otherwise a one-element list
 */
public function diff($anotherNode, $returnAsPair = true)
{
    $sameContent = $anotherNode instanceof \ParserGenerator\SyntaxTreeNode\Leaf
        && $this->content === $anotherNode->content;

    if ($sameContent) {
        return [];
    }

    return $returnAsPair ? [[$this, $anotherNode]] : [$this];
}
$grammarGrammar['grammarBranch']['standard'] = $this->insert($grammarGrammar['grammarBranch']['standard'], 18 | ':branchName', ':branchParamsDef'); 19 | 20 | $grammarGrammar['branchParamsDef'] = [ 21 | ['<', ':branchParamsDefList', '>'], 22 | [''], 23 | ]; 24 | 25 | $grammarGrammar['branchParamsDefList'] = [ 26 | 'last' => [':branchName'], 27 | 'notLast' => [':branchName', ',', ':branchParamsDefList'], 28 | ]; 29 | 30 | $grammarGrammar['sequenceItem']['parametrizedNode'] = [ 31 | ':branchName', 32 | '<', 33 | ':parametrizedNodeParamsList', 34 | '>', 35 | ]; 36 | $grammarGrammar['parametrizedNodeParamsList'] = [ 37 | 'last' => [':sequenceItem'], 38 | 'notLast' => [':sequenceItem', ',', ':parametrizedNodeParamsList'], 39 | ]; 40 | 41 | return $grammarGrammar; 42 | } 43 | 44 | public function modifyBranches($grammar, $parsedGrammar, $grammarParser, $options) 45 | { 46 | foreach ($parsedGrammar->findAll('grammarBranch:standard') as $grammarBranch) { 47 | $name = (string)$grammarBranch->findFirst('branchName'); 48 | $i = 0; 49 | /* Note that in $nodeParams[$branchName] is set only for parametrized branch because 50 | * branchParamsDef/branchName exists only for parametrized branches 51 | */ 52 | foreach ($grammarBranch->findFirst('branchParamsDef')->findAll('branchName') as $branchName) { 53 | $this->nodeParams[$name][(string)$branchName] = new ParameterNode($i++, $name, (string)$branchName); 54 | } 55 | } 56 | 57 | return $grammar; 58 | } 59 | 60 | function buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options) 61 | { 62 | if ($sequenceItem->getDetailType() === 'branch') { 63 | $branchNode = $sequenceItem->nearestOwner('grammarBranch:standard'); 64 | $branchName = $branchNode ? 
(string)$branchNode->findFirst('branchName') : null; 65 | if ($branchNode && isset($this->nodeParams[$branchName][(string)$sequenceItem])) { 66 | return $this->nodeParams[$branchName][(string)$sequenceItem]; 67 | } 68 | return null; 69 | } 70 | 71 | if ($sequenceItem->getDetailType() === 'parametrizedNode') { 72 | $params = []; 73 | foreach ($sequenceItem->findFirst('parametrizedNodeParamsList')->findAll('sequenceItem') as $param) { 74 | $params[] = $grammarParser->buildSequenceItem($grammar, $param, $options); 75 | } 76 | 77 | $node = new GrammarNode($grammar[(string)$sequenceItem->findFirst('branchName')], $params); 78 | $node->setParser($options['parser']); 79 | return $node; 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/GrammarNode/Lookahead.php: -------------------------------------------------------------------------------- 1 | lookaheadNode = $lookaheadNode; 15 | $this->mainNode = $mainNode; 16 | $this->before = $before; 17 | $this->positive = $positive; 18 | } 19 | 20 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 21 | { 22 | if ($this->mainNode === null) { 23 | if (isset($restrictedEnd[$fromIndex])) { 24 | return false; 25 | } 26 | 27 | $match = $this->lookaheadNode->rparse($string, $fromIndex, []) !== false; 28 | 29 | if ($match === $this->positive) { 30 | return ['node' => new \ParserGenerator\SyntaxTreeNode\Leaf(''), 'offset' => $fromIndex]; 31 | } else { 32 | return false; 33 | } 34 | } elseif ($this->before) { 35 | $match = $this->lookaheadNode->rparse($string, $fromIndex, []) !== false; 36 | 37 | if ($match !== $this->positive) { 38 | return false; 39 | } 40 | 41 | return $this->mainNode->rparse($string, $fromIndex, $restrictedEnd); 42 | } else { // !$this->before 43 | while ($rparseResult = $this->mainNode->rparse($string, $fromIndex, $restrictedEnd)) { 44 | $offset = $rparseResult['offset']; 45 | $match = $this->lookaheadNode->rparse($string, $offset, []) !== 
false; 46 | 47 | if ($match === $this->positive) { 48 | return $rparseResult; 49 | } else { 50 | $restrictedEnd[$offset] = $offset; 51 | } 52 | } 53 | 54 | return false; 55 | } 56 | } 57 | 58 | public function getUsedNodes($startWithOnly = false, $onlyPositive = false) 59 | { 60 | $result = []; 61 | if ((!$startWithOnly || $this->before) && (!$onlyPositive || $this->positive)) { 62 | $result[] = $this->lookaheadNode; 63 | } 64 | if ($this->mainNode !== null) { 65 | $result[] = $this->mainNode; 66 | } 67 | 68 | return $result; 69 | } 70 | 71 | public function __toString() 72 | { 73 | $lookaheadStr = ($this->positive ? '?' : '!') . $this->lookaheadNode; 74 | if ($this->mainNode === null) { 75 | return $lookaheadStr; 76 | } elseif ($this->before) { 77 | return $lookaheadStr . ' ' . $this->mainNode; 78 | } else { 79 | return $this->mainNode . ' ' . $lookaheadStr; 80 | } 81 | } 82 | 83 | public function copy($copyCallback) 84 | { 85 | $copy = clone $this; 86 | $copy->lookaheadNode = $copyCallback($this->lookaheadNode); 87 | $copy->mainNode = $copyCallback($this->mainNode); 88 | return $copy; 89 | } 90 | 91 | public function isPositive() 92 | { 93 | return $this->positive; 94 | } 95 | 96 | public function getLookaheadNode() 97 | { 98 | return $this->lookaheadNode; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/GrammarNode/Branch.php: -------------------------------------------------------------------------------- 1 | nodeName = $nodeName; 18 | $this->nodeShortName = $nodeName; 19 | } 20 | 21 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 22 | { 23 | $cacheStr = $fromIndex . '-' . $this->nodeName . '-' . 
implode(',', $restrictedEnd); 24 | $lastResult = 31; 25 | 26 | if (isset($this->parser->cache[$cacheStr])) { 27 | if (is_int($this->parser->cache[$cacheStr])) { 28 | $this->parser->cache[$cacheStr] = false; 29 | } else { 30 | return $this->parser->cache[$cacheStr]; 31 | } 32 | } else { 33 | $this->parser->cache[$cacheStr] = 0; 34 | } 35 | beforeForeach: 36 | foreach ($this->node as $_optionIndex => $option) { 37 | $subnodes = []; 38 | $optionIndex = 0; 39 | $indexes = [-1 => $fromIndex]; 40 | $optionCount = count($option); 41 | //!!! TODO: 42 | $restrictedEnds = array_fill(0, $optionCount - 1, []); 43 | $restrictedEnds[$optionCount - 1] = $restrictedEnd; 44 | while (true) { 45 | $subNode = $option[$optionIndex]->rparse($string, $indexes[$optionIndex - 1], 46 | $restrictedEnds[$optionIndex]); 47 | if ($subNode) { 48 | $subNodeOffset = $subNode['offset']; 49 | $subnodes[$optionIndex] = $subNode['node']; 50 | $restrictedEnds[$optionIndex][$subNodeOffset] = $subNodeOffset; 51 | $indexes[$optionIndex] = $subNodeOffset; 52 | if (++$optionIndex === $optionCount) { 53 | break; 54 | }; 55 | } elseif ($optionIndex-- === 0) { 56 | continue 2; 57 | } 58 | } 59 | // match 60 | $index = $indexes[$optionCount - 1]; 61 | $node = new \ParserGenerator\SyntaxTreeNode\Branch($this->nodeShortName, $_optionIndex, $subnodes); 62 | $r = ['node' => $node, 'offset' => $index]; 63 | $this->parser->cache[$cacheStr] = $r; 64 | if ($r != $lastResult) { 65 | $lastResult = $r; 66 | goto beforeForeach; 67 | } 68 | return $r; 69 | } 70 | return false; 71 | } 72 | 73 | public function setParser(\ParserGenerator\Parser $parser) 74 | { 75 | $this->parser = $parser; 76 | } 77 | 78 | public function getParser() 79 | { 80 | return $this->parser; 81 | } 82 | 83 | public function setNode($node) 84 | { 85 | $this->node = $node; 86 | } 87 | 88 | public function getNode() 89 | { 90 | return $this->node; 91 | } 92 | 93 | public function getNodeName() 94 | { 95 | return $this->nodeName; 96 | } 97 | 98 | public 
function __toString() 99 | { 100 | return $this->getNodeName(); 101 | } 102 | 103 | public function copy($copyCallback) 104 | { 105 | $copy = clone $this; 106 | $copy->setNode($copyCallback($this->node)); 107 | $copy->nodeName = $this->nodeName . '&' . spl_object_hash($copy); 108 | return $copy; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## PHPStorm 3 | ################# 4 | .idea/ 5 | 6 | composer.lock 7 | vendor/ 8 | 9 | ################# 10 | ## Eclipse 11 | ################# 12 | 13 | *.pydevproject 14 | .project 15 | .metadata 16 | bin/ 17 | tmp/ 18 | *.tmp 19 | *.bak 20 | *.swp 21 | *~.nib 22 | local.properties 23 | .classpath 24 | .settings/ 25 | .loadpath 26 | 27 | # External tool builders 28 | .externalToolBuilders/ 29 | 30 | # Locally stored "Eclipse launch configurations" 31 | *.launch 32 | 33 | # CDT-specific 34 | .cproject 35 | 36 | # PDT-specific 37 | .buildpath 38 | 39 | ################# 40 | ## Netbeans 41 | ################# 42 | 43 | /nbproject 44 | 45 | ################# 46 | ## Visual Studio 47 | ################# 48 | 49 | ## Ignore Visual Studio temporary files, build results, and 50 | ## files generated by popular Visual Studio add-ons. 
51 | 52 | # User-specific files 53 | *.suo 54 | *.user 55 | *.sln.docstates 56 | 57 | # Build results 58 | 59 | [Dd]ebug/ 60 | [Rr]elease/ 61 | x64/ 62 | build/ 63 | [Bb]in/ 64 | [Oo]bj/ 65 | 66 | # MSTest test Results 67 | [Tt]est[Rr]esult*/ 68 | [Bb]uild[Ll]og.* 69 | 70 | *_i.c 71 | *_p.c 72 | *.ilk 73 | *.meta 74 | *.obj 75 | *.pch 76 | *.pdb 77 | *.pgc 78 | *.pgd 79 | *.rsp 80 | *.sbr 81 | *.tlb 82 | *.tli 83 | *.tlh 84 | *.tmp 85 | *.tmp_proj 86 | *.log 87 | *.vspscc 88 | *.vssscc 89 | .builds 90 | *.pidb 91 | *.log 92 | *.scc 93 | 94 | # Visual C++ cache files 95 | ipch/ 96 | *.aps 97 | *.ncb 98 | *.opensdf 99 | *.sdf 100 | *.cachefile 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | 107 | # Guidance Automation Toolkit 108 | *.gpState 109 | 110 | # ReSharper is a .NET coding add-in 111 | _ReSharper*/ 112 | *.[Rr]e[Ss]harper 113 | 114 | # TeamCity is a build add-in 115 | _TeamCity* 116 | 117 | # DotCover is a Code Coverage Tool 118 | *.dotCover 119 | 120 | # NCrunch 121 | *.ncrunch* 122 | .*crunch*.local.xml 123 | 124 | # Installshield output folder 125 | [Ee]xpress/ 126 | 127 | # DocProject is a documentation generator add-in 128 | DocProject/buildhelp/ 129 | DocProject/Help/*.HxT 130 | DocProject/Help/*.HxC 131 | DocProject/Help/*.hhc 132 | DocProject/Help/*.hhk 133 | DocProject/Help/*.hhp 134 | DocProject/Help/Html2 135 | DocProject/Help/html 136 | 137 | # Click-Once directory 138 | publish/ 139 | 140 | # Publish Web Output 141 | *.Publish.xml 142 | *.pubxml 143 | 144 | # NuGet Packages Directory 145 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 146 | #packages/ 147 | 148 | # Windows Azure Build Output 149 | csx 150 | *.build.csdef 151 | 152 | # Windows Store app package directory 153 | AppPackages/ 154 | 155 | # Others 156 | sql/ 157 | *.Cache 158 | ClientBin/ 159 | [Ss]tyle[Cc]op.* 160 | ~$* 161 | *~ 162 | *.dbmdl 163 | *.[Pp]ublish.xml 164 | *.pfx 165 | *.publishsettings 166 | 167 | # 
RIA/Silverlight projects 168 | Generated_Code/ 169 | 170 | # Backup & report files from converting an old project file to a newer 171 | # Visual Studio version. Backup files are not needed, because we have git ;-) 172 | _UpgradeReport_Files/ 173 | Backup*/ 174 | UpgradeLog*.XML 175 | UpgradeLog*.htm 176 | 177 | # SQL Server files 178 | App_Data/*.mdf 179 | App_Data/*.ldf 180 | 181 | ############# 182 | ## Windows detritus 183 | ############# 184 | 185 | # Windows image file caches 186 | Thumbs.db 187 | ehthumbs.db 188 | 189 | # Folder config file 190 | Desktop.ini 191 | 192 | # Recycle Bin used on file shares 193 | $RECYCLE.BIN/ 194 | 195 | # Mac crap 196 | .DS_Store 197 | 198 | 199 | ############# 200 | ## Python 201 | ############# 202 | 203 | *.py[co] 204 | 205 | # Packages 206 | *.egg 207 | *.egg-info 208 | dist/ 209 | build/ 210 | eggs/ 211 | parts/ 212 | var/ 213 | sdist/ 214 | develop-eggs/ 215 | .installed.cfg 216 | 217 | # Installer logs 218 | pip-log.txt 219 | 220 | # Unit test / coverage reports 221 | .coverage 222 | .tox 223 | 224 | #Translations 225 | *.mo 226 | 227 | #Mr Developer 228 | .mr.developer.cfg 229 | -------------------------------------------------------------------------------- /tests/Extension/ChoiceTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 17 | } 18 | 19 | public function testBase() 20 | { 21 | $x = new Parser('start :=> (abc | "z" | /[qwe]/) "." . 22 | abc :=> "abc".'); 23 | 24 | $this->assertFalse($x->parse('ax.')); 25 | $this->assertFalse($x->parse('a.')); 26 | $this->assertFalse($x->parse('.')); 27 | $this->assertFalse($x->parse('')); 28 | $this->assertObject($x->parse('abc.')); 29 | $this->assertObject($x->parse('z.')); 30 | $this->assertObject($x->parse('w.')); 31 | } 32 | 33 | public function testChoiceSchouldNotCreateNewLevelInResult() 34 | { 35 | $x = new Parser('start :=> (abc | "z" | /[qwe]/) "." . 
36 | abc :=> "abc".'); 37 | 38 | $this->assertEquals(new Root('start', 0, [ 39 | new Branch('abc', 0, 40 | [new Leaf('abc')]), 41 | new Leaf('.'), 42 | ]), $x->parse("abc.")); 43 | } 44 | 45 | public function testOnAmbigousGrammarChoiceSchouldPickFirstOption() 46 | { 47 | $x = new Parser('start :=> ("a" | "b" | "bc") /.*/ .'); 48 | 49 | $this->assertEquals(new Root('start', 0, [ 50 | new Leaf('b'), 51 | new Leaf('cd'), 52 | ]), $x->parse('bcd')); 53 | } 54 | 55 | public function testWithSeries() 56 | { 57 | $x = new Parser('start :=> (a | b)+ . 58 | a :=> "a". 59 | b :=> "b".'); 60 | 61 | $this->assertFalse($x->parse('')); 62 | $this->assertFalse($x->parse('abc')); 63 | 64 | $this->assertEquals(new Root('start', 0, [ 65 | new Series('list', '', [ 66 | new Branch('a', 0, 67 | [new Leaf('a')]), 68 | new Branch('b', 0, 69 | [new Leaf('b')]), 70 | new Branch('a', 0, 71 | [new Leaf('a')]), 72 | new Branch('b', 0, 73 | [new Leaf('b')]), 74 | new Branch('b', 0, [new Leaf('b')]), 75 | ], false), 76 | ]), $x->parse('ababb')); 77 | } 78 | 79 | public function testSeries() 80 | { 81 | $x = new Parser('start :=> ("a" | "b" "c" | "c" ?"d" | "d"++) /.+/.'); 82 | 83 | $this->assertEquals(new Root('start', 0, [ 84 | new Leaf('a'), 85 | new Leaf('a'), 86 | ]), $x->parse('aa')); 87 | 88 | $this->assertEquals(new Root('start', 0, [ 89 | new Leaf('c'), 90 | new Leaf('de'), 91 | ]), $x->parse('cde')); 92 | 93 | $parsed = $x->parse('bce'); 94 | $this->assertTrue((bool)$parsed->getSubnode(0)->getType()); 95 | $parsed->getSubnode(0)->setType(''); 96 | 97 | $this->assertEquals(new Root('start', 0, [ 98 | new Branch('', 1, [ 99 | new Leaf('b'), 100 | new Leaf('c'), 101 | ]), 102 | new Leaf('e'), 103 | ]), $parsed); 104 | 105 | $this->assertEquals(new Root('start', 0, [ 106 | new Series('list', 'd', [ 107 | new Leaf('d'), 108 | new Leaf('d'), 109 | ]), 110 | new Leaf('e'), 111 | ]), $x->parse('dde')); 112 | 113 | $this->assertEquals(new Root('start', 0, [ 114 | new Series('list', 'd', [ 115 
| new Leaf('d'), 116 | ]), 117 | new Leaf('e'), 118 | ]), $x->parse('de')); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/Extension/ItemRestrictions.php: -------------------------------------------------------------------------------- 1 | getNS('', false)] = [ 24 | [ 25 | ':sequenceItem', 26 | $this->getNS('condition'), 27 | ], 28 | ]; 29 | 30 | $grammarGrammar[$this->getNS('condition', false)] = [ 31 | [$this->getNS('conditionAnd'), 'or', $this->getNS('condition')], 32 | 'last' => [$this->getNS('conditionAnd')], 33 | ]; 34 | 35 | $grammarGrammar[$this->getNS('conditionAnd', false)] = [ 36 | [$this->getNS('simpleCondition'), 'and', $this->getNS('conditionAnd')], 37 | 'last' => [$this->getNS('simpleCondition')], 38 | ]; 39 | 40 | $grammarGrammar[$this->getNS('simpleCondition', false)] = [ 41 | 'bracket' => ['(', $this->getNS('condition'), ':comments', ')'], 42 | 'not' => ['not', $this->getNS('simpleCondition')], 43 | 'contain' => ['contain', ':sequenceItem'], 44 | 'is' => ['is', ':sequenceItem'], 45 | ]; 46 | 47 | return parent::extendGrammar($grammarGrammar); 48 | } 49 | 50 | protected function getNS($node = '', $addColon = true) 51 | { 52 | return ($addColon ? ':' : '') . static::_NAMESPACE . ($node ? '_' . 
$node : ''); 53 | } 54 | 55 | protected function getGrammarGrammarSequence() 56 | { 57 | return [$this->getNS('')]; 58 | } 59 | 60 | protected function _buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options) 61 | { 62 | $this->itemBuilderCallback = function ($sequenceItem) use (&$grammar, $grammarParser, $options) { 63 | return $grammarParser->buildSequenceItem($grammar, $sequenceItem, $options); 64 | }; 65 | 66 | $grammarNode = $grammarParser->buildSequenceItem($grammar, $sequenceItem->getSubnode(0)->getSubnode(0), 67 | $options); 68 | $condition = $this->buildCondition($sequenceItem->getSubnode(0)->getSubnode(1)); 69 | 70 | return new \ParserGenerator\GrammarNode\ItemRestrictions($grammarNode, $condition); 71 | } 72 | 73 | protected function buildCondition($node) 74 | { 75 | switch ($node->getType()) { 76 | case $this->getNS('condition', false): 77 | if ($node->getDetailType() === 'last') { 78 | return $this->buildCondition($node->getSubnode(0)); 79 | } else { 80 | return new ItemRestrictionOr([ 81 | $this->buildCondition($node->getSubnode(0)), 82 | $this->buildCondition($node->getSubnode(2)), 83 | ]); 84 | } 85 | 86 | case $this->getNS('conditionAnd', false): 87 | if ($node->getDetailType() === 'last') { 88 | return $this->buildCondition($node->getSubnode(0)); 89 | } else { 90 | return new ItemRestrictionAnd([ 91 | $this->buildCondition($node->getSubnode(0)), 92 | $this->buildCondition($node->getSubnode(2)), 93 | ]); 94 | } 95 | 96 | case $this->getNS('simpleCondition', false): 97 | switch ($node->getDetailType()) { 98 | case 'bracket': 99 | return $this->buildCondition($node->getSubnode(1)); 100 | 101 | case 'not': 102 | return new ItemRestrictionNot($this->buildCondition($node->getSubnode(1))); 103 | 104 | case 'contain': 105 | $itemBuilder = $this->itemBuilderCallback; 106 | 107 | return new Contain($itemBuilder($node->getSubnode(1))); 108 | 109 | case 'is': 110 | $itemBuilder = $this->itemBuilderCallback; 111 | 112 | return new 
Is($itemBuilder($node->getSubnode(1))); 113 | } 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/Extension/ExtensionInterface.php: -------------------------------------------------------------------------------- 1 | "bbb"^.) 19 | * Here is the implementation 20 | * 21 | * $noWhiteChar = new \ParserGenerator\GrammarNode\WhitespaceNegativeContextCheck(null); 22 | * $grammarGrammar['sequenceItem']['caret'] = [':sequenceItem', $noWhiteChar, '^']; 23 | * return $grammarGrammar; 24 | */ 25 | function extendGrammar($grammarGrammar); 26 | 27 | /** 28 | * It allows adding, removing and changing branches of the parser being built 29 | * it is invoked before node property is populated with real values 30 | * 31 | * @param \ParserGenerator\GrammarNode\NodeInterface[$nodeName] $grammar grammar being built 32 | * @param \ParserGenerator\SyntaxTreeNode\Base $parsedGrammar 33 | * @param \ParserGenerator\GrammarParser $grammarParser 34 | * @param array $options 35 | * 36 | * @return \ParserGenerator\GrammarNode\NodeInterface[$nodeName] (it should return $grammar param with applied changes) 37 | */ 38 | function modifyBranches($grammar, $parsedGrammar, $grammarParser, $options); 39 | 40 | /** 41 | * Invoked when branch other than standard found (It should create object for new branch) 42 | * 43 | * @param \ParserGenerator\GrammarNode\NodeInterface[$nodeName] $grammar grammar being built 44 | * @param \ParserGenerator\SyntaxTreeNode\Base $grammarBranch 45 | * @param \ParserGenerator\GrammarParser $grammarParser 46 | * @param array $options 47 | * 48 | * @return \ParserGenerator\GrammarNode\NodeInterface[$nodeName] (it should return $grammar param with applied changes) 49 | */ 50 | function createGrammarBranch($grammar, $grammarBranch, $grammarParser, $options); 51 | 52 | /** 53 | * Invoked when branch other than standard found (It should populate new branch with 'node' values) 54 | * 55 | * @param 
\ParserGenerator\GrammarNode\NodeInterface[$nodeName] $grammar grammar being built 56 | * @param \ParserGenerator\SyntaxTreeNode\Base $grammarBranch 57 | * @param \ParserGenerator\GrammarParser $grammarParser 58 | * @param array $options 59 | * 60 | * @return \ParserGenerator\GrammarNode\NodeInterface[$nodeName] (it should return $grammar param with applied changes) 61 | */ 62 | function fillGrammarBranch($grammar, $grammarBranch, $grammarParser, $options); 63 | 64 | /** 65 | * Invoked when sequenceItem other than standard found 66 | * If this extension can build sequenceItem from provided $sequenceItem then proper 67 | * \ParserGenerator\GrammarNode\NodeInterface should be returned 68 | * if provided $sequenceItem is not supported by this extension then this function should return false 69 | * 70 | * It is expected to support types added by extendGrammar method 71 | * 72 | * Example implementation (continuation of example from extendGrammar method) 73 | * 74 | * if($sequenceItem->getDetailType() === 'caret') { 75 | * $main = $grammarParser->buildSequenceItem($grammar, $sequenceItem->getSubnode(0), $options); 76 | * return new CaretGrammarNode($main); 77 | * } else { 78 | * return false; 79 | * } 80 | * 81 | * @param \ParserGenerator\GrammarNode\NodeInterface[$nodeName] $grammar grammar being built 82 | * @param \ParserGenerator\SyntaxTreeNode\Base $sequenceItem 83 | * @param \ParserGenerator\GrammarParser $grammarParser 84 | * @param array $options 85 | * 86 | * @return \ParserGenerator\GrammarNode\NodeInterface|false 87 | */ 88 | function buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options); 89 | 90 | function buildSequence($grammar, $rule, $grammarParser, $options); 91 | } -------------------------------------------------------------------------------- /tests/Examples/YamlLikeIndentationParserTest.php: -------------------------------------------------------------------------------- 1 | assertEquals(["a" => "x"], $parser->getValue("a:x")); 15 | 
$this->assertEquals([ 16 | "a" => "x", 17 | "b" => "y", 18 | ], $parser->getValue(" 19 | a:x 20 | b:y")); 21 | } 22 | 23 | public function testIndentsBase() 24 | { 25 | $parser = new YamlLikeIndentationParser(); 26 | 27 | $this->assertEquals([ 28 | "a" => [ 29 | "b" => "x", 30 | ], 31 | ], $parser->getValue(" 32 | a: 33 | b:x")); 34 | } 35 | 36 | public function testIndentsCanBeAnyNumberOfSpaces() 37 | { 38 | $parser = new YamlLikeIndentationParser(); 39 | 40 | $this->assertEquals([ 41 | "a" => [ 42 | "b" => "x", 43 | ], 44 | ], $parser->getValue(" 45 | a: 46 | b:x")); 47 | } 48 | 49 | public function testIndentsMultilevel() 50 | { 51 | $parser = new YamlLikeIndentationParser(); 52 | 53 | $this->assertEquals([ 54 | "a" => [ 55 | "b" => [ 56 | "c" => [ 57 | "d" => [ 58 | "e" => "x", 59 | ], 60 | ], 61 | ], 62 | ], 63 | ], $parser->getValue(" 64 | a: 65 | b: 66 | c: 67 | d: 68 | e:x")); 69 | } 70 | 71 | public function testIndentsSignificance() 72 | { 73 | $parser = new YamlLikeIndentationParser(); 74 | 75 | $this->assertEquals([ 76 | "a" => [ 77 | "b" => [ 78 | "c" => "x", 79 | "d" => "y", 80 | ], 81 | ], 82 | ], $parser->getValue(" 83 | a: 84 | b: 85 | c:x 86 | d:y")); 87 | 88 | $this->assertEquals([ 89 | "a" => [ 90 | "b" => [ 91 | "c" => "x", 92 | ], 93 | "d" => "y", 94 | ], 95 | ], $parser->getValue(" 96 | a: 97 | b: 98 | c:x 99 | d:y")); 100 | 101 | $this->assertEquals([ 102 | "a" => [ 103 | "b" => [ 104 | "c" => "x", 105 | ], 106 | ], 107 | "d" => "y", 108 | ], $parser->getValue(" 109 | a: 110 | b: 111 | c:x 112 | d:y")); 113 | 114 | // indentation level doesn't match 115 | $this->assertFalse($parser->getValue(" 116 | a: 117 | b: 118 | c:x 119 | d:y")); 120 | 121 | // indentation level doesn't match 122 | $this->assertFalse($parser->getValue(" 123 | a: 124 | b: 125 | c:x 126 | d:y")); 127 | } 128 | 129 | public function testComplexExample() 130 | { 131 | $parser = new YamlLikeIndentationParser(); 132 | 133 | $this->assertEquals([ 134 | "a" => [ 135 | "b" => [ 
136 | "c" => "1", 137 | "d" => "2", 138 | ], 139 | "e" => [ 140 | "f" => [ 141 | "g" => "3", 142 | "h" => "4", 143 | ], 144 | ], 145 | "i" => "5", 146 | "j" => "6", 147 | "k" => [ 148 | "l" => [ 149 | "m" => "7", 150 | ], 151 | "n" => "8", 152 | "o" => "9", 153 | ], 154 | "p" => "10", 155 | ], 156 | ], $parser->getValue(" 157 | a: 158 | b: 159 | c:1 160 | d:2 161 | e: 162 | f: 163 | g:3 164 | h:4 165 | i:5 166 | j:6 167 | k: 168 | l: 169 | m:7 170 | n:8 171 | o:9 172 | p:10")); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/GrammarNode/Series.php: -------------------------------------------------------------------------------- 1 | mainNode = $mainNode; 19 | $this->separator = $separator; 20 | $this->from0 = $from0; 21 | $this->greedy = $greedy; 22 | $this->tmpNodeName = '&series/' . spl_object_hash($this); 23 | $this->type = $type; 24 | 25 | $undecorated = \ParserGenerator\GrammarNode\Decorator::undecorate($mainNode); //$mainNode instanceof \ParserGenerator\GrammarNode\ErrorTrackDecorator ? $mainNode->getDecoratedNode() : $mainNode; 26 | 27 | if ($undecorated instanceof \ParserGenerator\GrammarNode\BranchInterface) { 28 | $this->resultDetailType = $undecorated->getNodeName(); 29 | } elseif ($undecorated instanceof \ParserGenerator\GrammarNode\Text) { 30 | $this->resultDetailType = $undecorated->getString(); 31 | } elseif ($undecorated instanceof \ParserGenerator\GrammarNode\Regex) { 32 | $this->resultDetailType = $undecorated->getRegex(); 33 | } elseif ($undecorated instanceof \ParserGenerator\GrammarNode\AnyText) { 34 | $this->resultDetailType = 'text'; 35 | } else { 36 | $this->resultDetailType = ''; 37 | } 38 | 39 | $this->node = BranchFactory::createBranch($this->type, $this->tmpNodeName); 40 | 41 | $ruleGo = $separator ? 
[$mainNode, $separator, $this->node] : [$mainNode, $this->node]; 42 | $ruleStop = [$mainNode]; 43 | 44 | if ($greedy) { 45 | $node = ['go' => $ruleGo, 'stop' => $ruleStop]; 46 | } else { 47 | $node = ['stop' => $ruleStop, 'go' => $ruleGo]; 48 | } 49 | 50 | $this->node->setNode($node); 51 | } 52 | 53 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 54 | { 55 | if ($this->from0 && !$this->greedy && !isset($restrictedEnd[$fromIndex])) { 56 | return [ 57 | 'node' => new \ParserGenerator\SyntaxTreeNode\Series($this->resultType, $this->resultDetailType, 58 | [], (bool)$this->separator), 59 | 'offset' => $fromIndex, 60 | ]; 61 | } 62 | 63 | if ($rparseResult = $this->node->rparse($string, $fromIndex, $restrictedEnd)) { 64 | $rparseResult['node'] = $this->getFlattenNode($rparseResult['node']); 65 | return $rparseResult; 66 | } 67 | 68 | if ($this->from0 && !isset($restrictedEnd[$fromIndex])) { 69 | return [ 70 | 'node' => new \ParserGenerator\SyntaxTreeNode\Series($this->resultType, $this->resultDetailType, 71 | [], (bool)$this->separator), 72 | 'offset' => $fromIndex, 73 | ]; 74 | } 75 | 76 | return false; 77 | } 78 | 79 | protected function getFlattenNode($ast) 80 | { 81 | $astSubnodes = []; 82 | while ($ast->getDetailType() == 'go') { 83 | $astSubnodes[] = $ast->getSubnode(0); 84 | if ($this->separator) { 85 | $astSubnodes[] = $ast->getSubnode(1); 86 | $ast = $ast->getSubnode(2); 87 | } else { 88 | $ast = $ast->getSubnode(1); 89 | } 90 | } 91 | $astSubnodes[] = $ast->getSubnode(0); 92 | 93 | return new \ParserGenerator\SyntaxTreeNode\Series($this->resultType, $this->resultDetailType, $astSubnodes, 94 | (bool)$this->separator); 95 | } 96 | 97 | public function getNode() 98 | { 99 | $node = $this->separator ? 
[[$this->mainNode, $this->separator]] : [[$this->mainNode]]; 100 | if ($this->from0) { 101 | $node[] = []; 102 | } 103 | return $node; 104 | } 105 | 106 | public function getMainNode() 107 | { 108 | return $this->mainNode; 109 | } 110 | 111 | public function __toString() 112 | { 113 | $op = [['+', '++'], ['*', '**']]; 114 | return $this->mainNode . $op[$this->from0][$this->greedy] . ($this->separator ?: ''); 115 | } 116 | 117 | public function copy($copyCallback) 118 | { 119 | $copy = new static($copyCallback($this->mainNode), $copyCallback($this->separator), $this->from0, 120 | $this->greedy, $this->type); 121 | $copy->setParser($this->getParser()); 122 | return $copy; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/GrammarNode/Unorder.php: -------------------------------------------------------------------------------- 1 | separator = $separator; 18 | $this->resultType = $resultType; 19 | $this->setTmpNodeName(); 20 | } 21 | 22 | protected function setTmpNodeName() 23 | { 24 | $this->tmpNodeName = '&unorder/' . 
spl_object_hash($this); 25 | } 26 | 27 | public function addChoice($choice, $mod) 28 | { 29 | $this->choices[] = $choice; 30 | $this->mod[] = $mod; 31 | } 32 | 33 | protected function internalParse($string, $fromIndex, $restrictedEnd, $required, $left) 34 | { 35 | foreach ($this->choices as $key => $choice) { 36 | if ($left[$key] > 0) { 37 | $choiceRestrictedEnd = []; 38 | $isRequired = !empty($required[$key]); 39 | unset($required[$key]); 40 | $left[$key]--; 41 | while ($choiceResult = $choice->rparse($string, $fromIndex, $choiceRestrictedEnd)) { 42 | $afterChoiceIndex = $choiceResult['offset']; 43 | $separatorRestrictedEnd = []; 44 | while ($separatorResult = $this->separator->rparse($string, $afterChoiceIndex, 45 | $separatorRestrictedEnd)) { 46 | $afterSeparatorIndex = $separatorResult['offset']; 47 | if ($next = $this->internalParse($string, $afterSeparatorIndex, $restrictedEnd, $required, 48 | $left)) { 49 | array_push($next['nodes'], $separatorResult['node'], $choiceResult['node']); 50 | return $next; 51 | } 52 | 53 | $separatorRestrictedEnd[$afterSeparatorIndex] = $afterSeparatorIndex; 54 | } 55 | 56 | 57 | $choiceRestrictedEnd[$afterChoiceIndex] = $afterChoiceIndex; 58 | } 59 | 60 | if (empty($required)) { 61 | $choiceResult = $choice->rparse($string, $fromIndex, $restrictedEnd); 62 | if ($choiceResult) { 63 | return ['nodes' => [$choiceResult['node']], 'offset' => $choiceResult['offset']]; 64 | } 65 | } 66 | 67 | $left[$key]++; 68 | if ($isRequired) { 69 | $required[$key] = true; 70 | } 71 | } 72 | } 73 | 74 | return false; 75 | } 76 | 77 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 78 | { 79 | $required = []; 80 | foreach ($this->choices as $key => $choice) { 81 | $mod = $this->mod[$key]; 82 | $left[$key] = ($mod == '*' || $mod == '+') ? 
static::MAX : 1; 83 | if ($mod == '' || $mod == '1' || $mod == '+') { 84 | $required[$key] = 1; 85 | } 86 | } 87 | 88 | if ($result = $this->internalParse($string, $fromIndex, $restrictedEnd, $required, $left)) { 89 | $node = new \ParserGenerator\SyntaxTreeNode\Series($this->resultType, '', array_reverse($result['nodes']), 90 | true); 91 | return ['node' => $node, 'offset' => $result['offset']]; 92 | } 93 | 94 | return false; 95 | } 96 | 97 | public function getTmpNodeName() 98 | { 99 | return $this->tmpNodeName; 100 | } 101 | 102 | public function setParser(\ParserGenerator\Parser $parser) 103 | { 104 | $this->parser = $parser; 105 | foreach ($this->choices as $choice) { 106 | if ($choice instanceof \ParserGenerator\ParserAwareInterface) { 107 | $choice->setParser($parser); 108 | } 109 | } 110 | } 111 | 112 | public function getParser() 113 | { 114 | return $this->parser; 115 | } 116 | 117 | public function __toString() 118 | { 119 | return "unorder"; 120 | return '(' . implode(' | ', $this->choices) . 
')'; 121 | } 122 | 123 | public function copy($copyCallback) 124 | { 125 | $result = clone $this; 126 | $result->separator = $copyCallback($this->separator); 127 | $result->choices = $copyCallback($this->choices); 128 | $result->setTmpNodeName(); 129 | return $result; 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/GrammarNode/Numeric.php: -------------------------------------------------------------------------------- 1 | $value) { 25 | if (in_array($key, ['min', 'max', 'requireFixedCharacters'], true)) { 26 | if (is_int($value)) { 27 | $this->$key = $value; 28 | } 29 | } 30 | if (in_array($key, 31 | ['formatDec', 'formatHex', 'formatOct', 'formatBin', 'eatWhiteChars', 'allowFixedCharacters'], 32 | true)) { 33 | if (is_bool($value)) { 34 | $this->$key = $value; 35 | } 36 | } 37 | } 38 | 39 | if (!$this->formatDec && !$this->formatHex && !$this->formatOct && !$this->formatBin) { 40 | throw new Exception('You must specify at least one proper format'); 41 | } 42 | 43 | if ($this->formatOct && $this->formatDec && ($this->requireFixedCharacters || $this->allowFixedCharacters)) { 44 | throw new Exception('options fixedCharacters and oct format canot be mixed together'); 45 | } 46 | 47 | $this->buildRegexes(); 48 | } 49 | 50 | protected function buildRegexes() 51 | { 52 | $this->regexes = []; 53 | 54 | if ($this->formatHex) { 55 | $this->regexes[16] = $this->buildRegexForBaseFormat('1-9a-fA-F', '0x'); 56 | } 57 | 58 | if ($this->formatBin) { 59 | $this->regexes[2] = $this->buildRegexForBaseFormat('1', '0b'); 60 | } 61 | 62 | if ($this->formatOct) { 63 | $this->regexes[8] = $this->buildRegexForBaseFormat('1-7', '0'); 64 | } 65 | 66 | if ($this->formatDec) { 67 | $this->regexes[10] = $this->buildRegexForBaseFormat('1-9', ''); 68 | } 69 | } 70 | 71 | protected function buildRegexForBaseFormat($charSet, $prefix) 72 | { 73 | return '/(' . $this->buildSubRegexForBaseFormat($charSet, $prefix) . 
')?\s*/'; 74 | } 75 | 76 | protected function buildSubRegexForBaseFormat($charSet, $prefix) 77 | { 78 | if ($this->requireFixedCharacters > 0) { 79 | return '-?' . $prefix . '[0' . $charSet . ']{' . $this->requireFixedCharacters . '}'; 80 | } else { 81 | if ($this->allowFixedCharacters) { 82 | return '-?' . $prefix . '[0' . $charSet . ']+'; 83 | } else { 84 | return '-?' . $prefix . '([' . $charSet . '][0' . $charSet . ']*|0)'; 85 | } 86 | } 87 | } 88 | /** Tries each regex in $this->regexes at $fromIndex. Returns ['node' => SyntaxTreeNode\Numeric, 'offset' => end index] for the first base whose match does not end on an offset present in $restrictedEnd; returns false when no base matches or when the parsed value is outside min/max. */ 89 | public function rparse($string, $fromIndex = 0, $restrictedEnd = []) 90 | { 91 | foreach ($this->regexes as $base => $regex) { 92 | if (preg_match($regex, $string, $match, 0, $fromIndex)) { 93 | if (isset($match[1])) { 94 | $offset = strlen($match[$this->eatWhiteChars ? 0 : 1]) + $fromIndex; 95 | if (!isset($restrictedEnd[$offset])) { 96 | /* Strip the base prefix only where it leads the literal (after an optional '-'): a blanket str_replace also removed "0b" from inside hex digits, so 0x10b was evaluated as 1. */ $value = intval(preg_replace('/^(-?)0[xb]/', '$1', $match[1]), $base); 97 | if (isset($this->min) && $value < $this->min) { 98 | return false; 99 | }; 100 | if (isset($this->max) && $value > $this->max) { 101 | return false; 102 | }; 103 | 104 | $node = new \ParserGenerator\SyntaxTreeNode\Numeric($match[1], $base); 105 | $node->setAfterContent(substr($match[0], strlen($match[1]))); 106 | return ['node' => $node, 'offset' => $offset]; 107 | } 108 | } 109 | } 110 | } 111 | 112 | return false; 113 | } 114 | 115 | public function __toString() 116 | { 117 | $modifiers = $this->requireFixedCharacters ? $this->requireFixedCharacters : ''; 118 | $modifiers .= $this->formatBin ? 'b' : ''; 119 | $modifiers .= $this->formatHex ? 'h' : ''; 120 | $modifiers .= $this->formatOct ? 'o' : ''; 121 | $modifiers .= $this->formatDec ? 'd' : ''; 122 | $modifiers = $modifiers == 'd' ? '' : ('/' . $modifiers); 123 | return (($this->min === null) ? '-inf' : $this->min) . '..' . (($this->max === null) ? 'inf' : $this->max) . 
$modifiers; 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /tests/Extension/UnorderTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 19 | } 20 | 21 | public function testBase() 22 | { 23 | $x = new Parser('start :=> unorder("", "a", "b").'); 24 | 25 | $this->assertObject($x->parse('ab')); 26 | $this->assertObject($x->parse('ba')); 27 | $this->assertFalse($x->parse('a')); 28 | $this->assertFalse($x->parse('b')); 29 | $this->assertFalse($x->parse('')); 30 | $this->assertFalse($x->parse('aba')); 31 | $this->assertFalse($x->parse('baba')); 32 | $this->assertFalse($x->parse('aa')); 33 | } 34 | 35 | public function testBaseWithThreeNodes() 36 | { 37 | $x = new Parser('start :=> unorder("", "a", "b", "c").'); 38 | 39 | $this->assertFalse($x->parse('ab')); 40 | $this->assertObject($x->parse('abc')); 41 | $this->assertObject($x->parse('acb')); 42 | $this->assertObject($x->parse('cab')); 43 | $this->assertObject($x->parse('cba')); 44 | $this->assertFalse($x->parse('aba')); 45 | $this->assertFalse($x->parse('ac')); 46 | } 47 | 48 | public function testSeparator() 49 | { 50 | $x = new Parser('start :=> unorder(",", "a", "b", "c").'); 51 | 52 | $this->assertFalse($x->parse('a,b')); 53 | $this->assertObject($x->parse('a,b,c')); 54 | $this->assertObject($x->parse('c,b,a')); 55 | 56 | $this->assertFalse($x->parse('ab')); 57 | $this->assertFalse($x->parse('abc')); 58 | $this->assertFalse($x->parse('cba')); 59 | } 60 | 61 | public function testFallback() 62 | { 63 | $x = new Parser('start :=> unorder("", ("abc" | "ab" | "a"), "b", "c").'); 64 | 65 | $this->assertObject($x->parse('abc')); 66 | $this->assertObject($x->parse('abcbc')); 67 | $this->assertObject($x->parse('abcb')); 68 | $this->assertFalse($x->parse('abcc')); 69 | $this->assertObject($x->parse('babc')); 70 | $this->assertFalse($x->parse('cabc')); 71 | } 72 | 73 | public function 
testNonTrivialExample() 74 | { 75 | $x = new Parser('start :=> unorder("", ("a"|"b"|"c"), ("b"|"c"|"d"), ("c"|"a"), ("b"|"d")).'); 76 | 77 | $this->assertObject($x->parse('caab')); 78 | $this->assertObject($x->parse('bcca')); 79 | $this->assertObject($x->parse('abbc')); 80 | $this->assertFalse($x->parse('accc')); 81 | $this->assertFalse($x->parse('ccca')); 82 | } 83 | 84 | public function testQModifier() 85 | { 86 | $x = new Parser('start :=> unorder("", ?"a", ?"b", ?"c").'); 87 | 88 | $this->assertObject($x->parse('abc')); 89 | $this->assertObject($x->parse('cba')); 90 | $this->assertObject($x->parse('a')); 91 | $this->assertObject($x->parse('c')); 92 | $this->assertObject($x->parse('bc')); 93 | $this->assertFalse($x->parse('')); 94 | $this->assertFalse($x->parse('aa')); 95 | $this->assertFalse($x->parse('aabbcc')); 96 | $this->assertFalse($x->parse('caa')); 97 | } 98 | 99 | public function testAModifier() 100 | { 101 | $x = new Parser('start :=> unorder("", +"a", +"b", +"c").'); 102 | 103 | $this->assertObject($x->parse('abc')); 104 | $this->assertObject($x->parse('cba')); 105 | $this->assertFalse($x->parse('a')); 106 | $this->assertFalse($x->parse('c')); 107 | $this->assertFalse($x->parse('bc')); 108 | $this->assertFalse($x->parse('')); 109 | $this->assertFalse($x->parse('aa')); 110 | $this->assertObject($x->parse('aabbcc')); 111 | $this->assertFalse($x->parse('caa')); 112 | $this->assertObject($x->parse('aacaaabb')); 113 | } 114 | 115 | public function testMModifier() 116 | { 117 | $x = new Parser('start :=> unorder("", *"a", *"b", *"c").'); 118 | 119 | $this->assertObject($x->parse('abc')); 120 | $this->assertObject($x->parse('cba')); 121 | $this->assertObject($x->parse('a')); 122 | $this->assertObject($x->parse('c')); 123 | $this->assertObject($x->parse('bc')); 124 | $this->assertFalse($x->parse('')); 125 | $this->assertObject($x->parse('aa')); 126 | $this->assertObject($x->parse('aabbcc')); 127 | $this->assertObject($x->parse('caa')); 128 | 
$this->assertObject($x->parse('aacaaabb')); 129 | } 130 | 131 | public function testMixedModifier() 132 | { 133 | $x = new Parser('start :=> unorder("", *"a", "b", ?"c").'); 134 | 135 | $this->assertObject($x->parse('abc')); 136 | $this->assertObject($x->parse('b')); 137 | $this->assertObject($x->parse('cb')); 138 | $this->assertFalse($x->parse('aac')); 139 | $this->assertObject($x->parse('aaba')); 140 | $this->assertFalse($x->parse('aabba')); 141 | $this->assertObject($x->parse('acaba')); 142 | $this->assertFalse($x->parse('baacac')); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /tests/ParsedNodes/ParserNodeTest.php: -------------------------------------------------------------------------------- 1 | assertTrue($a == $b); 23 | $this->assertFalse($a === $b); 24 | 25 | $this->assertFalse($a->getSubnode(0) === $b->getSubnode(0)); 26 | $this->assertFalse($a->getSubnode(1) === $b->getSubnode(1)); 27 | } 28 | 29 | public function testToString() 30 | { 31 | $a = new Branch('a', 'b', [ 32 | new Branch('q', 'w', [ 33 | new Leaf('l1'), 34 | ]), 35 | new Leaf('l2'), 36 | ]); 37 | 38 | $this->assertEquals('l1l2', (string)$a); 39 | 40 | $a->getSubnode(0)->setSubnode(null, new Leaf('l3')); 41 | $this->assertEquals('l1l3l2', (string)$a); 42 | 43 | $a->setSubnode(null, new Leaf('l4')); 44 | $this->assertEquals('l1l3l2l4', (string)$a); 45 | 46 | $a->setSubnode(1, new Leaf('l5')); 47 | $this->assertEquals('l1l3l5l4', (string)$a); 48 | 49 | $a->setSubnode(2, clone $a); 50 | $this->assertEquals('l1l3l5l1l3l5l4', (string)$a); 51 | } 52 | 53 | public function testCompare() 54 | { 55 | $a = new Branch('a', 'b', [ 56 | new Branch('q', 'w', [ 57 | new Leaf('l1'), 58 | ]), 59 | new Leaf('l2'), 60 | ]); 61 | 62 | $b = clone $a; 63 | 64 | $this->assertTrue($a->compare($b)); 65 | 66 | $b->getSubnode(0)->setType('qq'); 67 | 68 | $this->assertFalse($a->compare($b)); 69 | $this->assertTrue($a->compare($b, 70 | 
\ParserGenerator\SyntaxTreeNode\Base::COMPARE_DEFAULT ^ \ParserGenerator\SyntaxTreeNode\Base::COMPARE_CHILDREN_NORMAL)); 71 | /* The option masks here use bitwise ^; the logical `xor` operator would pass a boolean to compare(). NOTE(review): assumes COMPARE_* are bit flags contained in COMPARE_DEFAULT - confirm against SyntaxTreeNode\Base. */ 72 | $b = clone $a; 73 | $b->setDetailType(''); 74 | 75 | $this->assertFalse($a->compare($b)); 76 | $this->assertTrue($a->compare($b, 77 | \ParserGenerator\SyntaxTreeNode\Base::COMPARE_DEFAULT ^ \ParserGenerator\SyntaxTreeNode\Base::COMPARE_SUBTYPE)); 78 | 79 | $b = clone $a; 80 | $b->setType(''); 81 | 82 | $this->assertFalse($a->compare($b)); 83 | $this->assertTrue($a->compare($b, 84 | \ParserGenerator\SyntaxTreeNode\Base::COMPARE_DEFAULT ^ \ParserGenerator\SyntaxTreeNode\Base::COMPARE_TYPE)); 85 | 86 | $b = clone $a; 87 | $b->getSubnode(0)->getSubnode(0)->setContent('lx'); 88 | 89 | $this->assertFalse($a->compare($b)); 90 | $this->assertTrue($a->compare($b, 91 | \ParserGenerator\SyntaxTreeNode\Base::COMPARE_DEFAULT ^ \ParserGenerator\SyntaxTreeNode\Base::COMPARE_LEAF)); 92 | } 93 | 94 | protected function getTestNode1() 95 | { 96 | return new Branch('a', '', [ 97 | new Branch('b', '', [ 98 | new Leaf('l1'), 99 | new Branch('b', '', [ 100 | new Branch('c', '', [ 101 | new Leaf('l2'), 102 | ]), 103 | new Branch('b', '', [ 104 | new Leaf('l3'), 105 | new Leaf('l4'), 106 | ]), 107 | ]), 108 | ]), 109 | new Leaf('l2'), 110 | new Branch('c', '', [ 111 | new Branch('b', '', []), 112 | new Branch('d', '', [ 113 | new Leaf('l5'), 114 | ]), 115 | ]), 116 | ]); 117 | } 118 | 119 | public function testFindAll() 120 | { 121 | $a = $this->getTestNode1(); 122 | 123 | $this->assertEquals([$a], $a->findAll('a')); 124 | 125 | $this->assertEquals([ 126 | $a->getSubnode(0)->getSubnode(1)->getSubnode(0), 127 | $a->getSubnode(2), 128 | ], $a->findAll('c')); 129 | 130 | $this->assertEquals([ 131 | $a->getSubnode(2)->getSubnode(1), 132 | ], $a->findAll('d')); 133 | 134 | $this->assertEquals([], $a->findAll('nonExistingType')); 135 | $this->assertEquals([], $a->findAll('nonExistingType', true)); 136 | $this->assertEquals([], $a->findAll('nonExistingType', 
true, true)); 137 | 138 | $this->assertEquals([ 139 | $a->getSubnode(0), 140 | $a->getSubnode(2)->getSubnode(0), 141 | ], $a->findAll('b')); 142 | 143 | $this->assertEquals([ 144 | $a->getSubnode(0), 145 | $a->getSubnode(0)->getSubnode(1), 146 | $a->getSubnode(0)->getSubnode(1)->getSubnode(1), 147 | $a->getSubnode(2)->getSubnode(0), 148 | ], $a->findAll('b', true)); 149 | 150 | $this->assertEquals([ 151 | $a->getSubnode(0)->getSubnode(1)->getSubnode(1), 152 | $a->getSubnode(0)->getSubnode(1), 153 | $a->getSubnode(0), 154 | $a->getSubnode(2)->getSubnode(0), 155 | ], $a->findAll('b', true, true)); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/Extension/Integer.php: -------------------------------------------------------------------------------- 1 | getNS(null, false)] = [ 12 | [ 13 | $this->getNS('LowBound'), 14 | new \ParserGenerator\GrammarNode\Text('..'), 15 | $this->getNS('HiBound'), 16 | $this->getNs('modifiers'), 17 | '', 18 | ], 19 | ]; 20 | 21 | $grammarGrammar[$this->getNS('LowBound', false)] = [ 22 | [new \ParserGenerator\GrammarNode\Text('-inf')], 23 | [new \ParserGenerator\GrammarNode\Text('-infinity')], 24 | 'int' => [ 25 | new \ParserGenerator\GrammarNode\Numeric([ 26 | 'formatHex' => true, 27 | 'formatBin' => true, 28 | 'allowFixedCharacters' => true, 29 | ]), 30 | ], 31 | ]; 32 | $grammarGrammar[$this->getNS('HiBound', false)] = [ 33 | [new \ParserGenerator\GrammarNode\Text('inf')], 34 | [new \ParserGenerator\GrammarNode\Text('infinity')], 35 | 'int' => [ 36 | new \ParserGenerator\GrammarNode\Numeric([ 37 | 'formatHex' => true, 38 | 'formatBin' => true, 39 | 'allowFixedCharacters' => true, 40 | ]), 41 | ], 42 | ]; 43 | 44 | $grammarGrammar[$this->getNS('modifiers', false)] = [ 45 | [new \ParserGenerator\GrammarNode\Text('/'), $this->getNS('modifierList')], 46 | [''], 47 | ]; 48 | 49 | $grammarGrammar[$this->getNS('modifierList', false)] = [ 50 | [$this->getNS('modifier'), 
$this->getNS('modifierList')], 51 | [$this->getNS('modifier')], 52 | ]; 53 | 54 | $grammarGrammar[$this->getNS('modifier', false)] = [ 55 | 'formatHex' => [new \ParserGenerator\GrammarNode\Text('h')], 56 | 'formatDec' => [new \ParserGenerator\GrammarNode\Text('d')], 57 | 'formatOct' => [new \ParserGenerator\GrammarNode\Text('o')], 58 | 'formatBin' => [new \ParserGenerator\GrammarNode\Text('b')], 59 | 'fixed' => [new \ParserGenerator\GrammarNode\Regex('/\d+/')], 60 | ]; 61 | 62 | return parent::extendGrammar($grammarGrammar); 63 | } 64 | 65 | protected function getNS($node = '', $addColon = true) 66 | { 67 | return ($addColon ? ':' : '') . static::_NAMESPACE . ($node ? '_' . $node : ''); 68 | } 69 | 70 | protected function getGrammarGrammarSequence() 71 | { 72 | return [$this->getNS('')]; 73 | } 74 | 75 | protected function _buildSequenceItem(&$grammar, $sequenceItem, $grammarParser, $options) 76 | { 77 | $numericOptions = []; 78 | $numericOptions['eatWhiteChars'] = $options['ignoreWhitespaces']; 79 | 80 | $item = $sequenceItem->getSubnode(0); 81 | $min = $item->getSubnode(0); 82 | $max = $item->getSubnode(2); 83 | $modifiers = $item->findAll($this->getNS('modifier', false)); 84 | 85 | if ($min->getDetailType() === 'int') { 86 | $numericOptions['min'] = $min->getSubnode(0)->getValue(); 87 | 88 | if (!count($modifiers)) { 89 | if ($min->getSubnode(0)->getFixedCharacters()) { 90 | $numericOptions['requireFixedCharacters'] = $min->getSubnode(0)->getFixedCharacters(); 91 | } 92 | 93 | if ($min->getSubnode(0)->getBase() === 16) { 94 | $numericOptions['formatHex'] = true; 95 | } 96 | 97 | if ($min->getSubnode(0)->getBase() === 2) { 98 | $numericOptions['formatBin'] = true; 99 | } 100 | } 101 | } 102 | 103 | if ($max->getDetailType() === 'int') { 104 | $numericOptions['max'] = $max->getSubnode(0)->getValue(); 105 | 106 | if (!count($modifiers)) { 107 | if ($max->getSubnode(0)->getFixedCharacters()) { 108 | $numericOptions['requireFixedCharacters'] = 
$max->getSubnode(0)->getFixedCharacters(); 109 | } 110 | 111 | if ($max->getSubnode(0)->getBase() === 16) { 112 | $numericOptions['formatHex'] = true; 113 | } 114 | 115 | if ($max->getSubnode(0)->getBase() === 2) { 116 | $numericOptions['formatBin'] = true; 117 | } 118 | } 119 | } 120 | 121 | if (count($modifiers)) { 122 | $numericOptions['formatDec'] = false; 123 | } 124 | 125 | foreach ($modifiers as $modifier) { 126 | if (in_array($modifier->getDetailType(), ['formatDec', 'formatHex', 'formatOct', 'formatBin'])) { 127 | $numericOptions[$modifier->getDetailType()] = true; 128 | } elseif ($modifier->getDetailType() === 'fixed') { 129 | if ((string)$modifier === '0') { 130 | $numericOptions['allowFixedCharacters'] = true; 131 | } else { 132 | $numericOptions['requireFixedCharacters'] = (int)(string)$modifier; 133 | } 134 | } 135 | } 136 | 137 | return new \ParserGenerator\GrammarNode\Numeric($numericOptions); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /tests/Extension/RuleConditionTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 13 | } 14 | 15 | public function testIntegers() 16 | { 17 | $x = new Parser('start :=> 1..100 1..100 getValue() < $s[1]->getValue() ?>.', 18 | ['ignoreWhitespaces' => true]); 19 | $this->assertObject($x->parse('36 45')); 20 | $this->assertObject($x->parse('1 100')); 21 | $this->assertObject($x->parse('1 2')); 22 | $this->assertObject($x->parse('78 79')); 23 | $this->assertFalse($x->parse('100 1')); 24 | $this->assertFalse($x->parse('36 12')); 25 | $this->assertFalse($x->parse('36 7')); 26 | $this->assertFalse($x->parse('5 5')); 27 | } 28 | 29 | public function testValidSubXML() 30 | { 31 | $x = new Parser('start :=> xmlTag. 32 | xmlText :=> /[^<>]+/. 33 | xmlTag :=> "<" /[a-z]+/ ">" xmlNodes "" 34 | :=> "<" /[a-z]+/ "/>". 35 | xmlNodes :=> xmlNode xmlNodes 36 | :=> "". 
37 | xmlNode :=> xmlTag 38 | :=> xmlText.'); 39 | 40 | $this->assertFalse($x->parse('text')); 41 | $this->assertFalse($x->parse('text')); 42 | $this->assertFalse($x->parse('text')); 43 | $this->assertObject($x->parse('')); 44 | $this->assertObject($x->parse('texttttext')); 45 | $this->assertObject($x->parse('')); 46 | $this->assertObject($x->parse('')); 47 | $this->assertObject($x->parse('texttext2')); 48 | $this->assertObject($x->parse('text')); 49 | $this->assertObject($x->parse('text')); 50 | $this->assertObject($x->parse('texttext2text')); 51 | //these strings test should fail: 52 | $this->assertFalse($x->parse('')); 53 | $this->assertFalse($x->parse('')); 54 | $this->assertFalse($x->parse('')); 55 | $this->assertFalse($x->parse('')); 56 | $this->assertFalse($x->parse('')); 57 | } 58 | 59 | public function testInvalidSubXML() 60 | { 61 | $x = new Parser('start :=> xmlTag. 62 | xmlText :=> /[^<>]+/. 63 | xmlTag :=> "<" /[a-z]+/ ">" xmlNodes "" 64 | :=> "<" /[a-z]+/ "/>" 65 | :=> "<" /[a-z]+/ ">" xmlNodes. 66 | xmlNodes :=> xmlNode xmlNodes 67 | :=> "". 68 | xmlNode :=> xmlTag 69 | :=> xmlText.'); 70 | 71 | $this->assertObject($x->parse('text')); 72 | $this->assertFalse($x->parse('text')); 73 | $this->assertFalse($x->parse('text')); 74 | 75 | //these strings test should fail: 76 | $this->assertFalse($x->parse('')); 77 | $this->assertFalse($x->parse('')); 78 | $this->assertFalse($x->parse('')); 79 | $this->assertFalse($x->parse('')); 80 | 81 | //check parsing result 82 | $r = $x->parse('qwert')->findAll('xmlTag', true); 83 | $this->assertEquals('qwert', (string)$r[0]); 84 | $this->assertEquals('w', (string)$r[1]); 85 | $this->assertEquals('rt', (string)$r[2]); 86 | $this->assertEquals('', (string)$r[3]); 87 | } 88 | 89 | public function testMadGrammar() 90 | { 91 | $x = new Parser('start :=> content. 92 | content :=> spectext content 93 | :=> /./ content 94 | :=> "". 95 | spectext :=> text3 content text3 96 | :=> text2 content text2 97 | :=> text1 content text1 . 
98 | text1 :=> /./ . 99 | text2 :=> /.{2}/ . 100 | text3 :=> /.{3}/ .'); 101 | 102 | $r = $x->parse('abab')->findAll('spectext'); 103 | $this->assertEquals('abab', (string)$r[0]); 104 | $this->assertCount(1, $r); 105 | 106 | $r = $x->parse('abac')->findAll('spectext'); 107 | $this->assertEquals('aba', (string)$r[0]); 108 | $this->assertCount(1, $r); 109 | 110 | $r = $x->parse('abababa')->findAll('spectext'); 111 | $this->assertEquals('abababa', (string)$r[0]); 112 | $this->assertCount(1, $r); 113 | 114 | return 0; 115 | //this test will fail 116 | $r = $x->parse('abbbaabab')->findAll('spectext', true); 117 | $this->assertEquals('abbbaabab', (string)$r[0]); 118 | $this->assertEquals('bbaab', (string)$r[1]); 119 | $this->assertEquals('aa', (string)$r[2]); 120 | $this->assertCount(3, $r); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /tests/Extension/SeriesTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 17 | } 18 | 19 | public function testPWithoutSeparator() 20 | { 21 | $x = new Parser('start :=> str+. 22 | str :=> "a" 23 | :=> "b" 24 | :=> "c".'); 25 | 26 | 27 | $this->assertFalse($x->parse('ax')); 28 | $this->assertObject($x->parse('a')); 29 | $this->assertObject($x->parse('b')); 30 | $this->assertObject($x->parse('abacab')); 31 | $this->assertFalse($x->parse('')); 32 | 33 | $this->assertEquals(new Root('start', 0, [ 34 | new Series('list', 'str', [ 35 | new Branch('str', 0, 36 | [new Leaf('a')]), 37 | new Branch('str', 1, 38 | [new Leaf('b')]), 39 | new Branch('str', 0, 40 | [new Leaf('a')]), 41 | new Branch('str', 2, 42 | [new Leaf('c')]), 43 | new Branch('str', 0, 44 | [new Leaf('a')]), 45 | ], false), 46 | ]), $x->parse("abaca")); 47 | } 48 | 49 | public function testMWithoutSeparator() 50 | { 51 | $x = new Parser('start :=> str*. 
52 | str :=> "a" 53 | :=> "b" 54 | :=> "c".'); 55 | 56 | 57 | $this->assertFalse($x->parse('ax')); 58 | $this->assertObject($x->parse('a')); 59 | $this->assertObject($x->parse('b')); 60 | $this->assertObject($x->parse('abacab')); 61 | 62 | $this->assertEquals(new Root('start', 0, [ 63 | new Series('list', 'str', [], false), 64 | ]), $x->parse('')); 65 | } 66 | 67 | public function testWithSeparator() 68 | { 69 | $x = new Parser('start :=> str+coma. 70 | coma :=> ",". 71 | str :=> "a" 72 | :=> "b" 73 | :=> "c".'); 74 | 75 | $this->assertFalse($x->parse('a,')); 76 | $this->assertObject($x->parse('a')); 77 | $this->assertObject($x->parse('b,a')); 78 | $this->assertFalse($x->parse('')); 79 | 80 | $this->assertEquals(new Root('start', 0, [ 81 | new Series('list', 'str', [ 82 | new Branch('str', 0, 83 | [new Leaf('a')]), 84 | new Branch('coma', 0, 85 | [new Leaf(',')]), 86 | new Branch('str', 2, 87 | [new Leaf('c')]), 88 | new Branch('coma', 0, 89 | [new Leaf(',')]), 90 | new Branch('str', 1, 91 | [new Leaf('b')]), 92 | ], true), 93 | ]), $x->parse("a,c,b")); 94 | } 95 | 96 | public function testWithVariousTypes() 97 | { 98 | $x = new Parser('start :=> /[abc]/+",".'); 99 | 100 | $this->assertFalse($x->parse('a,')); 101 | $this->assertObject($x->parse('a')); 102 | $this->assertObject($x->parse('b,c')); 103 | $this->assertFalse($x->parse('')); 104 | 105 | $x = new Parser('start :=> /[abc]/*",".'); 106 | 107 | $this->assertFalse($x->parse('a,')); 108 | $this->assertObject($x->parse('a')); 109 | $this->assertObject($x->parse('b,c')); 110 | $this->assertObject($x->parse('')); 111 | 112 | $x = new Parser('start :=> 1..100+",".'); 113 | 114 | $this->assertObject($x->parse('12')); 115 | $this->assertObject($x->parse('1,42,6')); 116 | $this->assertFalse($x->parse('a')); 117 | $this->assertFalse($x->parse('')); 118 | } 119 | 120 | public function testSpacing() 121 | { 122 | /* this parses letters separated by coma 123 | * $x = new Parser('start :=> /[abc]/+",".'); 124 | * 125 | * 
but this should parse letters ended with coma 126 | * $x = new Parser('start :=> /[abc]/+ ",".'); 127 | */ 128 | 129 | $x = new Parser('start :=> /[abc]/+ ",".'); 130 | 131 | $this->assertObject($x->parse('a,')); 132 | $this->assertFalse($x->parse('a')); 133 | $this->assertFalse($x->parse('b,c')); 134 | $this->assertObject($x->parse('abcc,')); 135 | } 136 | 137 | public function testGreed() 138 | { 139 | // by default series are not greedy but if we repeat series sign 140 | // series become greedy 141 | 142 | $x = new Parser('start :=> /./+ /.*/ .'); 143 | 144 | $this->assertEquals(new Root('start', 0, [ 145 | new Series('list', '/./', [ 146 | new Leaf('a'), 147 | ], false), 148 | new Leaf('bc'), 149 | ]), $x->parse("abc")); 150 | 151 | $x = new Parser('start :=> /./++ /.*/ .'); 152 | 153 | $this->assertEquals(new Root('start', 0, [ 154 | new Series('list', '/./', [ 155 | new Leaf('a'), 156 | new Leaf('b'), 157 | new Leaf('c'), 158 | ], false), 159 | new Leaf(''), 160 | ]), $x->parse("abc")); 161 | } 162 | 163 | public function testSurrounded() 164 | { 165 | $x = new Parser('start :=> num+"," /b/. 
166 | num :=> /\d+/.'); 167 | 168 | $this->assertObject($x->parse('2,3b')); 169 | } 170 | 171 | public function testSeriesInPegAreAlwaysGreedy() 172 | { 173 | $x = new Parser('start :=> "a"+', ['defaultBranchType' => 'PEG']); 174 | 175 | $this->assertObject($x->parse('aaa')); 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /tests/Extension/LookaheadTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 15 | } 16 | 17 | public function testSimplePositive() 18 | { 19 | $x = new Parser('start :=> ?"abc" /.+/ .'); 20 | 21 | $this->assertFalse($x->parse('ax')); 22 | $this->assertFalse($x->parse('ab')); 23 | $this->assertFalse($x->parse('bcde')); 24 | $this->assertFalse($x->parse('cba')); 25 | $this->assertObject($x->parse('abc')); 26 | $this->assertObject($x->parse('abcbn')); 27 | } 28 | 29 | public function testSimpleNegative() 30 | { 31 | $x = new Parser('start :=> !"abc" /.+/ .'); 32 | 33 | $this->assertObject($x->parse('ax')); 34 | $this->assertObject($x->parse('ab')); 35 | $this->assertObject($x->parse('bcde')); 36 | $this->assertObject($x->parse('cba')); 37 | $this->assertFalse($x->parse('abc')); 38 | $this->assertFalse($x->parse('abcbn')); 39 | } 40 | 41 | public function testSimplePositiveAfter() 42 | { 43 | $x = new Parser('start :=> x /.*/ . 
44 | x :=> /.{3}/ !"a" .'); 45 | 46 | $this->assertObject($x->parse('axcd')); 47 | $this->assertObject($x->parse('aaadaa')); 48 | $this->assertObject($x->parse('abc')); 49 | $this->assertFalse($x->parse('abca')); 50 | $this->assertFalse($x->parse('abcabn')); 51 | } 52 | 53 | public function testWithRegex() 54 | { 55 | $x = new Parser('start :=> !/.{3}/ /.+/ .'); 56 | 57 | $this->assertObject($x->parse('ax')); 58 | $this->assertObject($x->parse('ab')); 59 | $this->assertFalse($x->parse('bcde')); 60 | $this->assertFalse($x->parse('abcde')); 61 | $this->assertFalse($x->parse('cba')); 62 | } 63 | 64 | public function testWithChoice() 65 | { 66 | $x = new Parser('start :=> ?("abc" | "cba") /.+/ .'); 67 | 68 | $this->assertFalse($x->parse('ax')); 69 | $this->assertFalse($x->parse('ab')); 70 | $this->assertFalse($x->parse('bcde')); 71 | $this->assertObject($x->parse('abcde')); 72 | $this->assertObject($x->parse('cbax')); 73 | } 74 | 75 | public function testLookaroundDontProduceToken() 76 | { 77 | $x = new Parser('start :=> ?/[abc]/ /.+/ .'); 78 | 79 | $this->assertEquals(new Root('start', 0, [ 80 | new Leaf('abc'), 81 | ]), $x->parse("abc")); 82 | 83 | $x = new Parser('start :=> ?abc /.+/ . 84 | abc :=> a b c. 85 | a :=> "a". 86 | b :=> "b". 
87 | c :=> "c".'); 88 | 89 | $this->assertEquals(new Root('start', 0, [ 90 | new Leaf('abc'), 91 | ]), $x->parse("abc")); 92 | } 93 | 94 | public function testInsideChoice() 95 | { 96 | $x = new Parser('start :=> (?"abc" /./| "ab") /.*/ .'); 97 | 98 | $this->assertFalse($x->parse('acd')); 99 | $this->assertFalse($x->parse('')); 100 | 101 | $this->assertEquals(new Root('start', 0, [ 102 | new Leaf('ab'), 103 | new Leaf('de'), 104 | ]), $x->parse("abde")); 105 | 106 | $this->assertEquals(new Root('start', 0, [ 107 | new Leaf('a'), 108 | new Leaf('bce'), 109 | ]), $x->parse("abce")); 110 | 111 | $x = new Parser('start :=> (?"abc" | "bc") /.*/ .'); 112 | 113 | $this->assertEquals(new Root('start', 0, [ 114 | new Leaf(''), 115 | new Leaf('abce'), 116 | ]), $x->parse("abce")); 117 | } 118 | 119 | public function testAnBnCnGrammar() 120 | { 121 | $x = new Parser('start :=> ?(A "c") "a"++ B. 122 | A :=> "a" A? "b". 123 | B :=> "b" B? "c".'); 124 | 125 | $this->assertObject($x->parse('abc')); 126 | $this->assertObject($x->parse('aabbcc')); 127 | $this->assertObject($x->parse('aaabbbccc')); 128 | 129 | $this->assertFalse($x->parse('aabb')); 130 | $this->assertFalse($x->parse('aacc')); 131 | $this->assertFalse($x->parse('bbcc')); 132 | 133 | $this->assertFalse($x->parse('aabbc')); 134 | $this->assertFalse($x->parse('aabcc')); 135 | $this->assertFalse($x->parse('abbcc')); 136 | 137 | $this->assertFalse($x->parse('aabbccc')); 138 | $this->assertFalse($x->parse('aabbbcc')); 139 | $this->assertFalse($x->parse('aaabbcc')); 140 | } 141 | 142 | public function testBugNoBacktracking() 143 | { 144 | $x = new Parser('start :=> text ?"c" text.'); 145 | 146 | $this->assertObject($x->parse('abcd')); 147 | } 148 | 149 | public function testErrorTrack() 150 | { 151 | $x = new Parser('start :=> "q" ?/.b/ "a".'); 152 | 153 | $this->assertFalse($x->parse('qa')); 154 | $e = $x->getException(); 155 | $this->assertEquals(1, $e->getIndex()); 156 | $this->assertEquals('?/.b/ "a"', implode(' ', 
$e->getExpected())); 157 | 158 | $this->assertFalse($x->parse('qcb')); 159 | $e = $x->getException(); 160 | $this->assertEquals(1, $e->getIndex()); 161 | $this->assertEquals('?/.b/ "a"', implode(' ', $e->getExpected())); 162 | 163 | $x = new Parser('start :=> "q" !/[ab]/ /[bc]/.'); 164 | 165 | $this->assertFalse($x->parse('qa')); 166 | $e = $x->getException(); 167 | $this->assertEquals(1, $e->getIndex()); 168 | $this->assertEquals('!/[ab]/ /[bc]/', implode(' ', $e->getExpected())); 169 | 170 | $this->assertFalse($x->parse('qb')); 171 | $e = $x->getException(); 172 | $this->assertEquals(1, $e->getIndex()); 173 | $this->assertEquals('!/[ab]/ /[bc]/', implode(' ', $e->getExpected())); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /tests/Util/RegexTest.php: -------------------------------------------------------------------------------- 1 | assertFalse($x->canBeEmpty('/a/')); 14 | $this->assertFalse($x->canBeEmpty('/\s/')); 15 | $this->assertTrue($x->canBeEmpty('/$/')); 16 | $this->assertFalse($x->canBeEmpty('/aab/')); 17 | $this->assertFalse($x->canBeEmpty('/./')); 18 | $this->assertFalse($x->canBeEmpty('/[abc]/')); 19 | $this->assertFalse($x->canBeEmpty('/[^abc]/')); 20 | $this->assertTrue($x->canBeEmpty('/a*/')); 21 | $this->assertTrue($x->canBeEmpty('/a?/')); 22 | $this->assertFalse($x->canBeEmpty('/a+/')); 23 | $this->assertTrue($x->canBeEmpty('/[a]*/')); 24 | $this->assertFalse($x->canBeEmpty('/[a]+/')); 25 | $this->assertFalse($x->canBeEmpty('/a*b/')); 26 | $this->assertTrue($x->canBeEmpty('/a*b*/')); 27 | $this->assertFalse($x->canBeEmpty('/a*b*c/')); 28 | $this->assertFalse($x->canBeEmpty('/a{3}/')); 29 | $this->assertFalse($x->canBeEmpty('/a{3,9}/')); 30 | $this->assertTrue($x->canBeEmpty('/a{0,3}/')); 31 | $this->assertFalse($x->canBeEmpty('/a+?/')); 32 | $this->assertTrue($x->canBeEmpty('/a*?/')); 33 | $this->assertFalse($x->canBeEmpty('/a|b/')); 34 | $this->assertTrue($x->canBeEmpty('/a|/')); 35 | 
$this->assertTrue($x->canBeEmpty('/a|a*/')); 36 | $this->assertTrue($x->canBeEmpty('/a*|b*/')); 37 | $this->assertFalse($x->canBeEmpty('/(a|b)/')); 38 | $this->assertTrue($x->canBeEmpty('/(a|)/')); 39 | $this->assertTrue($x->canBeEmpty('/(a|a*)/')); 40 | $this->assertTrue($x->canBeEmpty('/(a*|b*)/')); 41 | $this->assertTrue($x->canBeEmpty('/(a|a*)b*/')); 42 | $this->assertFalse($x->canBeEmpty('/(a|a*)b/')); 43 | $this->assertTrue($x->canBeEmpty('/(a|b)?/')); 44 | $this->assertFalse($x->canBeEmpty('/(a|b)+/')); 45 | $this->assertTrue($x->canBeEmpty('/(a|$)/')); 46 | $this->assertTrue($x->canBeEmpty('/(a|^)+/')); 47 | } 48 | 49 | protected function assertCanStart($chars, $regex) 50 | { 51 | $assocCharacters = []; 52 | foreach ($chars as $char) { 53 | $assocCharacters[$char] = true; 54 | } 55 | 56 | $this->assertEquals($assocCharacters, Regex::getInstance()->getStartCharacters($regex)); 57 | } 58 | 59 | public function testGetStartCharacters() 60 | { 61 | $this->assertCanStart(['a'], '/a/'); 62 | $this->assertCanStart(['a'], '/ab/'); 63 | $this->assertCanStart(['a'], '/a+b/'); 64 | $this->assertCanStart(['a', 'b'], '/a*b/'); 65 | $this->assertCanStart(['a', 'b'], '/a?b/'); 66 | $this->assertCanStart(['a'], '/a+?b/'); 67 | $this->assertCanStart(['a', 'b'], '/(a|b)/'); 68 | $this->assertCanStart(['a', 'c'], '/(ab|c)/'); 69 | $this->assertCanStart(['a', 'b', 'c', 'd', 'e'], '/a?b?c?d?efg/'); 70 | $this->assertCanStart(['a', 'b', 'c'], '/(a|b?)c/'); 71 | $this->assertCanStart(['a', 'b', 'c'], '/(a|b)?c/'); 72 | $this->assertCanStart(['a', 'b'], '/a{0,3}b/'); 73 | $this->assertCanStart(['a'], '/a{1,3}b/'); 74 | $this->assertCanStart(['a', 'b', 'c'], '/[abc]/'); 75 | $this->assertCanStart(['a', 'b', 'c', 'd'], '/[a-d]/'); 76 | $this->assertCanStart(['h', 'a', 'b', 'c'], '/[ha-c]/'); 77 | $this->assertCanStart(['-', 'a', 'b', 'c'], '/[-a-c]/'); 78 | $this->assertCanStart(['a', 'b', 'c', '1', '2', '3'], '/[a-c1-3]/'); 79 | $this->assertCanStart(['0', '1', '2', '3', '4', '5', 
'6', '7', '8', '9'], '/\\d/'); 80 | $this->assertCanStart(['['], '/\\[/'); 81 | $this->assertCanStart(["\n", "\r", " ", "\t"], '/\\s/'); 82 | $this->assertCanStart(["\n", "\r", " ", "\t", "j"], '/[\\sj]/'); 83 | $this->assertEquals(255, 84 | count(Regex::getInstance()->getStartCharacters('/./'))); //256 - 1 cause \n is out 85 | } 86 | 87 | protected function checkStringGenerate($regex, $results, $maxLength = 10) 88 | { 89 | $x = Regex::getInstance(); 90 | $matches = 0; 91 | for ($i = 0; $i < 1000; $i++) { 92 | $s = $x->generateString($regex); 93 | if (strlen($s) <= $maxLength) { 94 | if (in_array($s, $results)) { 95 | if (++$matches > 10) { 96 | $this->assertTrue(true); 97 | return true; 98 | } 99 | } else { 100 | $this->assertFalse(true); 101 | } 102 | } 103 | } 104 | 105 | $this->assertFalse(true); 106 | } 107 | 108 | public function testRegexGenarateString() 109 | { 110 | srand(10); 111 | $this->checkStringGenerate('/a/', ['a']); 112 | $this->checkStringGenerate('/asd/', ['asd']); 113 | $this->checkStringGenerate('/(a|b|c)/', ['a', 'b', 'c']); 114 | $this->checkStringGenerate('/a?/', ['a', '']); 115 | $this->checkStringGenerate('/a?b?/', ['a', 'b', 'ab', '']); 116 | $this->checkStringGenerate('/(a|b|c)?/', ['a', 'b', 'c', '']); 117 | $this->checkStringGenerate('/a*/', ['', 'a', 'aa', 'aaa'], 3); 118 | $this->checkStringGenerate('/a+/', ['a', 'aa', 'aaa'], 3); 119 | $this->checkStringGenerate('/a{2}/', ['aa']); 120 | $this->checkStringGenerate('/a{2,3}/', ['aa', 'aaa']); 121 | $this->checkStringGenerate('/(a|(c|de)?f)/', ['a', 'cf', 'def', 'f']); 122 | $this->checkStringGenerate('/[abc]/', ['a', 'b', 'c']); 123 | $this->checkStringGenerate('/[a-dz]/', ['a', 'b', 'c', 'd', 'z']); 124 | $this->checkStringGenerate('/\d/', ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']); 125 | } 126 | 127 | public function testBuildStringFromRegex() 128 | { 129 | $this->assertEquals('abc', Regex::buildStringFromRegex('/abc/')); 130 | $this->assertEquals('qwe', 
Regex::buildStringFromRegex('/qwe/im')); 131 | $this->assertEquals('a/bc[de(fg', Regex::buildStringFromRegex('/a\\/bc\\[de\\(fg/')); 132 | $this->assertEquals(null, Regex::buildStringFromRegex('/a\\/bc[de]fg/')); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /tests/Extension/ParametrizedNodeTest.php: -------------------------------------------------------------------------------- 1 | assertTrue(is_object($a)); 13 | } 14 | 15 | /** The start rule invokes test with the string argument x; the input yx must parse and yy must not. NOTE(review): the definition of test shows no parameter list in this text - verify the grammar against the repository. */ public function testBasic() 16 | { 17 | $x = new Parser('start :=> test<"x">. 18 | test :=> "y" paramtest.'); 19 | 20 | $this->assertObject($x->parse("yx")); 21 | $this->assertFalse($x->parse("yy")); 22 | } 23 | 24 | /** The start rule offers two alternatives that invoke test with x and with o. Inputs using a single separator throughout must parse; inputs mixing both separators must fail. */ public function testTwoBranches() 25 | { 26 | $x = new Parser('start :=> test<"x"> 27 | :=> test<"o">. 28 | test :=> "a"*separator.'); 29 | 30 | $this->assertObject($x->parse("a")); 31 | $this->assertObject($x->parse("axa")); 32 | $this->assertObject($x->parse("aoa")); 33 | $this->assertObject($x->parse("axaxa")); 34 | $this->assertObject($x->parse("aoaoa")); 35 | $this->assertFalse($x->parse("axaoa")); 36 | $this->assertFalse($x->parse("aoaxa")); 37 | } 38 | 39 | /** Invokes test with four string arguments and checks inputs with and without the optional trailing group (z y x). */ public function testLotOfParams1() 40 | { 41 | $x = new Parser('start :=> test<"x", "y", "z", "v">. 42 | test :=> x y z v (z y x)?.'); 43 | 44 | $this->assertObject($x->parse("xyzv")); 45 | $this->assertObject($x->parse("xyzvzyx")); 46 | $this->assertFalse($x->parse("xyzvzyv")); 47 | $this->assertFalse($x->parse("xyzvzy")); 48 | $this->assertFalse($x->parse("xyz")); 49 | } 50 | 51 | /** Chain of rules test1, test2 and test3 below a start rule that invokes test1 with x and y; abyx must parse and abxy must fail. */ public function testNestedParams() 52 | { 53 | $x = new Parser('start :=> test1<"x", "y">. 54 | test1 :=> test2. 55 | test2 :=> "a" test3. 56 | test3 :=> "b" x y.'); 57 | 58 | $this->assertObject($x->parse("abyx")); 59 | $this->assertFalse($x->parse("abxy")); 60 | } 61 | 62 | /** The list rule invokes itself with the arguments (x e) and e in its first alternative; the assertions list accepted and rejected inputs. */ public function testRecursion() 63 | { 64 | $x = new Parser('start :=> list<"", "x">. 
65 | list :=> ?x list<(x e), e> 66 | :=> x+",".'); 67 | 68 | $this->assertObject($x->parse("xx,xx")); 69 | $this->assertObject($x->parse("xxx")); 70 | $this->assertObject($x->parse(",,,")); 71 | $this->assertObject($x->parse("xx,xx")); 72 | $this->assertObject($x->parse("xxx,xxx")); 73 | $this->assertObject($x->parse("xx,xx,xx")); 74 | $this->assertFalse($x->parse("xx,xx,xxx")); 75 | $this->assertFalse($x->parse("xxx,xx,xx")); 76 | $this->assertFalse($x->parse("xx,xx,x")); 77 | $this->assertFalse($x->parse("x,xx,xx")); 78 | $this->assertFalse($x->parse("xxx,xx,xxxx")); 79 | } 80 | 81 | /** Invokes test with six arguments; test invokes itself with rebuilt arguments. The accepted samples have equally long runs of x, y and z, the rejected samples do not. */ public function testLotOfParams() 82 | { 83 | $x = new Parser('start :=> test<"","x","","y","","z">. 84 | test :=> ?xs test<(xs x),x,(ys y),y,(zs z),z> 85 | :=> xs ys zs.'); 86 | 87 | $this->assertObject($x->parse("xyz")); 88 | $this->assertObject($x->parse("xxyyzz")); 89 | $this->assertObject($x->parse("xxxyyyzzz")); 90 | $this->assertObject($x->parse("xxxxyyyyzzzz")); 91 | $this->assertFalse($x->parse("xy")); 92 | $this->assertFalse($x->parse("xxyz")); 93 | $this->assertFalse($x->parse("xxxyyyzz")); 94 | $this->assertFalse($x->parse("xxxyyyzzzz")); 95 | $this->assertFalse($x->parse("xxxxyyyzzz")); 96 | $this->assertFalse($x->parse("xxyyyzzz")); 97 | } 98 | 99 | /** Passes invocations of list as arguments to list itself, three levels deep, each level given its own bracket and separator arguments. */ public function testParametrizedInParams() 100 | { 101 | $x = new Parser('start :=> list<"[", "]", list<"{", "}", list<"[", "]", /[a-z]+/, ";">, ",">, ",">. 102 | list :=> start elem*separator stop.'); 103 | 104 | $this->assertObject($x->parse('[]')); 105 | $this->assertObject($x->parse('[{}]')); 106 | $this->assertObject($x->parse('[{[]}]')); 107 | $this->assertObject($x->parse('[{[a;b],[b]},{[nmn]}]')); 108 | $this->assertFalse($x->parse('[[[]]]')); 109 | $this->assertFalse($x->parse('[{[a,b]}]')); 110 | $this->assertFalse($x->parse('[{[a;b];[a]}]')); 111 | } 112 | 113 | /** Input format, per the comment in the body: a binary number n, a colon, then n characters. Each group of assertions checks the exact length plus one input that is too short and one that is too long. */ public function testCounter() 114 | { 115 | //n in binary followed by ":" and n characters 116 | $x = new Parser('start :=> counter<":", char>. 
117 | counter :=> _counter. 118 | _counter :=> "1" _counter 119 | :=> "0" _counter 120 | :=> separator elems. 121 | char :=> /./ . 122 | '); 123 | 124 | $this->assertObject($x->parse("101:12345")); 125 | $this->assertFalse($x->parse("101:1234")); 126 | $this->assertFalse($x->parse("101:123456")); 127 | 128 | $this->assertObject($x->parse("110:123456")); 129 | $this->assertFalse($x->parse("110:12345")); 130 | $this->assertFalse($x->parse("110:1234567")); 131 | 132 | $this->assertObject($x->parse("10111:12345678901234567890123")); 133 | $this->assertFalse($x->parse("10111:1234567890123456789012")); 134 | $this->assertFalse($x->parse("10111:123456789012345678901234")); 135 | } 136 | 137 | /** Parses a sequence of counter items (start is counter+) and compares the string form of each item with the expected list; three further inputs are expected to fail. */ public function testCounter2() 138 | { 139 | $x = new Parser('start :=> counter+. 140 | counter :=> ?elems counter<(elems elems char),(num "1")> 141 | :=> ?elems counter<(elems elems),(num "0")> 142 | :=> elems num. 143 | char :=> /./ . 144 | '); 145 | 146 | /* Returns false when parsing fails; otherwise the main nodes of subnode 0, each converted with strval. */ $toList = function ($str) use ($x) { 147 | $parsed = $x->parse($str); 148 | if (!$parsed) { 149 | return false; 150 | } 151 | 152 | return array_map('strval', $parsed->getSubnode(0)->getMainNodes()); 153 | }; 154 | 155 | $this->assertEquals(["11", "11"], $toList("1111")); 156 | $this->assertEquals(["1010", "11"], $toList("101011")); 157 | $this->assertEquals(["10110101", "0010"], $toList("101101010010")); 158 | $this->assertEquals(["1001001111", "11011", "1011100"], $toList("1001001111110111011100")); 159 | $this->assertEquals(["11111101111010"], $toList("11111101111010")); 160 | $this->assertEquals(["1111110111", "1110"], $toList("11111101111110")); 161 | 162 | $this->assertFalse($x->parse("0000000000000")); 163 | $this->assertFalse($x->parse("1001001")); 164 | $this->assertFalse($x->parse("10110111")); 165 | } 166 | } 167 | --------------------------------------------------------------------------------