├── .gitmodules ├── README.md ├── astir ├── .gitignore ├── CMakeLists.txt ├── CharType.h ├── CppGenerationVisitor.cpp ├── CppGenerationVisitor.h ├── CppLLkParserGenerator.cpp ├── CppLLkParserGenerator.h ├── CppNFAGenerationHelper.cpp ├── CppNFAGenerationHelper.h ├── Exception.h ├── Field.cpp ├── Field.h ├── FileLocation.h ├── FiniteAutomatonDefinition.cpp ├── FiniteAutomatonDefinition.h ├── GenerationException.cpp ├── GenerationException.h ├── GenerationHelper.cpp ├── GenerationHelper.h ├── GenerationVisitor.h ├── Grammar.astir ├── IActing.h ├── IFileLocalizable.h ├── IGenerationVisitable.h ├── ILLkBuilding.h ├── ILLkFirstable.h ├── ILLkParserGenerable.h ├── INFABuildable.h ├── IReferencing.cpp ├── IReferencing.h ├── ISemanticEntity.h ├── ISyntacticEntity.cpp ├── ISyntacticEntity.h ├── IndentedStringStream.cpp ├── IndentedStringStream.h ├── LLkBuilder.cpp ├── LLkBuilder.h ├── LLkFirster.cpp ├── LLkFirster.h ├── LLkParserDefinition.cpp ├── LLkParserDefinition.h ├── LLkParserGenerator.h ├── LexicalAnalysisException.h ├── LexicalAnalyzer.cpp ├── LexicalAnalyzer.h ├── MachineDefinition.cpp ├── MachineDefinition.h ├── MachineStatement.cpp ├── MachineStatement.h ├── NFA.cpp ├── NFA.h ├── NFAAction.cpp ├── NFAAction.h ├── NFABuilder.cpp ├── NFABuilder.h ├── Regex.cpp ├── Regex.h ├── RegexAction.h ├── Resources │ ├── Exception.h │ ├── Location.cpp │ ├── Location.h │ ├── Machine.h │ ├── Parser.h │ ├── Production.h │ ├── ProductionStream.h │ ├── RawStream.cpp │ ├── RawStream.h │ ├── SpecimenFiniteAutomaton.scpp │ ├── SpecimenFiniteAutomaton.sh │ ├── SpecimenLLkParser.scpp │ ├── SpecimenLLkParser.sh │ └── Terminal.h ├── SemanticAnalysisException.cpp ├── SemanticAnalysisException.h ├── SymbolGroup.cpp ├── SymbolGroup.h ├── SyntacticAnalysisException.h ├── SyntacticAnalyzer.cpp ├── SyntacticAnalyzer.h ├── SyntacticTree.cpp ├── SyntacticTree.h ├── TestingSwitch.h ├── Tests │ ├── Hello Binary │ │ ├── BinaryRecognizer.astir │ │ ├── BinaryRecognizerMain.cpp │ │ ├── 
BinaryRecognizerRefactored.astir │ │ ├── BinaryTokenizer.astir │ │ └── BinaryTokenizerMain.cpp │ ├── Test01 │ │ ├── Test01.astir │ │ ├── Test01.sln │ │ ├── Test01.vcxproj │ │ ├── Test01.vcxproj.filters │ │ ├── Test01.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test02 │ │ ├── Test02.astir │ │ ├── Test02.sln │ │ ├── Test02.vcxproj │ │ ├── Test02.vcxproj.filters │ │ ├── Test02.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test03 │ │ ├── Test03.astir │ │ ├── Test03.sln │ │ ├── Test03.vcxproj │ │ ├── Test03.vcxproj.filters │ │ ├── Test03.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test04 │ │ ├── Test04.astir │ │ ├── Test04.sln │ │ ├── Test04.vcxproj │ │ ├── Test04.vcxproj.filters │ │ ├── Test04.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test05 │ │ ├── Test05.astir │ │ ├── Test05.sln │ │ ├── Test05.vcxproj │ │ ├── Test05.vcxproj.filters │ │ ├── Test05.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test06 │ │ ├── Test06.astir │ │ ├── Test06.sln │ │ ├── Test06.vcxproj │ │ ├── Test06.vcxproj.filters │ │ ├── Test06.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test07 │ │ ├── Test07.astir │ │ ├── Test07.sln │ │ ├── Test07.vcxproj │ │ ├── Test07.vcxproj.filters │ │ ├── Test07.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test08 │ │ ├── Test08.astir │ │ ├── Test08.sln │ │ ├── Test08.vcxproj │ │ ├── Test08.vcxproj.filters │ │ ├── Test08.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test09 │ │ ├── Test09.astir │ │ ├── Test09.sln │ │ ├── Test09.vcxproj │ │ ├── Test09.vcxproj.filters │ │ ├── Test09.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test10 │ │ ├── Test10.astir │ │ ├── Test10.sln │ │ ├── Test10.vcxproj │ │ ├── Test10.vcxproj.filters │ │ ├── Test10.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test11 │ │ ├── Test11.astir │ │ ├── Test11.sln │ │ ├── Test11.vcxproj │ │ ├── Test11.vcxproj.filters │ │ ├── Test11.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test12 │ │ ├── Test12.astir 
│ │ ├── Test12.sln │ │ ├── Test12.vcxproj │ │ ├── Test12.vcxproj.filters │ │ ├── Test12.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ ├── Test13 │ │ ├── Test13.astir │ │ ├── Test13.sln │ │ ├── Test13.vcxproj │ │ ├── Test13.vcxproj.filters │ │ ├── Test13.vcxproj.user │ │ ├── input.txt │ │ └── main.cpp │ └── Uncategorized │ │ ├── C99Tokenizer.astir │ │ ├── ExampleParser.apar │ │ └── ExampleTokenizer.astir ├── Token.cpp ├── Token.h ├── astir.sln ├── astir.vcxproj ├── astir.vcxproj.filters ├── astir.vcxproj.user └── main.cpp ├── docs ├── .nojekyll ├── _coverpage.md ├── _media │ ├── tornado-1.png │ ├── tornado-2.0.png │ ├── tornado-2.1.png │ ├── tornado-2.2.png │ ├── tornado-2.3.png │ └── tornado-2.4.png ├── _sidebar.md ├── about.md ├── command-line_interface.md ├── generation.md ├── hello_binary.md ├── index.html ├── language.md ├── output_interface.md ├── setting_up.md ├── something_context-free.md └── something_regular.md └── releases ├── astir-v1.0.0-Winx64.zip └── astir-v1.0.0-Winx86.zip /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "astir/dimcli"] 2 | path = astir/DimCli 3 | url = https://github.com/gknowles/dimcli 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Astir 2 | Astir is a flexible cross-platform parser generator producing output from object-oriented hierarchical context-free grammar specifications. 3 | 4 | Full documentation including a Getting started guide, language reference, generation reference, and output interface reference can be found [here](http://astir.dev). 
5 | -------------------------------------------------------------------------------- /astir/.gitignore: -------------------------------------------------------------------------------- 1 | .vs 2 | ./TestingSwitch.h 3 | Build 4 | Tests/*/.vs 5 | Tests/*/Output -------------------------------------------------------------------------------- /astir/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.15) 2 | 3 | project ("Astir" VERSION 1.0.0 LANGUAGES CXX) 4 | 5 | # C++ standard 6 | set (CMAKE_CXX_STANDARD_REQUIRED ON) 7 | set (CMAKE_CXX_STANDARD 17) 8 | 9 | # To Check if we need filesystem 10 | include(CheckCXXSymbolExists) 11 | CHECK_CXX_SYMBOL_EXISTS(std::filesystem::path::preferred_separator filesystem cxx17fs) 12 | 13 | # NOTE: stdc++fs is linked after add_executable() below, because 14 | # target_link_libraries() requires the target to exist already. 15 | 16 | 17 | # Add source to this project's executable. 18 | add_executable ("astir" 19 | "CharType.h" 20 | "CppGenerationVisitor.cpp" 21 | "CppGenerationVisitor.h" 22 | "CppLLkParserGenerator.cpp" 23 | "CppLLkParserGenerator.h" 24 | "CppNFAGenerationHelper.cpp" 25 | "CppNFAGenerationHelper.h" 26 | "DimCli/libs/dimcli/cli.h" 27 | "DimCli/libs/dimcli/cli.cpp" 28 | "Exception.h" 29 | "Field.cpp" 30 | "Field.h" 31 | "FileLocation.h" 32 | "FiniteAutomatonDefinition.cpp" 33 | "FiniteAutomatonDefinition.h" 34 | "GenerationException.cpp" 35 | "GenerationException.h" 36 | "GenerationHelper.cpp" 37 | "GenerationHelper.h" 38 | "GenerationVisitor.h" 39 | "IActing.h" 40 | "IFileLocalizable.h" 41 | "ILLkBuilding.h" 42 | "ILLkFirstable.h" 43 | "ILLkParserGenerable.h" 44 | "IndentedStringStream.cpp" 45 | "IndentedStringStream.h" 46 | "INFABuildable.h" 47 | "IReferencing.cpp" 48 | "IReferencing.h" 49 | "ISemanticEntity.h" 50 | "ISyntacticEntity.cpp" 51 | "ISyntacticEntity.h" 52 | "LexicalAnalysisException.h" 53 | "LexicalAnalyzer.cpp" 54 | "LexicalAnalyzer.h" 55 | "LLkBuilder.cpp" 56 | "LLkBuilder.h" 57 
| "LLkFirster.cpp" 58 | "LLkFirster.h" 59 | "LLkParserDefinition.cpp" 60 | "LLkParserDefinition.h" 61 | "LLkParserGenerator.h" 62 | "MachineDefinition.cpp" 63 | "MachineDefinition.h" 64 | "MachineStatement.cpp" 65 | "MachineStatement.h" 66 | "main.cpp" 67 | "NFA.cpp" 68 | "NFA.h" 69 | "NFAAction.cpp" 70 | "NFAAction.h" 71 | "NFABuilder.cpp" 72 | "NFABuilder.h" 73 | "Regex.cpp" 74 | "Regex.h" 75 | "RegexAction.h" 76 | "SemanticAnalysisException.cpp" 77 | "SemanticAnalysisException.h" 78 | "SymbolGroup.cpp" 79 | "SymbolGroup.h" 80 | "SyntacticAnalysisException.h" 81 | "SyntacticAnalyzer.cpp" 82 | "SyntacticAnalyzer.h" 83 | "SyntacticTree.cpp" 84 | "SyntacticTree.h" 85 | "TestingSwitch.h" 86 | "Token.cpp" 87 | "Token.h" 88 | ) 89 | 90 | # Link the separate filesystem library only when the standard library does not already provide std::filesystem. 91 | if(NOT cxx17fs) 92 | target_link_libraries("astir" PRIVATE stdc++fs) 93 | endif() 94 | -------------------------------------------------------------------------------- /astir/CharType.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | using CharType = unsigned char; 4 | using ComputationCharType = signed short int; -------------------------------------------------------------------------------- /astir/CppGenerationVisitor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "GenerationVisitor.h" 7 | #include "FiniteAutomatonDefinition.h" 8 | 9 | class CppGenerationVisitor : public GenerationVisitor { 10 | public: 11 | CppGenerationVisitor(const std::string& folderPath) 12 | : GenerationVisitor(folderPath), m_hasIncludedRawStreamFiles(false) { } 13 | 14 | void setup() const override; 15 | 16 | void visit(const SyntacticTree* tree) override; 17 | void visit(const FiniteAutomatonDefinition* tree) override; 18 | void visit(const LLkParserDefinition* llkParserDefinition) override; 19 | 20 | void visit(const TypeFormingStatement* component) override; 21 | void visit(const FlagField* flagField) override; 22 | void visit(const RawField* rawField) override; 23 | void visit(const 
ItemField * itemField) override; 24 | void visit(const ListField * listField) override; 25 | 26 | private: 27 | void buildUniversalMachineMacros(std::map& macros, const MachineDefinition* machine); 28 | std::string combineForwardDeclarationsAndClear(); 29 | 30 | std::stringstream m_output; 31 | std::set m_typeFormingStatementsVisited; 32 | void resetOutput(); 33 | std::string outputAndReset(); 34 | 35 | bool m_hasIncludedRawStreamFiles; 36 | }; 37 | 38 | -------------------------------------------------------------------------------- /astir/CppLLkParserGenerator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "LLkParserGenerator.h" 4 | #include "IndentedStringStream.h" 5 | 6 | class CppLLkParserGenerator : public LLkParserGenerator { 7 | public: 8 | CppLLkParserGenerator(LLkBuilder& builder); 9 | 10 | void visitTypeFormingStatements(const std::list>& rootDisjunction) override; 11 | void visitRootDisjunction(const std::list>& typeFormingStatements) override; 12 | 13 | void visit(const CategoryStatement* category) override; 14 | void visit(const PatternStatement* rule) override; 15 | void visit(const ProductionStatement* rule) override; 16 | void visit(const RegexStatement* rule) override; 17 | 18 | void visit(const DisjunctiveRegex* regex) override; 19 | void visit(const ConjunctiveRegex* regex) override; 20 | 21 | void visit(const RepetitiveRegex* regex) override; 22 | 23 | void visit(const EmptyRegex* regex) override; 24 | void visit(const AnyRegex* regex) override; 25 | void visit(const ExceptAnyRegex* regex) override; 26 | void visit(const LiteralRegex* regex) override; 27 | void visit(const ArbitrarySymbolRegex* regex) override; 28 | void visit(const ReferenceRegex* regex) override; 29 | 30 | std::string parsingDeclarations() const; 31 | std::string parsingDefinitions() const { return m_output.str(); } 32 | 33 | private: 34 | void handleTypeFormingPreamble(const std::string& typeName); 35 | 
void handleTypeFormingPostamble(); 36 | void handleRuleBody(const RuleStatement* rule); 37 | std::string makeConditionTesting(const LLkDecisionPoint& dp, unsigned long depth = 0, bool needsUnpeeking = false) const; 38 | std::string makeCondition(const std::shared_ptr& sgPtr, std::string& postamble, unsigned long depth) const; 39 | 40 | std::string makeExpectationMessage(const LLkDecisionPoint& dp); 41 | std::string makeExpectationMessage(const std::vector& dps); 42 | std::string makeExpectationGrammar(const LLkDecisionPoint& dp); 43 | 44 | std::pair makeActionExecution(const std::list& actions) const; 45 | 46 | IndentedStringStream m_output; 47 | std::list m_declarations; 48 | }; -------------------------------------------------------------------------------- /astir/CppNFAGenerationHelper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "NFA.h" 6 | 7 | using ActionRegisterId = unsigned long; 8 | 9 | class CppNFAGenerationHelper { 10 | public: 11 | CppNFAGenerationHelper(const std::string& machineName, const NFA& fa, const size_t inputTerminalCount) 12 | : m_machineName(machineName), m_fa(fa), m_inputTerminalCount(inputTerminalCount) { } 13 | 14 | void generateMechanicsMaps(std::string& stateMap, std::string& actionRegisterDeclarations, std::string& actionRegisterDefinitions, std::string& transitionActionMap, std::string& stateActionMap) const; 15 | std::string generateContextDeclarations() const; 16 | std::string generateStateFinality() const; 17 | private: 18 | const std::string& m_machineName; 19 | const NFA& m_fa; 20 | const size_t m_inputTerminalCount; 21 | 22 | std::string generateActionRegisterDeclaration(ActionRegisterId registerId, const NFAActionRegister& nar) const; 23 | std::string generateActionRegisterDefinition(ActionRegisterId registerId, const NFAActionRegister& nar) const; 24 | std::string generateActionOperation(const NFAAction& na) const; 25 | }; 26 | 27 | 
-------------------------------------------------------------------------------- /astir/Exception.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class Exception : public std::runtime_error { 7 | public: 8 | Exception(const std::string& message) 9 | : std::runtime_error(message) {} 10 | virtual ~Exception() = default; 11 | }; 12 | -------------------------------------------------------------------------------- /astir/Field.cpp: -------------------------------------------------------------------------------- 1 | #include "Field.h" 2 | 3 | #include "GenerationVisitor.h" 4 | 5 | void FlagField::accept(GenerationVisitor* visitor) const { 6 | visitor->visit(this); 7 | } 8 | 9 | void RawField::accept(GenerationVisitor* visitor) const { 10 | visitor->visit(this); 11 | } 12 | 13 | void ItemField::accept(GenerationVisitor* visitor) const { 14 | visitor->visit(this); 15 | } 16 | 17 | void ListField::accept(GenerationVisitor* visitor) const { 18 | visitor->visit(this); 19 | } 20 | -------------------------------------------------------------------------------- /astir/Field.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ISemanticEntity.h" 4 | #include "IGenerationVisitable.h" 5 | 6 | struct Field : public ISyntacticEntity, public IGenerationVisitable { 7 | std::string name; 8 | }; 9 | 10 | struct FlagField : public Field { 11 | void accept(GenerationVisitor* visitor) const override; 12 | }; 13 | 14 | struct RawField : public Field { 15 | void accept(GenerationVisitor* visitor) const override; 16 | }; 17 | 18 | struct MachineDefinition; 19 | struct VariablyTypedField : public Field { 20 | std::string type; 21 | const MachineDefinition* machineOfTheType; 22 | 23 | VariablyTypedField() 24 | : type(), machineOfTheType(nullptr) { } 25 | }; 26 | 27 | struct ItemField : public VariablyTypedField { 28 | void 
accept(GenerationVisitor* visitor) const override; 29 | }; 30 | 31 | struct ListField : public VariablyTypedField { 32 | void accept(GenerationVisitor* visitor) const override; 33 | }; 34 | -------------------------------------------------------------------------------- /astir/FileLocation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | struct FileLocation { 6 | unsigned long line; 7 | unsigned long column; 8 | 9 | FileLocation() 10 | : line(0), column(0) { } 11 | 12 | FileLocation(unsigned long line, unsigned long column) 13 | : line(line), column(column) { } 14 | 15 | std::string toString() const { 16 | return std::to_string(line) + ":" + std::to_string(column); 17 | } 18 | }; -------------------------------------------------------------------------------- /astir/FiniteAutomatonDefinition.cpp: -------------------------------------------------------------------------------- 1 | #include "FiniteAutomatonDefinition.h" 2 | 3 | #include "SemanticAnalysisException.h" 4 | #include "NFABuilder.h" 5 | #include "GenerationVisitor.h" 6 | 7 | void FiniteAutomatonDefinition::initialize() { 8 | if (initialized()) { // really necessary 9 | return; 10 | } 11 | 12 | this->MachineDefinition::initialize(); 13 | 14 | if (!this->uses.empty()) { 15 | throw SemanticAnalysisException("The machine '" + name + "' declared at " + locationString() + " `uses` at least one other machine -- finite automata using other machines is not supported, but the finite automaton can still be `on` input from some other machine"); 16 | } 17 | 18 | // check for any signs of recursion 19 | for (const auto& statementPair : statements) { 20 | std::list relevantReferencesEncountered; 21 | auto recursiveReferenceLocalizableInstance = statementPair.second->findRecursiveReference(relevantReferencesEncountered); 22 | if (recursiveReferenceLocalizableInstance != nullptr) { 23 | std::string hierarchyPath = 
relevantReferencesEncountered.front()->referenceName(); 24 | relevantReferencesEncountered.pop_front(); // the first reference already seeds hierarchyPath, do not list it twice 25 | for (const auto& referenceEncountered : relevantReferencesEncountered) { 26 | hierarchyPath += "-" + referenceEncountered->referenceName(); 27 | } 28 | throw SemanticAnalysisException("Rule/category reference recursion found in the path " + hierarchyPath + "; start at " + statementPair.second->locationString() + ", end at " + recursiveReferenceLocalizableInstance->locationString() + " - no recursion is allowed in finite automata"); 29 | } 30 | } 31 | 32 | if (this->on.second) { 33 | if (!on.second->hasPurelyTerminalRoots()) { 34 | throw SemanticAnalysisException("The finite automaton '" + this->name + "' declared at " + this->locationString() + " references the machine '" + this->on.first + "' that does not have purely terminal roots - such a machine can not serve as input for a finite automaton, and '" + this->name + "' is no exception"); 35 | } 36 | } 37 | 38 | NFA base; 39 | NFABuilder builder(*this, nullptr, "m_token"); 40 | auto typeFormingStatements = this->getTypeFormingStatements(); 41 | for (const auto& typeFormingStatement : typeFormingStatements) { 42 | if (typeFormingStatement->rootness == Rootness::Unspecified) { 43 | continue; 44 | } 45 | 46 | const std::string& newSubcontextName = typeFormingStatement->name; 47 | std::shared_ptr componentCastPtr = std::dynamic_pointer_cast(typeFormingStatement); 48 | NFA alternativeNfa = componentCastPtr->accept(builder); 49 | 50 | NFAActionRegister elevateContextActionRegister; 51 | // if the component is type-forming, a new context has been created in alternativeNfa and it needs to be elevated to the category level 52 | // but, if it is also terminal, we need to associate the raw capture with the context before elevating 53 | auto productionStatement = std::dynamic_pointer_cast(typeFormingStatement); 54 | if (productionStatement && productionStatement->terminality == Terminality::Terminal) { 55 | 
elevateContextActionRegister.emplace_back(NFAActionType::TerminalizeContext, "m_token", newSubcontextName); 56 | } 57 | 58 | // if the typeformingStatement is an ignored root, create the NFA and the context but do not elevate here 59 | if (typeFormingStatement->rootness == Rootness::AcceptRoot) { 60 | elevateContextActionRegister.emplace_back(NFAActionType::ElevateContext, "m_token", newSubcontextName); 61 | } else if (typeFormingStatement->rootness == Rootness::IgnoreRoot) { 62 | elevateContextActionRegister.emplace_back(NFAActionType::IgnoreContext, "m_token", newSubcontextName); 63 | } 64 | 65 | alternativeNfa.concentrateFinalStates(elevateContextActionRegister); 66 | 67 | base |= alternativeNfa; 68 | } 69 | 70 | m_nfa = base.buildPseudoDFA(); 71 | } 72 | 73 | void FiniteAutomatonDefinition::accept(GenerationVisitor* visitor) const { 74 | visitor->visit(this); 75 | } -------------------------------------------------------------------------------- /astir/FiniteAutomatonDefinition.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "SyntacticTree.h" 4 | #include "MachineDefinition.h" 5 | 6 | struct FiniteAutomatonDefinition : public MachineDefinition { 7 | FiniteAutomatonDefinition() 8 | : MachineDefinition({ 9 | { MachineFlag::ProductionsTerminalByDefault, MachineDefinitionAttribute(true) }, 10 | { MachineFlag::ProductionsRootByDefault, MachineDefinitionAttribute(true) }, 11 | { MachineFlag::CategoriesRootByDefault, MachineDefinitionAttribute(false) }, 12 | { MachineFlag::AmbiguityResolvedByPrecedence, MachineDefinitionAttribute(false) } 13 | }) { } 14 | 15 | void initialize() override; 16 | 17 | const NFA& getNFA() const { return m_nfa; } 18 | 19 | void accept(GenerationVisitor* visitor) const override; 20 | private: 21 | std::shared_ptr m_finiteAutomatonDefinition; 22 | NFA m_nfa; 23 | }; -------------------------------------------------------------------------------- 
/astir/GenerationException.cpp: -------------------------------------------------------------------------------- 1 | #include "GenerationException.h" 2 | -------------------------------------------------------------------------------- /astir/GenerationException.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Exception.h" 4 | #include "IFileLocalizable.h" 5 | 6 | class GenerationException : public Exception { 7 | public: 8 | GenerationException() 9 | : Exception("A generation exception has been encountered") { } 10 | GenerationException(const std::string& message) 11 | : Exception(message) { } 12 | GenerationException(const std::string& message, const IFileLocalizable& element) 13 | : Exception(message + " -- on " + element.locationString()) { } 14 | }; 15 | -------------------------------------------------------------------------------- /astir/GenerationHelper.cpp: -------------------------------------------------------------------------------- 1 | #include "GenerationHelper.h" 2 | 3 | #include "GenerationException.h" 4 | 5 | void GenerationHelper::macroWrite(const std::string& sourceString, const std::map& macroPairs, std::ostream& m_output) { 6 | size_t it = 0; 7 | unsigned long lastIndentation = 0; 8 | size_t sourceStringLength = sourceString.length(); 9 | while (it < sourceStringLength) { 10 | auto startOfMacro = sourceString.find("${{", it); 11 | if (startOfMacro == std::string::npos) { 12 | break; 13 | } 14 | 15 | size_t endOfMacro = sourceString.find("}}", startOfMacro); 16 | if (endOfMacro == std::string::npos) { 17 | throw GenerationException("Invalid specimen file supplied, a macro start '${{' without the matching '}}' encountered"); 18 | } 19 | 20 | const std::string macroName = sourceString.substr(startOfMacro+3, endOfMacro-(startOfMacro + 3)); 21 | auto macroPairIt = macroPairs.find(macroName); 22 | if (macroPairIt == macroPairs.end()) { 23 | throw GenerationException("Invalid 
specimen file supplied, unrecognized macro '" + macroName + "' encountered"); 24 | } 25 | 26 | // identify the preceding text 27 | const char* precedingTextStart = sourceString.c_str() + it; 28 | size_t precedingTextCount = startOfMacro - it; 29 | 30 | // copy the preceding text unchanged 31 | m_output.write(precedingTextStart, precedingTextCount); 32 | 33 | // scan the preceding text backwards for the indentation (tabs right after the last newline) to be used for the text inserted 34 | if (precedingTextCount > 0) { 35 | const char* precedingTextLastCharacter = precedingTextStart + precedingTextCount - 1; 36 | unsigned long currentIndentation = 0; 37 | while (precedingTextLastCharacter >= precedingTextStart) { 38 | if (*precedingTextLastCharacter == '\t') { 39 | ++currentIndentation; 40 | } else if (*precedingTextLastCharacter == '\n') { 41 | break; 42 | } else { 43 | currentIndentation = 0; 44 | } 45 | 46 | --precedingTextLastCharacter; 47 | } 48 | if (precedingTextLastCharacter >= precedingTextStart) { 49 | // i.e. the scan stopped on a newline (possibly the very first preceding character) between the last macro occurrence and the new one 50 | lastIndentation = currentIndentation; 51 | } else { 52 | // i.e. 
no newline was encountered between the last macro occurence and the new one 53 | // just use lastIndentation 54 | } 55 | } 56 | 57 | const std::string& macroReplacementText = macroPairIt->second; 58 | if (lastIndentation > 0) { 59 | const std::string indentationString(lastIndentation, '\t'); 60 | size_t newTextLength = macroReplacementText.length(); 61 | 62 | size_t theCharacterAfterLastNewlineOffset = 0; 63 | while (theCharacterAfterLastNewlineOffset < newTextLength) { 64 | size_t offsetFromCurrentPos = macroReplacementText.find('\n', theCharacterAfterLastNewlineOffset); 65 | if (offsetFromCurrentPos == std::string::npos) { 66 | m_output.write(macroReplacementText.c_str() + theCharacterAfterLastNewlineOffset, newTextLength - theCharacterAfterLastNewlineOffset); 67 | break; 68 | } 69 | ++offsetFromCurrentPos; 70 | 71 | m_output.write(macroReplacementText.c_str() + theCharacterAfterLastNewlineOffset, offsetFromCurrentPos - theCharacterAfterLastNewlineOffset); 72 | m_output << indentationString; 73 | theCharacterAfterLastNewlineOffset = offsetFromCurrentPos; 74 | } 75 | } else { 76 | m_output << macroReplacementText; 77 | } 78 | 79 | it = endOfMacro + 2; 80 | } 81 | 82 | m_output.write(sourceString.c_str() + it, sourceString.length()-it); 83 | m_output.flush(); 84 | } 85 | -------------------------------------------------------------------------------- /astir/GenerationHelper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class GenerationHelper { 8 | public: 9 | static void macroWrite(const std::string& sourceString, const std::map& macroPairs, std::ostream& m_output); 10 | 11 | private: 12 | GenerationHelper() = default; 13 | }; 14 | 15 | -------------------------------------------------------------------------------- /astir/GenerationVisitor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 
#include "Field.h" 5 | #include "SyntacticTree.h" 6 | #include "FiniteAutomatonDefinition.h" 7 | #include "LLkParserDefinition.h" 8 | #include "MachineStatement.h" 9 | 10 | #include 11 | 12 | class GenerationVisitor { 13 | public: 14 | virtual void setup() const = 0; 15 | 16 | virtual void visit(const SyntacticTree* tree) = 0; 17 | virtual void visit(const FiniteAutomatonDefinition* fad) = 0; 18 | virtual void visit(const LLkParserDefinition* llkParserDefinition) = 0; 19 | 20 | virtual void visit(const TypeFormingStatement* statement) = 0; 21 | virtual void visit(const FlagField* flagField) = 0; 22 | virtual void visit(const RawField* rawField) = 0; 23 | virtual void visit(const ItemField* itemField) = 0; 24 | virtual void visit(const ListField* listField) = 0; 25 | protected: 26 | GenerationVisitor(const std::string& path) 27 | : m_folderPath(path) { } 28 | 29 | const std::filesystem::path m_folderPath; 30 | }; 31 | 32 | -------------------------------------------------------------------------------- /astir/Grammar.astir: -------------------------------------------------------------------------------- 1 | /* 2 | KW_USES, 3 | 4 | KW_ON, 5 | KW_WITH, 6 | 7 | KW_FINITE, 8 | KW_AUTOMATON, 9 | KW_PRODUCTIONS_TERMINAL_BY_DEFAULT, 10 | KW_PRODUCTIONS_NONTERMINAL_BY_DEFAULT, 11 | KW_PRODUCTIONS_ROOT_BY_DEFAULT, 12 | KW_PRODUCTIONS_NONROOT_BY_DEFAULT, 13 | KW_CATEGORIES_ROOT_BY_DEFAULT, 14 | KW_CATEGORIES_NONROOT_BY_DEFAULT, 15 | KW_AMBIGUITY_DISALLOWED, 16 | KW_AMBIGUITY_RESOLVED_BY_PRECEDENCE, 17 | 18 | KW_IGNORED, 19 | KW_ROOT, 20 | KW_TERMINAL, 21 | KW_NONTERMINAL, 22 | KW_CATEGORY, 23 | KW_PRODUCTION, 24 | KW_PATTERN, 25 | KW_REGEX, 26 | 27 | KW_ITEM, 28 | KW_LIST, 29 | KW_RAW, 30 | 31 | KW_FLAG, 32 | KW_UNFLAG, 33 | KW_CAPTURE, 34 | KW_EMPTY, 35 | KW_APPEND, 36 | KW_PREPEND, 37 | KW_SET, 38 | KW_UNSET, 39 | KW_PUSH, 40 | KW_POP, 41 | KW_CLEAR, 42 | 43 | IDENTIFIER, 44 | STRING, 45 | NUMBER, 46 | 47 | PAR_LEFT, 48 | PAR_RIGHT, 49 | SQUARE_LEFT, 50 | SQUARE_RIGHT, 51 | 
CURLY_LEFT, 52 | CURLY_RIGHT, 53 | 54 | OP_COLON, 55 | OP_EQUALS, 56 | OP_LEFTARR, 57 | OP_SEMICOLON, 58 | OP_COMMA, 59 | OP_DOT, 60 | OP_CARET, 61 | OP_DOLLAR, 62 | 63 | OP_STAR, 64 | OP_PLUS, 65 | OP_QM, 66 | OP_OR, 67 | OP_FWDSLASH, 68 | 69 | OP_AMPERSAND, 70 | OP_DASH, 71 | OP_AT, 72 | 73 | EOS 74 | */ 75 | 76 | production specification = 77 | specificationStatement* 78 | ; 79 | 80 | production specificationStatement = 81 | machineDefinition 82 | | usesStatement 83 | ; 84 | 85 | production usesStatement = 86 | KW_USES STRING 87 | ; 88 | 89 | production machineDefinition = 90 | machineType IDENTIFIER 91 | (KW_WITH machineOptionList)? 92 | (KW_ON IDENTIFIER)? 93 | (KW_USES IDENTIFIER (OP_COMMA IDENTIFIER)*)? CURLY_LEFT 94 | machineDefinitionBody 95 | CURLY_RIGHT 96 | ; 97 | 98 | pattern machineType = 99 | KW_FINITE KW_AUTOMATON 100 | | KW_LL PAR_LEFT (NUMBER|KW_FINITE) PAR_RIGHT KW_PARSER 101 | ; 102 | 103 | pattern machineOptionList = 104 | machineOption (OP_COMMA machineOption)* 105 | ; 106 | 107 | pattern machineOption = 108 | KW_PRODUCTIONS_TERMINAL_BY_DEFAULT 109 | | KW_PRODUCTIONS_NONTERMINAL_BY_DEFAULT 110 | | KW_PRODUCTIONS_ROOT_BY_DEFAULT 111 | | KW_PRODUCTIONS_NONROOT_BY_DEFAULT 112 | | KW_CATEGORIES_ROOT_BY_DEFAULT 113 | | KW_CATEGORIES_NONROOT_BY_DEFAULT 114 | | KW_AMBIGUITY_DISALLOWED 115 | | KW_AMBIGUITY_RESOLVED_BY_PRECEDENCE 116 | ; 117 | 118 | production machineDefinitionBody = 119 | statement* 120 | ; 121 | 122 | production statement = 123 | categoryStatement 124 | | productionStatement 125 | | patternStatement 126 | | regexStatement 127 | ; 128 | 129 | pattern attributableStatementElaboration = 130 | (OP_COLON declarationNameList)? CURLY_LEFT memberDeclaration* CURLY_RIGHT 131 | ; 132 | 133 | pattern typeFormingStatementElaboration = 134 | (KW_ROOT|KW_IGNORED KW_ROOT)? 
135 | ; 136 | 137 | production categoryStatement = 138 | typeFormingStatementElaboration KW_CATEGORY IDENTIFIER attributableStatementElaboration OP_SEMICOLON 139 | ; 140 | 141 | production memberDeclaration = 142 | KW_FLAG IDENTIFIER OP_SEMICOLON 143 | | KW_RAW IDENTIFIER OP_SEMICOLON 144 | | IDENTIFIER (KW_LIST|KW_ITEM)? IDENTIFIER OP_SEMICOLON 145 | ; 146 | 147 | pattern categoryStatementBody = 148 | IDENTIFIER* 149 | ; 150 | 151 | production productionStatement = 152 | typeFormingStatementElaboration 153 | terminalityElaboration 154 | KW_PRODUCTION? IDENTIFIER attributableStatementElaboration OP_EQUALS 155 | ruleStatementBody 156 | OP_SEMICOLON 157 | ; 158 | 159 | pattern terminalityElaboration = 160 | (KW_TERMINAL | KW_NONTERMINAL)? 161 | ; 162 | 163 | production patternStatement = 164 | KW_PATTERN IDENTIFIER attributableStatementElaboration OP_EQUALS 165 | ruleStatementBody 166 | OP_SEMICOLON 167 | ; 168 | 169 | production regexStatement = 170 | KW_REGEX IDENTIFIER OP_EQUALS 171 | ruleStatementBody 172 | OP_SEMICOLON 173 | ; 174 | 175 | production ruleStatementBody = 176 | disjunctiveRegex? 
177 | ; 178 | 179 | production disjunctiveRegex = 180 | conjunctiveRegex (OP_OR conjunctiveRegex)* 181 | ; 182 | 183 | production conjunctiveRegex = 184 | rootRegex+ 185 | ; 186 | 187 | production rootRegex = 188 | (repetitiveRegex | atomicRegex) actionTag* 189 | ; 190 | 191 | production actionTag = 192 | OP_AT action OP_COLON IDENTIFIER 193 | ; 194 | 195 | pattern action = 196 | KW_FLAG 197 | | KW_UNFLAG 198 | | KW_CAPTURE 199 | | KW_EMPTY 200 | | KW_APPEND 201 | | KW_PREPEND 202 | | KW_SET 203 | | KW_UNSET 204 | | KW_PUSH 205 | | KW_POP 206 | | KW_CLEAR 207 | ; 208 | 209 | production repetitiveRegex = 210 | atomicRegex OP_QM 211 | | atomicRegex OP_STAR 212 | | atomicRegex OP_PLUS 213 | | atomicRegex CURLY_LEFT NUMBER OP_COMMA NUMBER CURLY_RIGHT 214 | ; 215 | 216 | production atomicRegex = 217 | PAR_LEFT disjunctiveRegex PAR_RIGHT 218 | | SQUARE_LEFT OP_CARET (STRING|regexRange)+ SQUARE_RIGHT 219 | | SQUARE_LEFT (STRING|regexRange)+ SQUARE_RIGHT 220 | | STRING 221 | | (KW_EMPTY | PAR_LEFT PAR_RIGHT) 222 | | OP_DOT 223 | | referenceRegex 224 | ; 225 | 226 | production regexRange = 227 | STRING OP_DASH STRING 228 | ; 229 | 230 | production referenceRegex = 231 | IDENTIFIER 232 | ; 233 | -------------------------------------------------------------------------------- /astir/IActing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct MachineDefinition; 4 | struct MachineStatement; 5 | struct Field; 6 | class IActing { 7 | public: 8 | virtual void checkAndTypeformActionUsage(const MachineDefinition& machine, const MachineStatement* context, bool areActionsAllowed) { }; 9 | 10 | protected: 11 | IActing() = default; 12 | }; -------------------------------------------------------------------------------- /astir/IFileLocalizable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "FileLocation.h" 4 | 5 | class IFileLocalizable { 6 | public: 7 | virtual 
const FileLocation& location() const = 0; 8 | std::string locationString() const { return location().toString(); } 9 | 10 | protected: 11 | IFileLocalizable() = default; 12 | virtual ~IFileLocalizable() = default; 13 | }; 14 | 15 | typedef const IFileLocalizable* IFileLocalizableCPtr; -------------------------------------------------------------------------------- /astir/IGenerationVisitable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | class GenerationVisitor; 4 | 5 | class IGenerationVisitable { 6 | public: 7 | virtual void accept(GenerationVisitor* visitor) const = 0; 8 | 9 | protected: 10 | IGenerationVisitable() = default; 11 | }; -------------------------------------------------------------------------------- /astir/ILLkBuilding.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | class LLkBuilder; 4 | 5 | class ILLkBuilding { 6 | public: 7 | virtual void accept(LLkBuilder* nfaBuilder) const = 0; 8 | 9 | virtual ~ILLkBuilding() = default; 10 | protected: 11 | ILLkBuilding() = default; 12 | }; 13 | 14 | typedef const ILLkBuilding* ILLkBuildingCPtr; -------------------------------------------------------------------------------- /astir/ILLkFirstable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include "SymbolGroup.h" 6 | 7 | class LLkFirster; 8 | class ILLkFirstable { 9 | public: 10 | virtual SymbolGroupList first(LLkFirster* firster, const SymbolGroupList& prefix) const = 0; 11 | 12 | protected: 13 | ILLkFirstable() = default; 14 | }; 15 | 16 | typedef const ILLkFirstable* ILLkFirstableCPtr; 17 | 18 | class ILLkNonterminal : public ILLkFirstable { 19 | public: 20 | protected: 21 | ILLkNonterminal() = default; 22 | }; 23 | 24 | typedef const ILLkNonterminal* ILLkNonterminalCPtr; 
-------------------------------------------------------------------------------- /astir/ILLkParserGenerable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | class LLkParserGenerator; 4 | class ILLkParserGenerable { 5 | public: 6 | virtual void accept(LLkParserGenerator* generator) const = 0; 7 | 8 | protected: 9 | ILLkParserGenerable() = default; 10 | }; 11 | 12 | typedef const ILLkParserGenerable* ILLkParserGenerableCPtr; -------------------------------------------------------------------------------- /astir/INFABuildable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | class NFA; 4 | class NFABuilder; 5 | 6 | class INFABuildable { 7 | public: 8 | virtual NFA accept(const NFABuilder& nfaBuilder) const = 0; 9 | 10 | virtual ~INFABuildable() = default; 11 | protected: 12 | INFABuildable() = default; 13 | }; -------------------------------------------------------------------------------- /astir/IReferencing.cpp: -------------------------------------------------------------------------------- 1 | #include "IReferencing.h" 2 | 3 | std::string IReferencing::referenceName() const { 4 | return std::string(); 5 | } 6 | 7 | void IReferencing::completeReferences(const MachineDefinition& machine) { } 8 | 9 | IFileLocalizableCPtr IReferencing::findRecursiveReference(std::list& referencingEntitiesEncountered) const { 10 | return nullptr; 11 | } 12 | -------------------------------------------------------------------------------- /astir/IReferencing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "IFileLocalizable.h" 7 | 8 | class IReferencing; 9 | typedef const IReferencing* IReferencingCPtr; 10 | 11 | struct MachineDefinition; 12 | class IReferencing { 13 | public: 14 | virtual ~IReferencing() = default; 15 | 16 | virtual std::string referenceName() const; 17 | 
virtual void completeReferences(const MachineDefinition& machine); 18 | virtual IFileLocalizableCPtr findRecursiveReference(std::list& referencingEntitiesEncountered) const; 19 | 20 | protected: 21 | IReferencing() = default; 22 | }; 23 | -------------------------------------------------------------------------------- /astir/ISemanticEntity.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "IFileLocalizable.h" 6 | #include "ISyntacticEntity.h" 7 | 8 | class ISemanticEntity { 9 | public: 10 | virtual void initialize() { 11 | m_initialized = true; 12 | } 13 | 14 | bool initialized() const { return m_initialized; } 15 | protected: 16 | ISemanticEntity() 17 | : m_initialized(false) { } 18 | virtual ~ISemanticEntity() = default; 19 | private: 20 | bool m_initialized; 21 | }; -------------------------------------------------------------------------------- /astir/ISyntacticEntity.cpp: -------------------------------------------------------------------------------- 1 | #include "ISyntacticEntity.h" 2 | 3 | const FileLocation& ISyntacticEntity::location() const { 4 | return m_location; 5 | } 6 | 7 | void ISyntacticEntity::copyLocation(const IFileLocalizable& anotherLocalizableThingy) { 8 | m_location = anotherLocalizableThingy.location(); 9 | } 10 | -------------------------------------------------------------------------------- /astir/ISyntacticEntity.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "IFileLocalizable.h" 4 | 5 | class ISyntacticEntity : public IFileLocalizable { 6 | public: 7 | const FileLocation& location() const override; 8 | 9 | void copyLocation(const IFileLocalizable& anotherLocalizableThingy); 10 | protected: 11 | ISyntacticEntity() = default; 12 | virtual ~ISyntacticEntity() = default; 13 | 14 | private: 15 | FileLocation m_location; 16 | }; 
-------------------------------------------------------------------------------- /astir/IndentedStringStream.cpp: -------------------------------------------------------------------------------- 1 | #include "IndentedStringStream.h" 2 | 3 | void IndentedStringStream::put(const std::string& s) { 4 | if (s.empty()) { 5 | return; 6 | } 7 | 8 | _put(s); 9 | } 10 | 11 | void IndentedStringStream::putln(const std::string& s) { 12 | if (s.empty()) { 13 | return; 14 | } 15 | 16 | _put(s); 17 | *this << std::endl; 18 | } 19 | 20 | void IndentedStringStream::newline() { 21 | *this << m_indentation << std::endl; 22 | } 23 | 24 | void IndentedStringStream::indent() { 25 | *this << m_indentation; 26 | } 27 | 28 | void IndentedStringStream::increaseIndentation() { 29 | m_indentation += '\t'; 30 | } 31 | 32 | void IndentedStringStream::decreaseIndentation() { 33 | m_indentation.resize(m_indentation.size() - 1); 34 | } 35 | 36 | void IndentedStringStream::_put(const std::string& s) { 37 | size_t nextStop = 0; 38 | while (nextStop < s.length()) { 39 | size_t lastStop = nextStop; 40 | nextStop = s.find('\n', lastStop); 41 | if(nextStop != std::string::npos) { 42 | ++nextStop; 43 | *this << m_indentation << s.substr(lastStop, nextStop-lastStop); 44 | } else { 45 | *this << m_indentation << s.substr(lastStop); 46 | break; 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /astir/IndentedStringStream.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class IndentedStringStream : public std::stringstream { 6 | public: 7 | void put(const std::string& s); 8 | void putln(const std::string& s); 9 | 10 | void newline(); 11 | void indent(); 12 | 13 | void increaseIndentation(); 14 | void decreaseIndentation(); 15 | 16 | private: 17 | void _put(const std::string& s); 18 | std::string m_indentation; 19 | }; 
-------------------------------------------------------------------------------- /astir/LLkBuilder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "MachineStatement.h" 4 | #include "Regex.h" 5 | 6 | #include 7 | 8 | #include "ILLkFirstable.h" 9 | #include "SymbolGroup.h" 10 | #include "LLkFirster.h" 11 | 12 | struct LLkNonterminalContext { 13 | ILLkNonterminalCPtr parent; 14 | std::list followedBy; 15 | 16 | LLkNonterminalContext() 17 | : parent(nullptr) { } 18 | LLkNonterminalContext(ILLkNonterminalCPtr parent) 19 | : parent(parent) { } 20 | LLkNonterminalContext(ILLkNonterminalCPtr parent, const std::list& followedBy) 21 | : parent(parent), followedBy(followedBy) { } 22 | }; 23 | 24 | struct LLkTransition; 25 | struct LLkDecisionPoint { 26 | std::list> transitions; 27 | 28 | SymbolGroupList computeConditionSymbols() const; 29 | 30 | LLkDecisionPoint& operator+=(const LLkDecisionPoint& rhs); 31 | size_t maxDepth() const; 32 | }; 33 | 34 | struct LLkTransition { 35 | std::shared_ptr condition; 36 | LLkDecisionPoint point; 37 | 38 | LLkTransition() = default; 39 | LLkTransition(const std::shared_ptr& condition) 40 | : condition(condition) { } 41 | LLkTransition(const std::shared_ptr& condition, const LLkDecisionPoint& point) 42 | : condition(condition), point(point) { } 43 | }; 44 | 45 | struct LLkFlyweight { 46 | std::list contexts; 47 | LLkDecisionPoint decisions; 48 | }; 49 | 50 | struct LLkParserDefinition; 51 | class LLkBuilder { 52 | public: 53 | LLkBuilder(const LLkParserDefinition* context); 54 | 55 | void visitRootDisjunction(const std::list>& rootDisjunction); 56 | 57 | void visit(const CategoryStatement* categoryStatement); 58 | void visit(const RuleStatement* ruleStatement); 59 | 60 | void visit(const DisjunctiveRegex* regex); 61 | void visit(const ConjunctiveRegex* regex); 62 | 63 | void visit(const RepetitiveRegex* regex); 64 | void visit(const ReferenceRegex* regex); 65 | 66 | void 
disambiguate(const std::list& alternatives); 67 | void disambiguatePair(ILLkNonterminalCPtr first, ILLkNonterminalCPtr second); 68 | void disambiguateDecisionPoints(ILLkNonterminalCPtr first, ILLkNonterminalCPtr second, LLkDecisionPoint& firstPoint, LLkDecisionPoint& secondPoint, SymbolGroupList& prefix); 69 | void fillDisambiguationParent(ILLkNonterminalCPtr parent, const std::list& alternatives); 70 | 71 | SymbolGroupList lookahead(ILLkFirstableCPtr nonterminal, const SymbolGroupList& prefix); 72 | 73 | LLkDecisionPoint getDecisionTree(ILLkFirstableCPtr firstable); 74 | 75 | const LLkParserDefinition* contextMachine() const { return m_contextMachine; } 76 | LLkFirster& firster() { return m_firster; } 77 | const std::map& flyweights() const { return m_flyweights; } 78 | 79 | private: 80 | const LLkParserDefinition* m_contextMachine; 81 | std::map m_flyweights; 82 | LLkFirster m_firster; 83 | 84 | SymbolGroupList sequentialLookahead(std::list::const_iterator& sequenceIt, const std::list::const_iterator& sequenceEnd, const SymbolGroupList& prefix); 85 | void registerContextAppearance(ILLkNonterminalCPtr target, ILLkNonterminalCPtr parent, const std::list& followedBy); 86 | void registerContextAppearance(ILLkNonterminalCPtr target, ILLkNonterminalCPtr parent, std::list::const_iterator followedByIt, std::list::const_iterator followedByEnd); 87 | }; 88 | 89 | -------------------------------------------------------------------------------- /astir/LLkFirster.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ILLkFirstable.h" 4 | #include "MachineDefinition.h" 5 | 6 | #include "MachineStatement.h" 7 | #include "Regex.h" 8 | 9 | struct LLkParserDefinition; 10 | class LLkFirster { 11 | public: 12 | LLkFirster(const LLkParserDefinition* machine); 13 | 14 | SymbolGroupList visit(const CategoryStatement* cs, const SymbolGroupList& prefix); 15 | SymbolGroupList visit(const RuleStatement* rs, const 
SymbolGroupList& prefix); 16 | 17 | SymbolGroupList visit(const RepetitiveRegex* rr, const SymbolGroupList& prefix); 18 | SymbolGroupList visit(const DisjunctiveRegex* dr, const SymbolGroupList& prefix); 19 | SymbolGroupList visit(const ConjunctiveRegex* cr, const SymbolGroupList& prefix); 20 | 21 | SymbolGroupList visit(const EmptyRegex* rr, const SymbolGroupList& prefix); 22 | SymbolGroupList visit(const AnyRegex* ar, const SymbolGroupList& prefix); 23 | SymbolGroupList visit(const ExceptAnyRegex* ar, const SymbolGroupList& prefix); 24 | SymbolGroupList visit(const LiteralRegex* lr, const SymbolGroupList& prefix); 25 | SymbolGroupList visit(const ReferenceRegex* rr, const SymbolGroupList& prefix); 26 | SymbolGroupList visit(const ArbitrarySymbolRegex* asr, const SymbolGroupList& prefix); 27 | 28 | private: 29 | const LLkParserDefinition* m_machine; 30 | }; 31 | 32 | -------------------------------------------------------------------------------- /astir/LLkParserDefinition.cpp: -------------------------------------------------------------------------------- 1 | #include "LLkParserDefinition.h" 2 | #include "GenerationVisitor.h" 3 | 4 | LLkParserDefinition::LLkParserDefinition(unsigned long k) 5 | : MachineDefinition({ 6 | { MachineFlag::ProductionsTerminalByDefault, MachineDefinitionAttribute(false) }, 7 | { MachineFlag::ProductionsRootByDefault, MachineDefinitionAttribute(false) }, 8 | { MachineFlag::CategoriesRootByDefault, MachineDefinitionAttribute(false) }, 9 | { MachineFlag::AmbiguityResolvedByPrecedence, MachineDefinitionAttribute(false) } 10 | }), m_builder(std::make_unique(this)), m_k(k) { } 11 | 12 | void LLkParserDefinition::initialize() { 13 | if (initialized()) { // really necessary 14 | return; 15 | } 16 | 17 | this->MachineDefinition::initialize(); 18 | 19 | for (const auto& statementPair : statements) { 20 | auto statementAsLLkBuilding = dynamic_cast(statementPair.second.get()); 21 | statementAsLLkBuilding->accept(m_builder.get()); 22 | } 23 | 24 | 
auto roots = this->getRoots(); 25 | m_builder->visitRootDisjunction(roots); 26 | } 27 | 28 | void LLkParserDefinition::accept(GenerationVisitor* visitor) const { 29 | visitor->visit(this); 30 | } 31 | -------------------------------------------------------------------------------- /astir/LLkParserDefinition.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "SyntacticTree.h" 4 | #include "MachineDefinition.h" 5 | 6 | #include "LLkBuilder.h" 7 | #include "LLkFirster.h" 8 | 9 | #include 10 | 11 | struct LLkParserDefinition : public MachineDefinition { 12 | LLkParserDefinition(unsigned long k); 13 | 14 | void initialize() override; 15 | 16 | void accept(GenerationVisitor* visitor) const override; 17 | 18 | LLkBuilder& builder() const { return *m_builder; } 19 | unsigned long k() const { return m_k; } 20 | private: 21 | unsigned long m_k; 22 | std::unique_ptr m_builder; 23 | }; -------------------------------------------------------------------------------- /astir/LLkParserGenerator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "LLkParserDefinition.h" 4 | #include "MachineStatement.h" 5 | 6 | class LLkParserGenerator { 7 | public: 8 | virtual void visitTypeFormingStatements(const std::list>& rootDisjunction) = 0; 9 | virtual void visitRootDisjunction(const std::list>& rootDisjunction) = 0; 10 | 11 | virtual void visit(const CategoryStatement* category) = 0; 12 | virtual void visit(const PatternStatement* rule) = 0; 13 | virtual void visit(const ProductionStatement* rule) = 0; 14 | virtual void visit(const RegexStatement* rule) = 0; 15 | 16 | virtual void visit(const DisjunctiveRegex* regex) = 0; 17 | virtual void visit(const ConjunctiveRegex* regex) = 0; 18 | 19 | virtual void visit(const RepetitiveRegex* regex) = 0; 20 | 21 | virtual void visit(const EmptyRegex* regex) = 0; 22 | virtual void visit(const AnyRegex* regex) = 0; 23 | 
virtual void visit(const ExceptAnyRegex* regex) = 0; 24 | virtual void visit(const LiteralRegex* regex) = 0; 25 | virtual void visit(const ArbitrarySymbolRegex* regex) = 0; 26 | virtual void visit(const ReferenceRegex* regex) = 0; 27 | 28 | protected: 29 | LLkParserGenerator(LLkBuilder& builder) 30 | : m_builder(builder) { } 31 | 32 | LLkBuilder& m_builder; 33 | }; -------------------------------------------------------------------------------- /astir/LexicalAnalysisException.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Exception.h" 4 | 5 | class LexicalAnalysisException : public Exception { 6 | public: 7 | LexicalAnalysisException(const std::string& errmsg) 8 | : Exception(errmsg) { } 9 | }; -------------------------------------------------------------------------------- /astir/LexicalAnalyzer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "Token.h" 8 | #include "LexicalAnalysisException.h" 9 | 10 | enum class LexicalAnalyzerState { 11 | Default, 12 | ForwardSlash, 13 | LineComment, 14 | MultilineComment, 15 | MultilineCommentStarEncountered, 16 | 17 | Identifier, 18 | Number, 19 | String, 20 | StringEscapeSequence, 21 | StringOctalEscapeSequence, 22 | StringHexEscapeSequence, 23 | LeftArrow 24 | }; 25 | 26 | class LexicalAnalyzer { 27 | public: 28 | LexicalAnalyzer(); 29 | 30 | std::list process(std::istream& input); 31 | void resetInternalState(); 32 | void resetPositionState(); 33 | void resetState(); 34 | private: 35 | FileLocation m_currentLocation; 36 | LexicalAnalyzerState m_state; 37 | bool m_stringIsDoubleQuote; 38 | std::string m_currentEscapeSequence; 39 | Token m_currentToken; 40 | 41 | char m_currentCharacter; 42 | bool m_consumeNew; 43 | bool m_endOfStreamReached; 44 | 45 | const std::map m_keywordMap = std::map({ 46 | std::pair("uses", TokenType::KW_USES), 47 | 48 
| std::pair("on", TokenType::KW_ON), 49 | std::pair("with", TokenType::KW_WITH), 50 | 51 | std::pair("finite", TokenType::KW_FINITE), 52 | std::pair("automaton", TokenType::KW_AUTOMATON), 53 | std::pair("LL", TokenType::KW_LL), 54 | std::pair("parser", TokenType::KW_PARSER), 55 | 56 | std::pair("productions_terminal_by_default", TokenType::KW_PRODUCTIONS_TERMINAL_BY_DEFAULT), 57 | std::pair("productions_nonterminal_by_default", TokenType::KW_PRODUCTIONS_NONTERMINAL_BY_DEFAULT), 58 | std::pair("productions_root_by_default", TokenType::KW_PRODUCTIONS_ROOT_BY_DEFAULT), 59 | std::pair("productions_nonroot_by_default", TokenType::KW_PRODUCTIONS_NONROOT_BY_DEFAULT), 60 | std::pair("categories_root_by_default", TokenType::KW_CATEGORIES_ROOT_BY_DEFAULT), 61 | std::pair("categories_nonroot_by_default", TokenType::KW_CATEGORIES_NONROOT_BY_DEFAULT), 62 | std::pair("ambiguity_disallowed", TokenType::KW_AMBIGUITY_DISALLOWED), 63 | std::pair("ambiguity_resolved_by_precedence", TokenType::KW_AMBIGUITY_RESOLVED_BY_PRECEDENCE), 64 | 65 | std::pair("ignored", TokenType::KW_IGNORED), 66 | std::pair("root", TokenType::KW_ROOT), 67 | std::pair("terminal", TokenType::KW_TERMINAL), 68 | std::pair("nonterminal", TokenType::KW_NONTERMINAL), 69 | std::pair("category", TokenType::KW_CATEGORY), 70 | std::pair("production", TokenType::KW_PRODUCTION), 71 | std::pair("pattern", TokenType::KW_PATTERN), 72 | std::pair("regex", TokenType::KW_REGEX), 73 | 74 | std::pair("item", TokenType::KW_ITEM), 75 | std::pair("list", TokenType::KW_LIST), 76 | std::pair("raw", TokenType::KW_RAW), 77 | 78 | std::pair("flag", TokenType::KW_FLAG), 79 | std::pair("unflag", TokenType::KW_UNFLAG), 80 | 81 | std::pair("capture", TokenType::KW_CAPTURE), 82 | std::pair("empty", TokenType::KW_EMPTY), 83 | std::pair("append", TokenType::KW_APPEND), 84 | std::pair("prepend", TokenType::KW_PREPEND), 85 | 86 | std::pair("set", TokenType::KW_SET), 87 | std::pair("unset", TokenType::KW_UNSET), 88 | 89 | std::pair("push", 
TokenType::KW_PUSH), 90 | std::pair("pop", TokenType::KW_POP), 91 | std::pair("clear", TokenType::KW_CLEAR), 92 | }); 93 | }; 94 | -------------------------------------------------------------------------------- /astir/MachineDefinition.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ISyntacticEntity.h" 4 | #include "ISemanticEntity.h" 5 | #include "IGenerationVisitable.h" 6 | 7 | #include "MachineStatement.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | enum class MachineFlag { 15 | ProductionsTerminalByDefault, 16 | ProductionsRootByDefault, 17 | CategoriesRootByDefault, 18 | AmbiguityResolvedByPrecedence 19 | }; 20 | 21 | struct MachineDefinitionAttribute { 22 | bool set; 23 | bool value; 24 | 25 | MachineDefinitionAttribute() 26 | : set(false), value(false) { } 27 | MachineDefinitionAttribute(bool value) 28 | : set(false), value(value) { } 29 | }; 30 | 31 | struct MachineDefinition : public ISyntacticEntity, public ISemanticEntity, public IGenerationVisitable { 32 | public: 33 | std::string name; 34 | std::map attributes; 35 | std::map> uses; 36 | std::pair> on; 37 | std::map> statements; 38 | 39 | void initialize() override; 40 | 41 | std::shared_ptr findMachineStatement(const std::string& name, const MachineDefinition** sourceMachine = nullptr) const; // anyone calling this function shall not take up even a partial ownership of the component, normal pointer suffices 42 | std::list> getTerminalProductions() const; 43 | std::list> getTypeFormingStatements() const; 44 | bool hasPurelyTerminalRoots() const; 45 | std::list> getRoots() const; 46 | std::list getUnderlyingProductionsOfRoots() const; 47 | std::list> getTerminalRoots() const; 48 | TerminalTypeIndex terminalProductionCount() const { return m_terminalCount; }; 49 | 50 | void completeCategoryReferences(std::list namesEncountered, const std::shared_ptr& attributedStatement, bool mustBeACategory = false) const; 51 
| 52 | SymbolGroupList computeArbitrarySymbolGroupList() const; 53 | bool isOnTerminalInput() const { return m_isOnTerminalInput; } 54 | 55 | protected: 56 | MachineDefinition() 57 | : attributes({ 58 | { MachineFlag::ProductionsTerminalByDefault, MachineDefinitionAttribute(false) }, 59 | { MachineFlag::ProductionsRootByDefault, MachineDefinitionAttribute(false) }, 60 | { MachineFlag::CategoriesRootByDefault, MachineDefinitionAttribute(false) }, 61 | { MachineFlag::AmbiguityResolvedByPrecedence, MachineDefinitionAttribute(false) } 62 | }), m_terminalCount((TerminalTypeIndex)0), m_isOnTerminalInput(false) { } 63 | 64 | MachineDefinition(const std::map& attributes); 65 | 66 | private: 67 | TerminalTypeIndex m_terminalCount; 68 | 69 | void mergeInAttributes(const std::map& attributes); 70 | bool m_isOnTerminalInput; 71 | }; 72 | -------------------------------------------------------------------------------- /astir/MachineStatement.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ISyntacticEntity.h" 4 | #include "ISemanticEntity.h" 5 | #include "IReferencing.h" 6 | #include "INFABuildable.h" 7 | #include "ILLkFirstable.h" 8 | #include "ILLkBuilding.h" 9 | #include "IGenerationVisitable.h" 10 | #include "ILLkParserGenerable.h" 11 | 12 | #include "Field.h" 13 | #include "Regex.h" 14 | 15 | #include 16 | #include 17 | 18 | struct MachineDefinition; 19 | using TerminalTypeIndex = size_t; 20 | 21 | enum class Rootness { 22 | AcceptRoot, 23 | IgnoreRoot, 24 | Unspecified 25 | }; 26 | 27 | enum class Terminality { 28 | Terminal, 29 | Nonterminal, 30 | Unspecified 31 | }; 32 | 33 | /* The following locally supresses the "'X' inherits 'Y' via dominance" warnings */ 34 | #pragma warning( push ) 35 | #pragma warning( disable : 4250 ) 36 | 37 | 38 | struct MachineStatement : public ISyntacticEntity, public ISemanticEntity, public IReferencing, 39 | public INFABuildable, public ILLkNonterminal, public 
ILLkBuilding, ILLkParserGenerable { 40 | std::string name; 41 | virtual ~MachineStatement() = default; 42 | 43 | std::string referenceName() const override; 44 | 45 | protected: 46 | MachineStatement() = default; 47 | MachineStatement(const std::string& name) 48 | : name(name) { } 49 | }; 50 | 51 | struct CategoryStatement; 52 | struct ProductionStatement; 53 | struct AttributedStatement : public virtual MachineStatement { 54 | std::map> categories; 55 | std::list> fields; 56 | 57 | std::shared_ptr findField(const std::string& name, std::shared_ptr& categoryFoundIn) const; 58 | void completeFieldDeclarations(MachineDefinition& context) const; 59 | 60 | virtual std::list calculateInstandingProductions() const = 0; 61 | 62 | virtual bool categoricallyRefersTo(const AttributedStatement* statement) const = 0; 63 | virtual std::set unpickReferal(const AttributedStatement* statement) const = 0; 64 | 65 | private: 66 | std::shared_ptr findCategoryField(const std::string& name, std::shared_ptr& categoryFoundIn) const; 67 | }; 68 | 69 | struct TypeFormingStatement : public AttributedStatement, public IGenerationVisitable { 70 | Rootness rootness; 71 | 72 | void accept(GenerationVisitor* visitor) const override; 73 | protected: 74 | TypeFormingStatement() 75 | : rootness(Rootness::Unspecified) { } 76 | TypeFormingStatement(Rootness rootness) 77 | : rootness(rootness) { } 78 | 79 | }; 80 | 81 | struct RuleStatement : public virtual MachineStatement { 82 | std::shared_ptr regex; 83 | 84 | void completeReferences(MachineDefinition& machine) const; 85 | IFileLocalizableCPtr findRecursiveReference(std::list& referencingEntitiesEncountered) const override; 86 | 87 | virtual void verifyContextualValidity(const MachineDefinition& machine) const = 0; 88 | 89 | void accept(LLkBuilder* llkBuilder) const override; 90 | SymbolGroupList first(LLkFirster* firster, const SymbolGroupList& prefix) const override; 91 | }; 92 | 93 | struct CategoryReference { 94 | const AttributedStatement* 
statement; 95 | bool isAReferenceFromUnderlyingMachine; // well, if you put it this way, it really does sounds stupid! 96 | 97 | CategoryReference() 98 | : statement(nullptr), isAReferenceFromUnderlyingMachine(false) { } 99 | CategoryReference(const AttributedStatement* statement, bool isAReferenceFromUnderlyingMachine) 100 | : statement(statement), isAReferenceFromUnderlyingMachine(isAReferenceFromUnderlyingMachine) { } 101 | }; 102 | 103 | struct CategoryStatement : public TypeFormingStatement { 104 | std::map references; // references to 'me', i.e. by other machine components. Non-owning pointers so ok. 105 | 106 | IFileLocalizableCPtr findRecursiveReference(std::list& referencingEntitiesEncountered) const override; 107 | 108 | bool categoricallyRefersTo(const AttributedStatement* statement) const override; 109 | std::set unpickReferal(const AttributedStatement* statement) const override; 110 | std::list calculateInstandingProductions() const override; 111 | 112 | NFA accept(const NFABuilder& nfaBuilder) const override; 113 | SymbolGroupList first(LLkFirster* firster, const SymbolGroupList& prefix) const override; 114 | void accept(LLkBuilder* llkBuilder) const override; 115 | void accept(LLkParserGenerator* generator) const override; 116 | }; 117 | 118 | struct ProductionStatement : public TypeFormingStatement, public RuleStatement { 119 | Terminality terminality; 120 | TerminalTypeIndex terminalTypeIndex; 121 | 122 | ProductionStatement() 123 | : terminality(Terminality::Unspecified), terminalTypeIndex(0) { } 124 | 125 | void verifyContextualValidity(const MachineDefinition& machine) const override; 126 | 127 | bool categoricallyRefersTo(const AttributedStatement* statement) const override; 128 | std::set unpickReferal(const AttributedStatement* statement) const override; 129 | std::list calculateInstandingProductions() const override; 130 | 131 | NFA accept(const NFABuilder& nfaBuilder) const override; 132 | using RuleStatement::first; 133 | using 
RuleStatement::findRecursiveReference; 134 | void accept(LLkParserGenerator* generator) const override; 135 | }; 136 | 137 | struct PatternStatement : public AttributedStatement, public RuleStatement { 138 | void verifyContextualValidity(const MachineDefinition& machine) const override; 139 | 140 | bool categoricallyRefersTo(const AttributedStatement* statement) const override; 141 | std::set unpickReferal(const AttributedStatement* statement) const override; 142 | std::list calculateInstandingProductions() const override; 143 | 144 | NFA accept(const NFABuilder& nfaBuilder) const override; 145 | using RuleStatement::first; 146 | using RuleStatement::findRecursiveReference; 147 | void accept(LLkParserGenerator* generator) const override; 148 | }; 149 | 150 | struct RegexStatement : public RuleStatement { 151 | void verifyContextualValidity(const MachineDefinition& machine) const override; 152 | 153 | NFA accept(const NFABuilder& nfaBuilder) const override; 154 | using RuleStatement::first; 155 | using RuleStatement::findRecursiveReference; 156 | void accept(LLkParserGenerator* generator) const override; 157 | }; 158 | 159 | #pragma warning( pop ) -------------------------------------------------------------------------------- /astir/NFA.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "Regex.h" 10 | #include "NFAAction.h" 11 | #include "SymbolGroup.h" 12 | 13 | using State = size_t; 14 | 15 | struct Transition { 16 | State target; 17 | NFAActionRegister actions; 18 | std::shared_ptr condition; 19 | bool doNotOptimizeTargetIntoSymbolClosure; 20 | 21 | Transition(State target) 22 | : target(target), condition(nullptr), doNotOptimizeTargetIntoSymbolClosure(false) { } 23 | Transition(State target, const std::shared_ptr& condition, const NFAActionRegister& actions, bool doNotOptimizeTargetIntoSymbolClosure = false) 24 | : 
target(target), condition(condition), actions(actions), doNotOptimizeTargetIntoSymbolClosure(doNotOptimizeTargetIntoSymbolClosure) { } 25 | Transition(State target, const std::shared_ptr& condition) 26 | : Transition(target, condition, NFAActionRegister()) { } 27 | 28 | bool equals(const Transition& rhs) const; 29 | bool alignedSymbolWise(const Transition& rhs) const; 30 | std::list disjoinFrom(const Transition& rhs); 31 | }; 32 | 33 | using TransitionList = std::list; 34 | 35 | struct NFAState { 36 | TransitionList transitions; 37 | NFAActionRegister actions; 38 | }; 39 | 40 | using CapturePointId = size_t; 41 | 42 | class NFA { 43 | public: 44 | std::set finalStates; 45 | std::vector states; // 0th element of this vector is by default the initial state 46 | std::list> contexts; // parent context name, subcontext name (also the type) 47 | 48 | NFA(); 49 | 50 | void orNFA(const NFA& rhs, bool preventSymbolClosureOptimisation); 51 | void andNFA(const NFA& rhs, bool preventSymbolClosureOptimisation); 52 | void operator|=(const NFA& rhs); 53 | void operator&=(const NFA& rhs); 54 | 55 | State addState(); 56 | void addTransition(State state, const Transition& transition); 57 | Transition& addEmptyTransition(State state, State target); 58 | Transition& addEmptyTransition(State state, State target, const NFAActionRegister& ar); 59 | void registerContext(const std::string& parentContextName, const std::string& name); 60 | 61 | void addFinalActions(const NFAActionRegister& actions); 62 | void addInitialActions(const NFAActionRegister& actions); 63 | State concentrateFinalStates(); 64 | State concentrateFinalStates(const NFAActionRegister& actions); 65 | 66 | NFA buildPseudoDFA() const; 67 | 68 | static void calculateDisjointTransitions(std::list& symbolGroups); 69 | static std::list> makeComplementSymbolGroups(const std::list>& symbolGroups); 70 | 71 | private: 72 | void mergeInContexts(const NFA& rhs); 73 | 74 | struct DFAState { 75 | std::set nfaStates; 76 | bool marked; 
77 | NFAActionRegister actions; 78 | 79 | DFAState() 80 | : marked(false) { } 81 | DFAState(const std::set& nfaStates, const NFAActionRegister& actions) 82 | : nfaStates(nfaStates), marked(false), actions(actions) { } 83 | }; 84 | 85 | struct SymbolClosure { 86 | std::shared_ptr symbols; 87 | std::set states; 88 | NFAActionRegister actions; 89 | 90 | SymbolClosure() 91 | : symbols(nullptr) { } 92 | SymbolClosure(const std::shared_ptr& symbols, const std::set& states) 93 | : symbols(symbols), states(states) { } 94 | SymbolClosure(const std::shared_ptr& symbols, const std::set& states, const NFAActionRegister& actions) 95 | : symbols(symbols), states(states), actions(actions) { } 96 | }; 97 | 98 | DFAState calculateEpsilonClosure(const std::set& states) const; 99 | std::list calculateSymbolClosures(const std::list& transitions) const; 100 | std::list calculateTransitions(const std::set& states) const; 101 | 102 | 103 | State findUnmarkedState(const std::deque& stateMap) const; 104 | State findStateByNFAStateSet(const std::deque& stateMap, const std::set& nfaSet) const; 105 | }; 106 | -------------------------------------------------------------------------------- /astir/NFAAction.cpp: -------------------------------------------------------------------------------- 1 | #include "NFAAction.h" 2 | 3 | #include "GenerationVisitor.h" 4 | 5 | #include 6 | 7 | bool NFAAction::operator==(const NFAAction& rhs) const { 8 | return 9 | this->type == rhs.type && this->contextPath == rhs.contextPath 10 | && this->targetName == rhs.targetName && this->payload == rhs.payload 11 | ; 12 | } 13 | 14 | NFAActionRegister NFAActionRegister::operator+(const NFAActionRegister& rhs) const { 15 | NFAActionRegister ret(*this); 16 | 17 | for (const auto& are : rhs) { 18 | auto it = std::find_if(begin(), end(), [are](const NFAAction& entry) { 19 | return entry.type == are.type && are.contextPath == entry.contextPath && are.targetName == entry.targetName; 20 | }); 21 | if (it == end()) { 22 | 
ret.push_back(are); 23 | } 24 | } 25 | 26 | return ret; 27 | } 28 | 29 | const NFAActionRegister& NFAActionRegister::operator+=(const NFAActionRegister& rhs) { 30 | for (const auto& are : rhs) { 31 | auto it = std::find_if(begin(), end(), [&are](const NFAAction& entry) { 32 | return entry.type == are.type && are.contextPath == entry.contextPath && are.targetName == entry.targetName; 33 | }); 34 | if (it == end()) { 35 | this->push_back(are); 36 | } 37 | } 38 | 39 | return *this; 40 | } 41 | 42 | bool NFAActionRegister::operator==(const NFAActionRegister& rhs) const { 43 | // TODO: might need an improvement for the future to consider different orders... or maybe not? 44 | return *static_cast*>(this) == rhs; 45 | } 46 | -------------------------------------------------------------------------------- /astir/NFAAction.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | enum class NFAActionType : unsigned char { 8 | Flag = 1, 9 | Unflag = 2, 10 | 11 | Capture = 3, 12 | Empty = 4, 13 | Append = 5, 14 | Prepend = 6, 15 | 16 | Set = 7, 17 | Unset = 8, 18 | Push = 9, 19 | Pop = 10, 20 | Clear = 11, 21 | 22 | CreateContext = 101, 23 | TerminalizeContext = 102, 24 | ElevateContext = 103, 25 | IgnoreContext = 104, 26 | InitiateCapture = 105, 27 | 28 | None = 255 29 | }; 30 | 31 | struct Field; 32 | struct NFAAction { 33 | NFAActionType type; 34 | std::string contextPath; 35 | std::string targetName; 36 | std::string payload; 37 | std::shared_ptr targetField; 38 | 39 | NFAAction(NFAActionType faAction, const std::string& contextPath, const std::string& targetName) 40 | : NFAAction(faAction, contextPath, targetName, nullptr) { } 41 | NFAAction(NFAActionType faAction, const std::string& contextPath, const std::string& targetName, const std::shared_ptr& targetField) 42 | : NFAAction(faAction, contextPath, targetName, targetField, std::string()) { } 43 | NFAAction(NFAActionType 
faAction, const std::string& contextPath, const std::string& targetName, const std::shared_ptr& targetField, const std::string& payload) 44 | : type(faAction), contextPath(contextPath), targetName(targetName), targetField(targetField), payload(payload) { } 45 | 46 | bool operator==(const NFAAction& rhs) const; 47 | }; 48 | 49 | class NFAActionRegister : public std::list { 50 | public: 51 | NFAActionRegister() = default; 52 | 53 | NFAActionRegister operator+(const NFAActionRegister& rhs) const; 54 | const NFAActionRegister& operator+=(const NFAActionRegister& rhs); 55 | bool operator==(const NFAActionRegister& rhs) const; 56 | }; -------------------------------------------------------------------------------- /astir/NFABuilder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "NFA.h" 4 | 5 | #include "SyntacticTree.h" 6 | #include "MachineStatement.h" 7 | #include "Regex.h" 8 | 9 | class NFABuilder { 10 | public: 11 | NFABuilder(const MachineDefinition& context, const MachineStatement* statement, const std::string& generationContextPath) 12 | : m_contextMachine(context), m_contextStatement(statement), m_generationContextPath(generationContextPath) { } 13 | 14 | NFA visit(const CategoryStatement* category) const; 15 | NFA visit(const PatternStatement* rule) const; 16 | NFA visit(const ProductionStatement* rule) const; 17 | NFA visit(const RegexStatement* rule) const; 18 | 19 | NFA visit(const DisjunctiveRegex* regex) const; 20 | NFA visit(const ConjunctiveRegex* regex) const; 21 | 22 | NFA visit(const RepetitiveRegex* regex) const; 23 | 24 | NFA visit(const EmptyRegex* regex) const; 25 | NFA visit(const AnyRegex* regex) const; 26 | NFA visit(const ExceptAnyRegex* regex) const; 27 | NFA visit(const LiteralRegex* regex) const; 28 | NFA visit(const ArbitrarySymbolRegex* regex) const; 29 | NFA visit(const ReferenceRegex* regex) const; 30 | 31 | private: 32 | const MachineDefinition& m_contextMachine; 33 
| const MachineStatement* m_contextStatement; 34 | const std::string m_generationContextPath; 35 | 36 | std::pair computeActionRegisterEntries(const std::list& actions) const; 37 | std::pair computeActionRegisterEntries(const std::list& actions, const std::string& payload) const; 38 | }; 39 | 40 | -------------------------------------------------------------------------------- /astir/RegexAction.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Field.h" 4 | #include "ISemanticEntity.h" 5 | 6 | #include 7 | #include 8 | 9 | enum class RegexActionType : unsigned char { 10 | Flag = 1, 11 | Unflag = 2, 12 | 13 | Capture = 3, 14 | Empty = 4, 15 | Append = 5, 16 | Prepend = 6, 17 | 18 | Set = 7, 19 | Unset = 8, 20 | Push = 9, 21 | Pop = 10, 22 | Clear = 11, 23 | 24 | None = 255 25 | }; 26 | 27 | struct RegexAction : public ISyntacticEntity { 28 | RegexActionType type = RegexActionType::None; 29 | std::string target; 30 | std::shared_ptr targetField; 31 | 32 | RegexAction() 33 | : targetField(nullptr) { } 34 | RegexAction(RegexActionType type, std::string target) 35 | : type(type), target(target), targetField(nullptr) { } 36 | }; -------------------------------------------------------------------------------- /astir/Resources/Exception.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class Exception : public std::exception { 7 | public: 8 | Exception() = default; 9 | Exception(const std::string& message) 10 | : std::exception(message.c_str()) { } 11 | 12 | virtual ~Exception() = default; 13 | }; -------------------------------------------------------------------------------- /astir/Resources/Location.cpp: -------------------------------------------------------------------------------- 1 | #include "Location.h" 2 | 3 | void TextLocation::note(char c) { 4 | ++column; 5 | if (c == '\n') { 6 | ++line; 7 | column = 0; 8 
| } 9 | } 10 | 11 | void TextLocation::advance() { 12 | ++column; 13 | } 14 | 15 | std::string TextLocation::toString() const { 16 | return std::to_string(line) + ":" + std::to_string(column); 17 | } 18 | 19 | std::shared_ptr TextLocation::clone() const { 20 | return std::make_shared(*this); 21 | } 22 | 23 | std::string TextFileLocation::toString() const { 24 | return fileName + ":" + this->TextLocation::toString(); 25 | } 26 | 27 | std::shared_ptr TextFileLocation::clone() const { 28 | return std::make_shared(*this); 29 | } 30 | 31 | void InvalidLocation::note(char c) { 32 | throw std::exception(); 33 | } 34 | 35 | void InvalidLocation::advance() { 36 | throw std::exception(); 37 | } 38 | 39 | std::string InvalidLocation::toString() const { 40 | throw std::exception(); 41 | } 42 | 43 | std::shared_ptr InvalidLocation::clone() const { 44 | throw std::exception(); 45 | } 46 | -------------------------------------------------------------------------------- /astir/Resources/Location.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | class Location { 7 | public: 8 | virtual void note(char c) = 0; 9 | virtual void advance() = 0; 10 | virtual std::string toString() const = 0; 11 | 12 | virtual std::shared_ptr clone() const = 0; 13 | 14 | protected: 15 | Location() = default; 16 | }; 17 | 18 | class InvalidLocation : public Location { 19 | public: 20 | void note(char c) override; 21 | void advance() override; 22 | std::string toString() const override; 23 | 24 | std::shared_ptr clone() const override; 25 | 26 | InvalidLocation() = default; 27 | }; 28 | 29 | class TextLocation : public Location { 30 | public: 31 | unsigned long line; 32 | unsigned long column; 33 | 34 | TextLocation() 35 | : line(1), column(0) { } 36 | 37 | TextLocation(unsigned long line, unsigned long column) 38 | : line(line), column(column) { } 39 | 40 | void note(char c) override; 41 | void advance() override; 42 | 
std::string toString() const override; 43 | std::shared_ptr clone() const override; 44 | }; 45 | 46 | class TextFileLocation : public TextLocation { 47 | public: 48 | std::string fileName; 49 | TextFileLocation() 50 | : TextLocation(), fileName() { } 51 | 52 | TextFileLocation(const std::string& fileName, unsigned long line, unsigned long column) 53 | : TextLocation(line, column), fileName(fileName) { } 54 | 55 | std::string toString() const override; 56 | std::shared_ptr clone() const override; 57 | }; -------------------------------------------------------------------------------- /astir/Resources/Parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "Machine.h" 6 | 7 | class ParserException : public MachineException { 8 | public: 9 | ParserException() = default; 10 | ParserException(const std::string& message) 11 | : MachineException(message) { } 12 | }; 13 | 14 | template 15 | class Parser : public Machine { 16 | public: 17 | Parser() 18 | : m_lastApplicationSuccessful(false), m_lastException(nullptr) { } 19 | 20 | std::shared_ptr apply(InputStreamType& rs) override; 21 | 22 | std::shared_ptr parse(InputStreamType& rs); 23 | std::shared_ptr parseWithIgnorance(InputStreamType& rs); 24 | std::list> parseStream(InputStreamType& rs); 25 | std::list> parseStreamWithIgnorance(InputStreamType& rs); 26 | 27 | bool lastApplicationSuccessful() const override { return m_lastApplicationSuccessful; } 28 | void reset() override; 29 | std::string lastError() const; 30 | 31 | protected: 32 | virtual std::shared_ptr parse_root(InputStreamType& is) = 0; 33 | void error(const std::string& message) const; 34 | 35 | private: 36 | bool m_lastApplicationSuccessful; 37 | std::unique_ptr m_lastException; 38 | }; 39 | 40 | template 41 | inline std::shared_ptr Parser::apply(InputStreamType& rs) { 42 | try { 43 | m_lastApplicationSuccessful = true; 44 | return parse_root(rs); 45 | } catch (const 
Exception& ex) { 46 | m_lastException = std::make_unique(ex); 47 | m_lastApplicationSuccessful = false; 48 | return nullptr; 49 | } 50 | } 51 | 52 | template 53 | inline std::shared_ptr Parser::parse(InputStreamType& rs) { 54 | return parse_root(rs); 55 | } 56 | 57 | template 58 | inline std::shared_ptr Parser::parseWithIgnorance(InputStreamType& rs) { 59 | while (rs.good()) { 60 | auto ret = parse(rs); 61 | if (ret) { 62 | return ret; 63 | } 64 | } 65 | 66 | throw ParserException("Parse with ignorance so far unsuccessful but the input stream is no longer good()"); 67 | return nullptr; 68 | } 69 | 70 | template 71 | inline std::list> Parser::parseStream(InputStreamType& rs) { 72 | std::list> ret; 73 | 74 | while (rs.good()) { 75 | auto p = parse(rs); 76 | ret.push_back(p); 77 | } 78 | 79 | return ret; 80 | } 81 | 82 | template 83 | inline std::list> Parser::parseStreamWithIgnorance(InputStreamType& rs) { 84 | std::list> ret; 85 | 86 | while (rs.good()) { 87 | auto p = parse(rs); 88 | if (p) { 89 | ret.push_back(p); 90 | } 91 | } 92 | 93 | return ret; 94 | } 95 | 96 | template 97 | inline void Parser::reset() { 98 | m_lastApplicationSuccessful = false; 99 | } 100 | 101 | template 102 | inline std::string Parser::lastError() const { 103 | if (m_lastException) { 104 | return m_lastException->what(); 105 | } else { 106 | return ""; 107 | } 108 | } 109 | 110 | template 111 | inline void Parser::error(const std::string& message) const { 112 | throw ParserException(message); 113 | } -------------------------------------------------------------------------------- /astir/Resources/Production.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "Location.h" 6 | 7 | class ILocalizable { 8 | public: 9 | virtual const std::shared_ptr& location() const = 0; 10 | std::string locationString() const { return location()->toString(); } 11 | 12 | protected: 13 | ILocalizable() = default; 14 | virtual 
~ILocalizable() = default; 15 | }; 16 | 17 | class Production : public ILocalizable { 18 | public: 19 | Production(const std::shared_ptr& occurenceLocation) 20 | : m_location(occurenceLocation) { } 21 | Production(const ILocalizable& underlyingEntity) 22 | : m_location(underlyingEntity.location()) { } 23 | 24 | virtual std::string stringForError() const = 0; 25 | 26 | const std::shared_ptr& location() const override { return m_location; } 27 | private: 28 | std::shared_ptr m_location; 29 | }; -------------------------------------------------------------------------------- /astir/Resources/RawStream.cpp: -------------------------------------------------------------------------------- 1 | #include "ProductionStream.h" 2 | #include "RawStream.h" 3 | 4 | bool RawStream::streamGet(std::shared_ptr& c) { 5 | char payload; 6 | bool ret = bool{ m_underlyingStream.get(payload) }; 7 | 8 | if (ret) { 9 | m_currentStreamLocation->note(payload); 10 | c = std::make_shared(payload, m_currentStreamLocation->clone()); 11 | } 12 | 13 | return ret; 14 | } 15 | 16 | bool RawStream::streamGood() const { 17 | return m_underlyingStream.good(); 18 | } 19 | 20 | TextFileStream::TextFileStream(const std::string& fileName) 21 | : m_fileStream(fileName), RawStream(m_fileStream, std::make_shared(fileName, 1, 0)) { } 22 | -------------------------------------------------------------------------------- /astir/Resources/RawStream.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "ProductionStream.h" 6 | #include "Terminal.h" 7 | 8 | class RawTerminal : public Terminal { 9 | public: 10 | RawTerminal() 11 | : Terminal('\0', nullptr) { } 12 | //^ something distinguishably invalid (is it tho?), should never be referred to in practice if not initialized in a valid fashion 13 | 14 | RawTerminal(char c, const std::shared_ptr& occurenceLocation) 15 | : Terminal(c, std::string({ c }), occurenceLocation) { } 16 | }; 17 | 18 
| class RawStream : public ProductionStream { 19 | public: 20 | RawStream(std::istream& underlyingStream, const std::shared_ptr& startingStreamLocation) 21 | : m_underlyingStream(underlyingStream), m_currentStreamLocation(startingStreamLocation), ProductionStream(startingStreamLocation) { } 22 | 23 | protected: 24 | bool streamGet(std::shared_ptr& c) override; 25 | bool streamGood() const override; 26 | 27 | private: 28 | std::istream& m_underlyingStream; 29 | 30 | std::shared_ptr m_currentStreamLocation; 31 | }; 32 | 33 | #include 34 | 35 | class TextFileStream : public RawStream { 36 | public: 37 | TextFileStream(const std::string& fileName); 38 | 39 | private: 40 | std::ifstream m_fileStream; 41 | }; -------------------------------------------------------------------------------- /astir/Resources/SpecimenFiniteAutomaton.sh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // stream 11 | #include "${{AppropriateStreamHeader}}" 12 | 13 | // general dependencies 14 | #include "Terminal.h" 15 | #include "Machine.h" 16 | 17 | // particular dependencies 18 | ${{DependencyHeaderIncludes}} 19 | 20 | namespace ${{MachineName}} { 21 | enum class ${{MachineName}}TerminalType { 22 | ${{OutputTerminalTypesEnumerated}} 23 | EOS = 0 24 | }; 25 | 26 | typedef ${{MachineName}}TerminalType OutputTerminalType; 27 | 28 | class ${{MachineName}}Terminal : public Terminal { 29 | public: 30 | ${{MachineName}}Terminal() 31 | : ${{MachineName}}Terminal(OutputTerminalType::EOS, nullptr) { } 32 | // something distinguishably invalid, should never be referred to in practice if not initialized in a valid fashion 33 | 34 | protected: 35 | ${{MachineName}}Terminal(OutputTerminalType type, const std::shared_ptr& occurenceLocation) 36 | : Terminal(type, occurenceLocation) { } 37 | ${{MachineName}}Terminal(OutputTerminalType type, const std::string& str, 
const std::shared_ptr& occurenceLocation) 38 | : Terminal(type, str, occurenceLocation) { } 39 | }; 40 | 41 | typedef ${{MachineName}}Terminal OutputTerminal; 42 | typedef std::shared_ptr OutputTerminalPtr; 43 | typedef ${{OutputType}} OutputProduction; 44 | typedef std::shared_ptr OutputProductionPtr; 45 | 46 | class EOS : public OutputTerminal { 47 | public: 48 | EOS(const std::shared_ptr& location) 49 | : OutputTerminal(OutputTerminalType::EOS, location) { } 50 | }; 51 | 52 | ${{TypeForwardDeclarations}} 53 | ${{TypeDeclarations}} 54 | using State = size_t; 55 | 56 | typedef ${{InputStreamTypeName}} InputStream; 57 | typedef ${{InputTerminalTypeName}} InputTerminal; 58 | typedef std::shared_ptr<${{InputTerminalTypeName}}> InputTerminalPtr; 59 | 60 | class ${{MachineName}}; 61 | typedef void (${{MachineName}}::* ActionMethodPointer)(size_t, const std::deque&, const std::shared_ptr&); 62 | class ${{MachineName}} : public Machine { 63 | public: 64 | ${{MachineName}}() 65 | : m_currentState(0) { } 66 | 67 | std::shared_ptr apply(InputStream& rs) override; 68 | 69 | bool lastApplicationSuccessful() const override { return m_stateFinality[m_currentState]; } 70 | void reset() override; 71 | 72 | private: 73 | // state-switching internals 74 | State m_currentState; 75 | static std::vector m_stateMap[${{StateCount}}][${{TransitionSymbolCount}}]; 76 | static bool m_stateFinality[${{StateCount}}]; 77 | static std::vector&, const std::shared_ptr&)> m_transitionActions[${{StateCount}}][${{TransitionSymbolCount}}]; 78 | static void (${{MachineName}}::* m_stateActions[${{StateCount}}])(size_t, const std::deque&, const std::shared_ptr&); 79 | 80 | // raw-capture internals 81 | std::stack m_captureStack; 82 | 83 | // helper methods 84 | ${{CombineRawDeclaration}} 85 | 86 | // dependency machines 87 | ${{DependencyMachineFields}} 88 | // action contexts 89 | std::shared_ptr m_token; 90 | ${{ActionContextsDeclarations}} 91 | // actions 92 | ${{ActionDeclarations}} 93 | }; 94 | }; 
95 | -------------------------------------------------------------------------------- /astir/Resources/SpecimenLLkParser.scpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "${{MachineName}}.h" 6 | 7 | namespace ${{MachineName}} { 8 | ${{ParsingDefinitions}} 9 | // helper methods 10 | ${{CombineRawDefinition}} 11 | } -------------------------------------------------------------------------------- /astir/Resources/SpecimenLLkParser.sh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // stream 11 | #include "${{AppropriateStreamHeader}}" 12 | 13 | // general dependencies 14 | #include "Terminal.h" 15 | #include "Parser.h" 16 | 17 | // particular dependencies 18 | ${{DependencyHeaderIncludes}} 19 | 20 | namespace ${{MachineName}} { 21 | enum class ${{MachineName}}TerminalType { 22 | ${{OutputTerminalTypesEnumerated}} 23 | EOS = 0 24 | }; 25 | 26 | typedef ${{MachineName}}TerminalType OutputTerminalType; 27 | 28 | class ${{MachineName}}Terminal : public Terminal { 29 | public: 30 | ${{MachineName}}Terminal() 31 | : ${{MachineName}}Terminal(OutputTerminalType::EOS, nullptr) { } 32 | // something distinguishably invalid, should never be referred to in practice if not initialized in a valid fashion 33 | 34 | protected: 35 | ${{MachineName}}Terminal(OutputTerminalType type, const std::shared_ptr& occurenceLocation) 36 | : Terminal(type, occurenceLocation) { } 37 | ${{MachineName}}Terminal(OutputTerminalType type, const std::string& str, const std::shared_ptr& occurenceLocation) 38 | : Terminal(type, str, occurenceLocation) { } 39 | }; 40 | 41 | typedef ${{MachineName}}Terminal OutputTerminal; 42 | typedef std::shared_ptr OutputTerminalPtr; 43 | typedef ${{OutputType}} OutputProduction; 44 | typedef std::shared_ptr OutputProductionPtr; 45 | 46 | class 
EOS : public OutputTerminal { 47 | public: 48 | EOS(const std::shared_ptr& location) 49 | : OutputTerminal(OutputTerminalType::EOS, location) { } 50 | }; 51 | 52 | ${{TypeForwardDeclarations}} 53 | ${{TypeDeclarations}} 54 | typedef ${{InputStreamTypeName}} InputStream; 55 | typedef ${{InputTypeName}} InputType; 56 | typedef std::shared_ptr<${{InputTypeName}}> InputTypePtr; 57 | 58 | class ${{MachineName}} : public Parser { 59 | public: 60 | ${{MachineName}}() = default; 61 | 62 | protected: 63 | // helper methods 64 | ${{CombineRawDeclaration}} 65 | 66 | // dependency machines 67 | ${{DependencyMachineFields}} 68 | // parsing declarations 69 | ${{ParsingDeclarations}} 70 | }; 71 | }; 72 | -------------------------------------------------------------------------------- /astir/Resources/Terminal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "Production.h" 6 | 7 | template 8 | class Terminal : public Production { 9 | public: 10 | TerminalTypeType type; 11 | std::string raw; 12 | 13 | std::string stringForError() const override { return '\'' + raw + '\''; }; 14 | protected: 15 | Terminal(TerminalTypeType type, const std::shared_ptr& occurenceLocation) 16 | : type(type), raw(), Production(occurenceLocation) { } 17 | Terminal(TerminalTypeType type, const std::string& raw, const std::shared_ptr& occurenceLocation) 18 | : type(type), raw(raw), Production(occurenceLocation) { } 19 | }; -------------------------------------------------------------------------------- /astir/SemanticAnalysisException.cpp: -------------------------------------------------------------------------------- 1 | #include "SemanticAnalysisException.h" 2 | 3 | SemanticAnalysisException::SemanticAnalysisException(const std::string& message, const IFileLocalizable& somethingLocalizableToPinpointLocationBy) 4 | : Exception(message + " -- at " + somethingLocalizableToPinpointLocationBy.locationString()) { } 5 | 
-------------------------------------------------------------------------------- /astir/SemanticAnalysisException.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "Exception.h" 6 | #include "IFileLocalizable.h" 7 | 8 | class SemanticAnalysisException : public Exception { 9 | public: 10 | SemanticAnalysisException(const std::string& message) 11 | : Exception(message) { } 12 | SemanticAnalysisException(const std::string& message, const IFileLocalizable& somethingLocalizableToPinpointLocationBy); 13 | }; -------------------------------------------------------------------------------- /astir/SymbolGroup.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "CharType.h" 8 | 9 | using SymbolIndex = size_t; 10 | 11 | struct SymbolGroup { 12 | public: 13 | virtual ~SymbolGroup() = default; 14 | 15 | virtual bool equals(const SymbolGroup* rhs) const = 0; 16 | virtual bool disjoint(const SymbolGroup* rhs) const = 0; 17 | virtual std::list, bool>> disjoinFrom(const std::shared_ptr& rhs) = 0; 18 | virtual std::string toString() const = 0; 19 | 20 | virtual std::shared_ptr> retrieveSymbolIndices() const = 0; 21 | protected: 22 | SymbolGroup() = default; 23 | }; 24 | 25 | class SymbolGroupList : public std::list> { 26 | public: 27 | SymbolGroupList() = default; 28 | SymbolGroupList(std::initializer_list> il) 29 | : std::list>(il) { } 30 | SymbolGroupList(const SymbolGroupList::const_iterator& begin, const SymbolGroupList::const_iterator& end) 31 | : std::list>(begin, end) { } 32 | 33 | bool contains(const std::shared_ptr& symbolGroupPtr) const; 34 | bool containsEmpty() const; 35 | SymbolGroupList allButEmpty() const; 36 | void removeEmpty(); 37 | 38 | std::string asSequenceString() const; 39 | 40 | SymbolGroupList& operator+=(const SymbolGroupList& rhs); 41 | }; 42 | 43 | struct 
EmptySymbolGroup : public SymbolGroup { 44 | public: 45 | EmptySymbolGroup() = default; 46 | 47 | bool equals(const SymbolGroup* rhs) const override; 48 | bool disjoint(const SymbolGroup* rhs) const override; 49 | std::list, bool>> disjoinFrom(const std::shared_ptr& rhs) override; 50 | std::string toString() const override; 51 | 52 | std::shared_ptr> retrieveSymbolIndices() const override; 53 | protected: 54 | }; 55 | 56 | struct ByteSymbolGroup : public SymbolGroup { 57 | ByteSymbolGroup() 58 | : ByteSymbolGroup(0, 0) { } 59 | ByteSymbolGroup(CharType rangeStart, CharType rangeEnd) 60 | : SymbolGroup(), rangeStart(rangeStart), rangeEnd(rangeEnd), m_symbolIndicesFlyweight(std::make_shared>()) { } 61 | ByteSymbolGroup(const ByteSymbolGroup& lsg) 62 | : ByteSymbolGroup(lsg.rangeStart, lsg.rangeEnd) { } 63 | 64 | bool equals(const SymbolGroup* rhs) const override; 65 | bool disjoint(const SymbolGroup* rhs) const override; 66 | std::list, bool>> disjoinFrom(const std::shared_ptr& rhs) override; 67 | std::string toString() const override; 68 | 69 | CharType rangeStart; 70 | CharType rangeEnd; 71 | 72 | std::shared_ptr> retrieveSymbolIndices() const override; 73 | private: 74 | std::shared_ptr> m_symbolIndicesFlyweight; 75 | }; 76 | 77 | struct LiteralSymbolGroup : public SymbolGroup { 78 | LiteralSymbolGroup() = default; 79 | LiteralSymbolGroup(const std::string& literal) 80 | : SymbolGroup(), literal(literal) { } 81 | 82 | bool equals(const SymbolGroup* rhs) const override; 83 | bool disjoint(const SymbolGroup* rhs) const override; 84 | std::list, bool>> disjoinFrom(const std::shared_ptr& rhs) override; 85 | std::string toString() const override; 86 | 87 | std::string literal; 88 | 89 | std::shared_ptr> retrieveSymbolIndices() const override; 90 | }; 91 | 92 | struct TypeFormingStatement; 93 | struct MachineDefinition; 94 | struct StatementSymbolGroup : public SymbolGroup { 95 | StatementSymbolGroup() = default; 96 | StatementSymbolGroup(const TypeFormingStatement* 
statement, const MachineDefinition* statementMachine) 97 | : SymbolGroup(), statement(statement), statementMachine(statementMachine), m_symbolIndicesFlyweight(std::make_shared>()) { } 98 | 99 | bool equals(const SymbolGroup* rhs) const override; 100 | bool disjoint(const SymbolGroup* rhs) const override; 101 | std::list, bool>> disjoinFrom(const std::shared_ptr& rhs) override; 102 | std::string toString() const override; 103 | 104 | const TypeFormingStatement* statement; 105 | const MachineDefinition* statementMachine; 106 | 107 | std::shared_ptr> retrieveSymbolIndices() const override; 108 | 109 | private: 110 | std::shared_ptr> m_symbolIndicesFlyweight; 111 | }; 112 | -------------------------------------------------------------------------------- /astir/SyntacticAnalysisException.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Exception.h" 4 | #include "Token.h" 5 | 6 | class SyntacticAnalysisException : public Exception { 7 | public: 8 | SyntacticAnalysisException(const std::string& message) 9 | : Exception(message) { } 10 | SyntacticAnalysisException(const std::string& messagePrefix, const Token& currentToken) 11 | : SyntacticAnalysisException(messagePrefix + "\nCurrent Token: " + currentToken.toString()) { } 12 | SyntacticAnalysisException(const std::string& messagePrefix, const Token& currentToken, const Token& initialToken) 13 | : SyntacticAnalysisException(messagePrefix + "\nCurrent Token: " + currentToken.toString() + "\nInitial Token: " + initialToken.toString()) { } 14 | }; 15 | 16 | class UnexpectedTokenException : public SyntacticAnalysisException { 17 | public: 18 | UnexpectedTokenException(const std::string& message) 19 | : SyntacticAnalysisException(message) { } 20 | 21 | UnexpectedTokenException(const Token& tokenGiven, const std::string& expected) 22 | : SyntacticAnalysisException("Unexpected token " + tokenGiven.toHumanString() + " encountered at " + 
tokenGiven.locationString() + ", expected " + expected) { } 23 | 24 | UnexpectedTokenException(const Token& tokenGiven, const std::string& expected, const std::string& productionDescription) 25 | : SyntacticAnalysisException("Unexpected token " + tokenGiven.toHumanString() + " encountered at " + tokenGiven.locationString() + " in production " + productionDescription + ", expected " + expected) { } 26 | 27 | UnexpectedTokenException(const Token& tokenGiven, const std::string& expected, const std::string& productionDescription, const Token& initialToken) 28 | : SyntacticAnalysisException("Unexpected token " + tokenGiven.toHumanString() + " encountered at " + tokenGiven.locationString() + " in production " + productionDescription + ", expected " + expected + ". The current production started at " + initialToken.locationString() + " with " + initialToken.toHumanString()) { } 29 | }; -------------------------------------------------------------------------------- /astir/SyntacticAnalyzer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Token.h" 4 | #include 5 | #include 6 | 7 | #include "SyntacticTree.h" 8 | #include "MachineDefinition.h" 9 | #include "FiniteAutomatonDefinition.h" 10 | #include "LLkParserDefinition.h" 11 | #include "MachineStatement.h" 12 | #include "SyntacticAnalysisException.h" 13 | 14 | class SyntacticAnalyzer { 15 | public: 16 | SyntacticAnalyzer() = default; 17 | 18 | std::unique_ptr process(const std::list& tokens) const; 19 | private: 20 | std::unique_ptr parseUsesStatement(std::list::const_iterator& it) const; 21 | 22 | std::unique_ptr parseMachineDefinition(std::list::const_iterator& it) const; 23 | bool tryParseMachineFlag(std::list::const_iterator& it, std::map& attributes) const; 24 | std::unique_ptr parseMachineType(std::list::const_iterator & it) const; 25 | 26 | std::unique_ptr parseMachineStatement(std::list::const_iterator& it) const; 27 | std::unique_ptr 
parseCategoryStatement(std::list::const_iterator& it, Rootness rootness) const; 28 | void parseInAttributedStatement(std::list::const_iterator& productionStartIt, std::list::const_iterator& it, AttributedStatement& statement, const std::string& attributedStatementType) const; 29 | void parseInRuleStatement(std::list::const_iterator& productionStartIt, std::list::const_iterator& it, RuleStatement& statement, const std::string& attributedStatementType) const; 30 | std::unique_ptr parseProductionStatement(std::list::const_iterator& it, Rootness rootness, Terminality terminality) const; 31 | std::unique_ptr parsePatternStatement(std::list::const_iterator& it) const; 32 | std::unique_ptr parseRegexStatement(std::list::const_iterator& it) const; 33 | 34 | std::unique_ptr parseMemberDeclaration(std::list::const_iterator& it) const; 35 | std::unique_ptr parseRootRegex(std::list::const_iterator& it) const; 36 | std::unique_ptr parseRepetitiveRegex(std::list::const_iterator& it) const; 37 | RegexActionType parseRegexAction(std::list::const_iterator& it) const; 38 | 39 | std::unique_ptr parseAtomicRegex(std::list::const_iterator& it) const; 40 | std::unique_ptr parsePrimitiveRegex(std::list::const_iterator& it) const; 41 | bool tryParseAnyRegex(std::list::const_iterator& it, std::unique_ptr& anyRegexPtr) const; 42 | std::unique_ptr parseConjunctiveRegex(std::list::const_iterator& it) const; 43 | std::unique_ptr parseDisjunctiveRegex(std::list::const_iterator& it) const; 44 | }; 45 | -------------------------------------------------------------------------------- /astir/SyntacticTree.cpp: -------------------------------------------------------------------------------- 1 | #include "SyntacticTree.h" 2 | 3 | #include "GenerationVisitor.h" 4 | #include "NFABuilder.h" 5 | #include "SemanticAnalysisException.h" 6 | 7 | #include 8 | #include 9 | 10 | void SyntacticTree::initialize() { 11 | if (initialized()) { 12 | return; 13 | } 14 | 15 | ISemanticEntity::initialize(); 16 | 17 | // 
Future TODO: parse and load other files here 18 | 19 | // check for recursion 20 | std::list namesEncountered; 21 | for (const auto& definitionPair : machineDefinitions) { 22 | completeMachineHierarchy(namesEncountered, definitionPair.second); 23 | } 24 | 25 | // and, finally, internally initialize all machines 26 | for (const auto& machinePair : machineDefinitions) { 27 | machinePair.second->initialize(); 28 | } 29 | } 30 | 31 | void SyntacticTree::completeMachineHierarchy(std::list& namesEncountered, const std::shared_ptr& machineDefinitionToComplete) const { 32 | const std::string& nameConsidered = machineDefinitionToComplete->name; 33 | bool collision = std::find(namesEncountered.cbegin(), namesEncountered.cend(), nameConsidered) != namesEncountered.cend(); 34 | namesEncountered.push_back(nameConsidered); 35 | 36 | if (collision) { 37 | std::string hierarchyPath = namesEncountered.front(); 38 | namesEncountered.pop_front(); 39 | for (const auto& nameEncountered : namesEncountered) { 40 | hierarchyPath += "-" + nameEncountered; 41 | } 42 | throw SemanticAnalysisException("Definition recursion found in the mixed follow/extends hierarchy path " + hierarchyPath, *this); 43 | } 44 | 45 | const std::string& onName = machineDefinitionToComplete->on.first; 46 | if (onName.empty()) { 47 | machineDefinitionToComplete->on.second = nullptr; 48 | } else { 49 | auto onIt = machineDefinitions.find(onName); 50 | if (onIt == machineDefinitions.end()) { 51 | throw SemanticAnalysisException("Unknown machine name '" + onName + "' referenced as 'on' dependency by machine '" + machineDefinitionToComplete->name + "', declared at " + machineDefinitionToComplete->locationString()); 52 | } 53 | 54 | machineDefinitionToComplete->on.second = onIt->second; 55 | completeMachineHierarchy(namesEncountered, onIt->second); 56 | } 57 | 58 | for (auto& usesPair : machineDefinitionToComplete->uses) { 59 | auto usesIt = machineDefinitions.find(usesPair.first); 60 | if (usesIt == 
machineDefinitions.end()) { 61 | throw SemanticAnalysisException("Unknown machine name '" + onName + "' referenced as 'uses' dependency by machine '" + machineDefinitionToComplete->name + "', declared at " + machineDefinitionToComplete->locationString()); 62 | } 63 | 64 | usesPair.second = usesIt->second; 65 | completeMachineHierarchy(namesEncountered, usesIt->second); 66 | } 67 | 68 | namesEncountered.pop_back(); 69 | } 70 | 71 | void SyntacticTree::accept(GenerationVisitor* visitor) const { 72 | visitor->visit(this); 73 | } 74 | -------------------------------------------------------------------------------- /astir/SyntacticTree.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "ISyntacticEntity.h" 9 | #include "Regex.h" 10 | #include "Field.h" 11 | #include "NFA.h" 12 | #include "ILLkFirstable.h" 13 | 14 | /* 15 | As a general rule, avoid creating full insertive constructors for objects, since the container ownership of unique_ptrs then often gets quite tricky. 16 | It's usually much better to create a 'minimal' initialization in in the default constructor and have everything else done from outside by the relevant parsing procedure. Hence also the choice of struct over class. 
17 | */ 18 | 19 | struct UsesStatement; 20 | struct MachineDefinition; 21 | struct SyntacticTree : public ISyntacticEntity, public ISemanticEntity, public IGenerationVisitable { 22 | std::list> usesStatements; 23 | std::map> machineDefinitions; 24 | 25 | // semantic bit 26 | void initialize() override; 27 | void completeMachineHierarchy(std::list& namesEncountered, const std::shared_ptr& machineDefinitionToComplete) const; 28 | 29 | // generation bit 30 | void accept(GenerationVisitor* visitor) const override; 31 | }; 32 | 33 | struct UsesStatement : public ISyntacticEntity { 34 | std::string filePath; 35 | }; 36 | -------------------------------------------------------------------------------- /astir/TestingSwitch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #undef TESTING -------------------------------------------------------------------------------- /astir/Tests/Hello Binary/BinaryRecognizer.astir: -------------------------------------------------------------------------------- 1 | // BinaryRecognizer - an example from the getting started part of the documentation 2 | 3 | finite automaton BinaryRecognizer { 4 | language = ['0' '1']+ ([' ' '\t' '\n']+ ['0' '1']+)*; 5 | } -------------------------------------------------------------------------------- /astir/Tests/Hello Binary/BinaryRecognizerMain.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/BinaryRecognizer.h" 4 | 5 | int main() { 6 | TextFileStream tfs("input.txt"); 7 | 8 | BinaryRecognizer::BinaryRecognizer tokenizer; 9 | tokenizer.apply(tfs); 10 | 11 | if(tokenizer.lastApplicationSuccessful()) { 12 | std::cout << "The input in 'input.txt' was recognized; it indeed belongs to our language" << std::endl; 13 | return 0; 14 | } else { 15 | std::cout << "The input in 'input.txt' was not recognized; it does not belong to our language" << std::endl; 16 | return 1; 17 | } 18 | 
} -------------------------------------------------------------------------------- /astir/Tests/Hello Binary/BinaryRecognizerRefactored.astir: -------------------------------------------------------------------------------- 1 | // BinaryRecognizer (refactored) - the example from the getting started part of the documentation 2 | 3 | finite automaton BinaryRecognizer { 4 | regex binaryDigit = ['0' '1']; 5 | regex whiteSpaceCharacter = [' ' '\t' '\n']; 6 | 7 | production Language = binaryDigit+ (whiteSpaceCharacter+ binaryDigit+)*; 8 | } -------------------------------------------------------------------------------- /astir/Tests/Hello Binary/BinaryTokenizer.astir: -------------------------------------------------------------------------------- 1 | // BinaryTokenizer - an example from the getting started part of the documentation 2 | 3 | finite automaton BinaryTokenizer { 4 | regex binaryDigit = ['0' '1']; 5 | regex whiteSpaceCharacter = [' ' '\t' '\n']; 6 | 7 | production BinaryString = binaryDigit+; 8 | production WhiteSpace = whiteSpaceCharacter+; 9 | } -------------------------------------------------------------------------------- /astir/Tests/Hello Binary/BinaryTokenizerMain.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/BinaryTokenizer.h" 4 | 5 | int main() { 6 | TextFileStream tfs("input.txt"); 7 | 8 | BinaryTokenizer::BinaryTokenizer tokenizer; 9 | auto listOfTokens = tokenizer.processStream(tfs); 10 | 11 | for(const auto& tokenPtr : listOfTokens) { 12 | if(tokenPtr->type == BinaryTokenizerTerminalType::BinaryString) { 13 | std::cout << "[" << tokenPtr->locationString() << "] BinaryString: " << tokenPtr->raw << std::endl; 14 | } else { 15 | std::cout << "[" << tokenPtr->locationString() << "] WhiteSpace of length " << tokenPtr->raw.length() << std::endl; 16 | } 17 | } 18 | 19 | return 0; 20 | } -------------------------------------------------------------------------------- 
/astir/Tests/Test01/Test01.astir: -------------------------------------------------------------------------------- 1 | // Test01 - basic lexing, parsing, and code generation testing 2 | // - example input: abbb 3 | // - correct behaviour: pattern p3 is recognized 4 | // - an example of incorrect behaviour (due to failing to branch properly): pattern p2 is recognized prematurely and the next recognition fails 5 | 6 | finite automaton Test01 { 7 | terminal production p1 = "a"; 8 | terminal production p2 = "a" "b" "b"; 9 | production p3 = "a"* "b"+; 10 | } 11 | 12 | 13 | /* 14 | // A simpler test case if the above doesn't work 15 | finite automaton Test01 { 16 | production p3 = "a" "b"+; 17 | } 18 | */ -------------------------------------------------------------------------------- /astir/Tests/Test01/Test01.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test01", "Test01.vcxproj", "{1B52D419-C64C-421D-85DA-7D6C8DB08D18}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Debug|x64.ActiveCfg = Debug|x64 17 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Debug|x64.Build.0 = Debug|x64 18 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Debug|x86.ActiveCfg = Debug|Win32 19 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Debug|x86.Build.0 = Debug|Win32 20 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Release|x64.ActiveCfg = Release|x64 21 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Release|x64.Build.0 = Release|x64 22 | 
{1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Release|x86.ActiveCfg = Release|Win32 23 | {1B52D419-C64C-421D-85DA-7D6C8DB08D18}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {27FBD902-6A2D-4ADD-8323-32C1CDB62609} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test01/Test01.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | Header Files 26 | 27 | 28 | Header Files 29 | 30 | 31 | Header Files 32 | 33 | 34 | Header Files 35 | 36 | 37 | Header Files 38 | 39 | 40 | Header Files 41 | 42 | 43 | Header Files 44 | 45 | 46 | 47 | 48 | Source Files 49 | 50 | 51 | Source Files 52 | 53 | 54 | Source Files 55 | 56 | 57 | Source Files 58 | 59 | 60 | 61 | 62 | Resource Files 63 | 64 | 65 | 66 | 67 | Resource Files 68 | 69 | 70 | -------------------------------------------------------------------------------- /astir/Tests/Test01/Test01.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir) 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/Tests/Test01/input.txt: -------------------------------------------------------------------------------- 1 | abbb 
-------------------------------------------------------------------------------- /astir/Tests/Test01/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/Test01.h" 2 | 3 | int main() { 4 | TextFileStream input("input.txt"); 5 | 6 | Test01::Test01 tokenizer; 7 | auto tokenized = tokenizer.processStream(input); 8 | 9 | return 0; 10 | } -------------------------------------------------------------------------------- /astir/Tests/Test02/Test02.astir: -------------------------------------------------------------------------------- 1 | // Test02 - basic lexing, parsing, and type context generation testing 2 | // - example input: aa 3 | // - correct behaviour: p1 is recognized with 4 | // - isShort: false 5 | // - isLong: true 6 | // - an example of incorrect behaviour: p3 is recognized, or p1 is recognized with 7 | // - (isShort, isLong) in { (false, false), (true, false), (true, true) } 8 | 9 | finite automaton Test02 { 10 | terminal production p1 { 11 | flag isShort; 12 | flag isLong; 13 | } = "a"@flag:isShort@unflag:isLong 14 | | "a" "a"@unflag:isShort@flag:isLong 15 | ; 16 | 17 | terminal production p2 = "a" "b" "b"; 18 | terminal production p3 = "a"* "b"+; 19 | } -------------------------------------------------------------------------------- /astir/Tests/Test02/Test02.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test02", "Test02.vcxproj", "{1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | 
GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Debug|x64.ActiveCfg = Debug|x64 17 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Debug|x64.Build.0 = Debug|x64 18 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Debug|x86.ActiveCfg = Debug|Win32 19 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Debug|x86.Build.0 = Debug|Win32 20 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Release|x64.ActiveCfg = Release|x64 21 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Release|x64.Build.0 = Release|x64 22 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Release|x86.ActiveCfg = Release|Win32 23 | {1FCF15C1-DC5A-40BF-97FF-86B51A3156E7}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {5086A55C-4E7B-4494-8591-D92D3DE01EC9} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test02/Test02.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | 
-------------------------------------------------------------------------------- /astir/Tests/Test02/Test02.vcxproj.user: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | -------------------------------------------------------------------------------- /astir/Tests/Test02/input.txt: -------------------------------------------------------------------------------- 1 | aa -------------------------------------------------------------------------------- /astir/Tests/Test02/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/Test02.h" 2 | 3 | int main() { 4 | TextFileStream tfs("input.txt"); 5 | 6 | Test02::Test02* tokenizer = new Test02::Test02(); 7 | auto tokens = tokenizer->processStream(tfs); 8 | delete tokenizer; 9 | 10 | return 0; 11 | } -------------------------------------------------------------------------------- /astir/Tests/Test03/Test03.astir: -------------------------------------------------------------------------------- 1 | // Test03 - testing for correctness of advanced lexing 2 | // - focus on 3 | // - escape sequences 4 | // - the difference between 'string' and "string" 5 | // - presence of line comments, not affecting the internal location indicators (i.e. 
as if we were counting comments as `whitespace`) 6 | // - presence of multiline comments, not affecting the internal location indicators 7 | 8 | finite automaton Test03 { 9 | terminal production p1 { // end of line comment 10 | flag/* a completely unnecessarycomment * 1 * / */ isShort; 11 | flag isLong; 12 | } = "a"@flag:isShort@unflag:isLong/* multiline 13 | comment */ 14 | | "aa"@unflag:isShort@flag:isLong 15 | ; 16 | 17 | terminal production p2 = "a\142\x62" "" 'h' "hlelujah"; // "a" "b" "b" 18 | terminal production p3 = "\141"* "b"+; // "a"* "b"+ 19 | } -------------------------------------------------------------------------------- /astir/Tests/Test03/Test03.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test03", "Test03.vcxproj", "{2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Debug|x64.ActiveCfg = Debug|x64 17 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Debug|x64.Build.0 = Debug|x64 18 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Debug|x86.ActiveCfg = Debug|Win32 19 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Debug|x86.Build.0 = Debug|Win32 20 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Release|x64.ActiveCfg = Release|x64 21 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Release|x64.Build.0 = Release|x64 22 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Release|x86.ActiveCfg = Release|Win32 23 | {2E48BE9B-5A12-4E55-A854-6FC42D72F9D6}.Release|x86.Build.0 = Release|Win32 24 | 
EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {5DF0840B-65F3-4586-9992-9F2FA220777D} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test03/Test03.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | -------------------------------------------------------------------------------- /astir/Tests/Test03/Test03.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /astir/Tests/Test03/input.txt: -------------------------------------------------------------------------------- 1 | aa -------------------------------------------------------------------------------- /astir/Tests/Test03/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/Test03.h" 2 | 3 | int main() { 4 | TextFileStream 
tfs("input.txt"); 5 | 6 | Test03::Test03* tokenizer = new Test03::Test03(); 7 | auto tokens = tokenizer->processStream(tfs); 8 | delete tokenizer; 9 | 10 | return 0; 11 | } -------------------------------------------------------------------------------- /astir/Tests/Test04/Test04.astir: -------------------------------------------------------------------------------- 1 | // Test04 - alternation context mixing testing 2 | // - example input: abc 3 | // - correct behaviour: T is recognized with 4 | // - payloadA: b 5 | // - payloadB: a 6 | // - payloadC: c 7 | // - overwriting behaviour (incorrect, an example): T is recognized with 8 | // - payloadA: b 9 | // - payloadB: b 10 | // - payloadC: c 11 | 12 | finite automaton Test04 { 13 | terminal production T { 14 | raw payloadA; 15 | raw payloadB; 16 | raw payloadC; 17 | } = 18 | .@capture:payloadA .@capture:payloadB 19 | | .@capture:payloadB .@capture:payloadA .@capture:payloadC 20 | ; 21 | } -------------------------------------------------------------------------------- /astir/Tests/Test04/Test04.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test04", "Test04.vcxproj", "{7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Debug|x64.ActiveCfg = Debug|x64 17 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Debug|x64.Build.0 = Debug|x64 18 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Debug|x86.ActiveCfg = Debug|Win32 19 | 
{7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Debug|x86.Build.0 = Debug|Win32 20 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Release|x64.ActiveCfg = Release|x64 21 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Release|x64.Build.0 = Release|x64 22 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Release|x86.ActiveCfg = Release|Win32 23 | {7DCB8CF2-BAC7-4161-BFFF-32E7674E36DC}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {53015C7C-F8AE-40BF-8FBB-945F733728CF} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test04/Test04.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | -------------------------------------------------------------------------------- /astir/Tests/Test04/Test04.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 
-------------------------------------------------------------------------------- /astir/Tests/Test04/input.txt: -------------------------------------------------------------------------------- 1 | 012 -------------------------------------------------------------------------------- /astir/Tests/Test04/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/Test04.h" 4 | 5 | int main() { 6 | TextFileStream tfs("input.txt"); 7 | 8 | Test04::Test04 tokenizer; 9 | auto token = tokenizer.apply(tfs); 10 | 11 | return 0; 12 | } -------------------------------------------------------------------------------- /astir/Tests/Test05/Test05.astir: -------------------------------------------------------------------------------- 1 | // Test05 - repetition context mixing testing 2 | // - example input: abcd 3 | // - correct behaviour: T is recognized with 4 | // - payloadA: abc 5 | // - payloadB: d 6 | // - overwriting behaviour (incorrect, an example): T is recognized with 7 | // - payloadA: abcd 8 | // - payloadB: d 9 | 10 | finite automaton Test05 { 11 | terminal production T { 12 | raw payloadA; 13 | raw payloadB; 14 | } = 15 | (.@append:payloadA)* .@capture:payloadB 16 | ; 17 | } -------------------------------------------------------------------------------- /astir/Tests/Test05/Test05.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test05", "Test05.vcxproj", "{D444D839-1F3E-4231-9DD5-B39BA96D9C46}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | 
GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Debug|x64.ActiveCfg = Debug|x64 17 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Debug|x64.Build.0 = Debug|x64 18 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Debug|x86.ActiveCfg = Debug|Win32 19 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Debug|x86.Build.0 = Debug|Win32 20 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Release|x64.ActiveCfg = Release|x64 21 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Release|x64.Build.0 = Release|x64 22 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Release|x86.ActiveCfg = Release|Win32 23 | {D444D839-1F3E-4231-9DD5-B39BA96D9C46}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {54B752B7-1834-45FD-A441-3BC67198EA26} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test05/Test05.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 
| -------------------------------------------------------------------------------- /astir/Tests/Test05/Test05.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /astir/Tests/Test05/input.txt: -------------------------------------------------------------------------------- 1 | abcd -------------------------------------------------------------------------------- /astir/Tests/Test05/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/Test05.h" 4 | 5 | int main() { 6 | TextFileStream tfs("input.txt"); 7 | 8 | Test05::Test05 tokenizer; 9 | auto token = tokenizer.apply(tfs); 10 | 11 | return 0; 12 | } -------------------------------------------------------------------------------- /astir/Tests/Test06/Test06.astir: -------------------------------------------------------------------------------- 1 | // Test06 - finite automaton on terminals 2 | 3 | finite automaton PrimaryAutomaton { 4 | terminal production WORD = ['a'-'z''A'-'Z']+; 5 | terminal production WHITESPACE = ' '* ' '; 6 | terminal production PUNCTUATION = ['.' '!' 
'?']; 7 | } 8 | 9 | finite automaton SecondaryAutomaton on PrimaryAutomaton { 10 | nonterminal production Sentence { 11 | PUNCTUATION type; 12 | WORD list words; 13 | 14 | flag emptyFlag; 15 | raw rawSentence; 16 | } = 17 | (()@flag:emptyFlag (WORD@push:words WHITESPACE)* WORD@push:words PUNCTUATION@set:type)@capture:rawSentence 18 | // (WORD@push:words WHITESPACE)* WORD@push:words PUNCTUATION@set:type 19 | ; 20 | } -------------------------------------------------------------------------------- /astir/Tests/Test06/Test06.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test06", "Test06.vcxproj", "{38AD2D87-1442-48FF-A026-D90F3873121E}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Debug|x64.ActiveCfg = Debug|x64 17 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Debug|x64.Build.0 = Debug|x64 18 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Debug|x86.ActiveCfg = Debug|Win32 19 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Debug|x86.Build.0 = Debug|Win32 20 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Release|x64.ActiveCfg = Release|x64 21 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Release|x64.Build.0 = Release|x64 22 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Release|x86.ActiveCfg = Release|Win32 23 | {38AD2D87-1442-48FF-A026-D90F3873121E}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | 
GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {6CB22D35-9CF1-4337-A2E9-74EC09DFD8A1} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test06/Test06.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | Source Files 42 | 43 | 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | Header Files 71 | 72 | 73 | Header Files 74 | 75 | 76 | -------------------------------------------------------------------------------- /astir/Tests/Test06/Test06.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir)/ 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/Tests/Test06/input.txt: -------------------------------------------------------------------------------- 1 | Hello!How is it going?I am fine thank you. 
-------------------------------------------------------------------------------- /astir/Tests/Test06/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/PrimaryAutomaton.h" 4 | #include "Output/SecondaryAutomaton.h" 5 | 6 | int main() { 7 | TextFileStream tfs("input.txt"); 8 | 9 | PrimaryAutomaton::PrimaryAutomaton primaryTokenizer; 10 | auto primaryStreamProcessed = primaryTokenizer.processStream(tfs); 11 | 12 | ListProductionStream lps(primaryStreamProcessed); 13 | 14 | SecondaryAutomaton::SecondaryAutomaton secondaryTokenizer; 15 | auto secondaryStreamProcessed = secondaryTokenizer.processStream(lps); 16 | 17 | return 0; 18 | } -------------------------------------------------------------------------------- /astir/Tests/Test07/Test07.astir: -------------------------------------------------------------------------------- 1 | // Test07 - finite automaton rootness and root ignorance test 2 | 3 | finite automaton PrimaryAutomaton with productions_nonroot_by_default { 4 | root terminal production WORD = ['a'-'z''A'-'Z']+; 5 | ignored root terminal production WHITESPACE = ' '* ' '; 6 | root terminal production PUNCTUATION = ['.' '!' 
'?']; 7 | 8 | production irrelevant = 'irrelevant'; 9 | } 10 | 11 | finite automaton SecondaryAutomaton on PrimaryAutomaton { 12 | nonterminal production Sentence { 13 | PUNCTUATION type; 14 | WORD list words; 15 | } = 16 | (WORD@push:words)+ PUNCTUATION@set:type 17 | ; 18 | } -------------------------------------------------------------------------------- /astir/Tests/Test07/Test07.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test07", "Test07.vcxproj", "{C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Debug|x64.ActiveCfg = Debug|x64 17 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Debug|x64.Build.0 = Debug|x64 18 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Debug|x86.ActiveCfg = Debug|Win32 19 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Debug|x86.Build.0 = Debug|Win32 20 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Release|x64.ActiveCfg = Release|x64 21 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Release|x64.Build.0 = Release|x64 22 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Release|x86.ActiveCfg = Release|Win32 23 | {C1B3CA46-D5EB-491D-A256-C8CF39B92CCE}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {D1C08A57-7C4D-4E9E-81CB-A0325E763D4B} 30 | EndGlobalSection 31 | EndGlobal 32 | 
-------------------------------------------------------------------------------- /astir/Tests/Test07/Test07.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | 35 | 36 | Resource Files 37 | 38 | 39 | 40 | 41 | Resource Files 42 | 43 | 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | Header Files 71 | 72 | 73 | Header Files 74 | 75 | 76 | -------------------------------------------------------------------------------- /astir/Tests/Test07/Test07.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /astir/Tests/Test07/input.txt: -------------------------------------------------------------------------------- 1 | Hello! How is it going? I am fine thank you.Yay! 
-------------------------------------------------------------------------------- /astir/Tests/Test07/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/PrimaryAutomaton.h" 2 | #include "Output/SecondaryAutomaton.h" 3 | 4 | int main() { 5 | TextFileStream tfs("input.txt"); 6 | 7 | PrimaryAutomaton::PrimaryAutomaton primaryTokenizer; 8 | auto primaryStreamProcessed = primaryTokenizer.processStreamWithIgnorance(tfs); 9 | 10 | ListProductionStream lps(primaryStreamProcessed); 11 | 12 | SecondaryAutomaton::SecondaryAutomaton secondaryTokenizer; 13 | auto secondaryStreamProcessed = secondaryTokenizer.processStream(lps); 14 | 15 | return 0; 16 | } -------------------------------------------------------------------------------- /astir/Tests/Test08/Test08.astir: -------------------------------------------------------------------------------- 1 | // Test08 - a context-free grammar LL(2) parser test 2 | 3 | // The following is actually properly ambiguous (in a construction-prohibiting way that is) 4 | /* 5 | LL(2) parser TreeParser with ambiguity_resolved_by_precedence { 6 | root production Node = 7 | '(' Node ')' 8 | | '(' Node Node ')' 9 | | 'L' // as in "leaf" 10 | ; 11 | } 12 | */ 13 | 14 | LL(2) parser TreeParser with ambiguity_resolved_by_precedence { 15 | root production Node = 16 | '(' ')' // empty 17 | | '(' Node Node ')' 18 | | 'L' // as in "leaf" 19 | ; 20 | } -------------------------------------------------------------------------------- /astir/Tests/Test08/Test08.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test08", "Test08.vcxproj", "{AE058043-4CCD-4C5B-B627-120AD17132C6}" 7 | EndProject 8 | Global 9 | 
GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Debug|x64.ActiveCfg = Debug|x64 17 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Debug|x64.Build.0 = Debug|x64 18 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Debug|x86.ActiveCfg = Debug|Win32 19 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Debug|x86.Build.0 = Debug|Win32 20 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Release|x64.ActiveCfg = Release|x64 21 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Release|x64.Build.0 = Release|x64 22 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Release|x86.ActiveCfg = Release|Win32 23 | {AE058043-4CCD-4C5B-B627-120AD17132C6}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {B96417E4-6EF0-4962-A529-AF3832AAB40B} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test08/Test08.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 
48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | -------------------------------------------------------------------------------- /astir/Tests/Test08/Test08.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir) 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/Tests/Test08/input.txt: -------------------------------------------------------------------------------- 1 | (L(()((L(()L))()))) -------------------------------------------------------------------------------- /astir/Tests/Test08/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/TreeParser.h" 4 | 5 | int main() { 6 | TextFileStream tfs("input.txt"); 7 | 8 | TreeParser::TreeParser treeParser; 9 | auto primaryStreamProcessed = treeParser.apply(tfs); 10 | 11 | return 0; 12 | } -------------------------------------------------------------------------------- /astir/Tests/Test09/Test09.astir: -------------------------------------------------------------------------------- 1 | // Test09 - a context-free grammar LL(2) parser test 2 | // This time, the parser is fed the outputs of a finite automaton that does the tokenizing 3 | 4 | finite automaton TreeTokenizer { 5 | ignored root WhiteSpace = [' ' '\n' '\r' '\t']+; 6 | root PAR_LEFT = '('; 7 | root PAR_RIGHT = ')'; 8 | root LEAF = ['a'-'z' 'A'-'Z' '_' '0' - '9']+; 9 | } 10 | 11 | LL(2) parser TreeParser with ambiguity_resolved_by_precedence on TreeTokenizer { 12 | root category Node; 13 | 14 | production EmptyNode : Node = PAR_LEFT PAR_RIGHT; 15 | production BranchingNode : Node = PAR_LEFT Node+ PAR_RIGHT; 16 | production Leaf : Node = LEAF; 17 | } 
-------------------------------------------------------------------------------- /astir/Tests/Test09/Test09.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test09", "Test09.vcxproj", "{C58A6783-992B-422F-BE2A-8C413E7C49FC}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Debug|x64.ActiveCfg = Debug|x64 17 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Debug|x64.Build.0 = Debug|x64 18 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Debug|x86.ActiveCfg = Debug|Win32 19 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Debug|x86.Build.0 = Debug|Win32 20 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Release|x64.ActiveCfg = Release|x64 21 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Release|x64.Build.0 = Release|x64 22 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Release|x86.ActiveCfg = Release|Win32 23 | {C58A6783-992B-422F-BE2A-8C413E7C49FC}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {C0CF4186-8E5D-4257-9D33-C03306361E89} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test09/Test09.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | 
cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | Source Files 42 | 43 | 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | Header Files 71 | 72 | 73 | Header Files 74 | 75 | 76 | -------------------------------------------------------------------------------- /astir/Tests/Test09/Test09.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir) 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/Tests/Test09/input.txt: -------------------------------------------------------------------------------- 1 | ( 2 | (Du) 3 | (Hast viel geweint) 4 | (Im Geist getrennt 5 | (Getrennt getrennt getrennt getrennt) 6 | ) 7 | () 8 | ) -------------------------------------------------------------------------------- /astir/Tests/Test09/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/TreeTokenizer.h" 4 | #include "Output/TreeParser.h" 5 | 6 | int main() { 7 | TextFileStream tfs("input.txt"); 8 | 9 | TreeTokenizer::TreeTokenizer treeTokenizer; 10 | auto primaryStreamProcessed = treeTokenizer.processStreamWithIgnorance(tfs); 11 | 12 | ListProductionStream lcs(primaryStreamProcessed); 13 | 14 | TreeParser::TreeParser treeParser; 15 | auto 
secondaryStreamProcessed = treeParser.apply(lcs); 16 | 17 | return 0; 18 | } -------------------------------------------------------------------------------- /astir/Tests/Test10/Test10.astir: -------------------------------------------------------------------------------- 1 | // Test10 - a context-free grammar LL(2) parser test 2 | // This time, the parser 'uses' finite automaton that does the tokenizing, referring to it whenever a token is needed 3 | 4 | finite automaton TreeTokenizer { 5 | ignored root WhiteSpace = [' ' '\n' '\r' '\t']+; 6 | root PAR_LEFT = '('; 7 | root PAR_RIGHT = ')'; 8 | root LEAF = ['a'-'z' 'A'-'Z' '_' '0' - '9']+; 9 | } 10 | 11 | LL(2) parser TreeParser with ambiguity_resolved_by_precedence uses TreeTokenizer { 12 | root category Node; 13 | 14 | production EmptyNode : Node = PAR_LEFT PAR_RIGHT; 15 | production BranchingNode : Node = PAR_LEFT Node+ PAR_RIGHT; 16 | production Leaf : Node = LEAF; 17 | } -------------------------------------------------------------------------------- /astir/Tests/Test10/Test10.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test10", "Test10.vcxproj", "{49B0238B-B0CD-4E8F-8530-00064E94DAAA}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Debug|x64.ActiveCfg = Debug|x64 17 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Debug|x64.Build.0 = Debug|x64 18 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Debug|x86.ActiveCfg = Debug|Win32 19 | 
{49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Debug|x86.Build.0 = Debug|Win32 20 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Release|x64.ActiveCfg = Release|x64 21 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Release|x64.Build.0 = Release|x64 22 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Release|x86.ActiveCfg = Release|Win32 23 | {49B0238B-B0CD-4E8F-8530-00064E94DAAA}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {2BF70AB9-B253-47B5-9E2D-F3FF8AF341B9} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test10/Test10.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Header Files 30 | 31 | 32 | Header Files 33 | 34 | 35 | Header Files 36 | 37 | 38 | Header Files 39 | 40 | 41 | Header Files 42 | 43 | 44 | Header Files 45 | 46 | 47 | Header Files 48 | 49 | 50 | Header Files 51 | 52 | 53 | Header Files 54 | 55 | 56 | Header Files 57 | 58 | 59 | 60 | 61 | Source Files 62 | 63 | 64 | Source Files 65 | 66 | 67 | Source Files 68 | 69 | 70 | Source Files 71 | 72 | 73 | Source Files 74 | 75 | 76 | -------------------------------------------------------------------------------- /astir/Tests/Test10/Test10.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir) 5 
| WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/Tests/Test10/input.txt: -------------------------------------------------------------------------------- 1 | (L(()((L(()L))()))) -------------------------------------------------------------------------------- /astir/Tests/Test10/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/TreeTokenizer.h" 2 | #include "Output/TreeParser.h" 3 | 4 | int main() { 5 | TextFileStream tfs("input.txt"); 6 | 7 | TreeParser::TreeParser treeParser; 8 | auto secondaryStreamProcessed = treeParser.parse(tfs); 9 | 10 | return 0; 11 | } -------------------------------------------------------------------------------- /astir/Tests/Test11/Test11.astir: -------------------------------------------------------------------------------- 1 | // Test11 - test for the disjoinFrom procedures of various SymbolGroups 2 | 3 | finite automaton PrimaryAutomaton { 4 | root A = ['0'-'3']; 5 | root B = ['2'-'5']; 6 | root C = ['4'-'7']; 7 | root D = ['6'-'9']; 8 | } 9 | 10 | finite automaton SecondaryAutomaton on PrimaryAutomaton { 11 | root First = (A | B | D) A A; 12 | root Second = (B | C) (A | B) B; 13 | root Third = (B | C | D) C C; 14 | } -------------------------------------------------------------------------------- /astir/Tests/Test11/Test11.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test11", "Test11.vcxproj", "{B2ACD718-8286-4172-BB09-2BDF3DB8688F}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = 
Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Debug|x64.ActiveCfg = Debug|x64 17 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Debug|x64.Build.0 = Debug|x64 18 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Debug|x86.ActiveCfg = Debug|Win32 19 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Debug|x86.Build.0 = Debug|Win32 20 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Release|x64.ActiveCfg = Release|x64 21 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Release|x64.Build.0 = Release|x64 22 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Release|x86.ActiveCfg = Release|Win32 23 | {B2ACD718-8286-4172-BB09-2BDF3DB8688F}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {F2D48383-2CF6-4946-81BC-898F9D35AD2B} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test11/Test11.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | Header Files 28 | 29 | 30 | Header Files 31 | 32 | 33 | Header Files 34 | 35 | 36 | Header Files 37 | 38 | 39 | Header Files 40 | 41 | 42 | Header Files 43 | 44 | 45 | Header Files 46 | 47 | 48 | Header Files 49 | 50 | 51 | Header Files 52 | 53 | 54 | 55 | 56 | Header Files 57 | 58 | 59 | Source Files 60 | 61 | 62 | Source Files 63 | 64 | 65 | 
Source Files 66 | 67 | 68 | Source Files 69 | 70 | 71 | 72 | 73 | Resource Files 74 | 75 | 76 | -------------------------------------------------------------------------------- /astir/Tests/Test11/Test11.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /astir/Tests/Test11/input.txt: -------------------------------------------------------------------------------- 1 | 01025451010210554952318 -------------------------------------------------------------------------------- /astir/Tests/Test11/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/PrimaryAutomaton.h" 2 | #include "Output/SecondaryAutomaton.h" 3 | 4 | int main() { 5 | TextFileStream tfs("input.txt"); 6 | 7 | PrimaryAutomaton::PrimaryAutomaton primaryAutomaton; 8 | auto primaryStreamProcessed = primaryAutomaton.processStream(tfs); 9 | 10 | ListProductionStream lps(primaryStreamProcessed); 11 | 12 | SecondaryAutomaton::SecondaryAutomaton secondaryAutomaton; 13 | auto secondaryStreamProcessed = secondaryAutomaton.processStreamWithIgnorance(lps); 14 | 15 | return 0; 16 | } -------------------------------------------------------------------------------- /astir/Tests/Test12/Test12.astir: -------------------------------------------------------------------------------- 1 | // Test12 - test for the disjoinFrom (unpickReferal) procedures on category hierarchies 2 | 3 | LL(3) parser CategoricalParser with ambiguity_resolved_by_precedence { 4 | category TopLevelCategory; 5 | category MidLevelCategory : TopLevelCategory; 6 | 7 | a : MidLevelCategory = '0'; 8 | b : MidLevelCategory = '1'; 9 | c : MidLevelCategory = '2'; 10 | d : TopLevelCategory = '3'; 11 | e : TopLevelCategory = '4'; 12 | f : TopLevelCategory = '5'; 13 | 14 | root TestA = 15 | TopLevelCategory 'X' 'A' 16 | | f 'Y' 'A' 17 | ; 18 | 19 | root TestB = 20 | 
TopLevelCategory 'X' 'B' 21 | | a 'Y' 'B' 22 | ; 23 | 24 | root TestC = 25 | TopLevelCategory 'X' 'C' 26 | | MidLevelCategory 'Y' 'C' 27 | ; 28 | 29 | root TestD = 30 | TopLevelCategory 'X' 'D' 31 | | MidLevelCategory 'Y' 'D' 32 | | b 'Z' 'D' 33 | ; 34 | 35 | root TestE = 36 | MidLevelCategory 'X' 'E' 37 | | c 'Y' 'E' 38 | ; 39 | 40 | root TestF = 41 | MidLevelCategory 'X' 'F' 42 | | e 'Y' 'F' 43 | ; 44 | } 45 | 46 | /* 47 | // A Simplified version for debugging 48 | LL(1) parser CategoricalParser with ambiguity_resolved_by_precedence { 49 | category TopLevelCategory; 50 | category MidLevelCategory : TopLevelCategory; 51 | 52 | A : MidLevelCategory = '0'; 53 | F : TopLevelCategory = '5'; 54 | 55 | root TestA = 56 | TopLevelCategory 'X' 57 | | F 'Y' 58 | ; 59 | }*/ -------------------------------------------------------------------------------- /astir/Tests/Test12/Test12.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test12", "Test12.vcxproj", "{4B38915B-B040-4862-A540-5B287AEAAE9E}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Debug|x64.ActiveCfg = Debug|x64 17 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Debug|x64.Build.0 = Debug|x64 18 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Debug|x86.ActiveCfg = Debug|Win32 19 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Debug|x86.Build.0 = Debug|Win32 20 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Release|x64.ActiveCfg = Release|x64 21 | 
{4B38915B-B040-4862-A540-5B287AEAAE9E}.Release|x64.Build.0 = Release|x64 22 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Release|x86.ActiveCfg = Release|Win32 23 | {4B38915B-B040-4862-A540-5B287AEAAE9E}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {35465830-72E3-49CC-9DD8-B9CC7FA8BAFF} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test12/Test12.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | 42 | 43 | Header Files 44 | 45 | 46 | Header Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | -------------------------------------------------------------------------------- /astir/Tests/Test12/Test12.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /astir/Tests/Test12/input.txt: -------------------------------------------------------------------------------- 1 | 1XA 
-------------------------------------------------------------------------------- /astir/Tests/Test12/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "Output/CategoricalParser.h" 4 | 5 | int main() { 6 | TextFileStream tfs("input.txt"); 7 | 8 | CategoricalParser::CategoricalParser categoricalParser; 9 | auto primaryStreamProcessed = categoricalParser.parse(tfs); 10 | 11 | return 0; 12 | } -------------------------------------------------------------------------------- /astir/Tests/Test13/Test13.astir: -------------------------------------------------------------------------------- 1 | // Test13 - a context-free grammar LL(2) parser test with production fields and actions (flag, push, capture) 2 | // This time, the parser runs 'on' a finite automaton, i.e. it is fed the outputs of the automaton that does the tokenizing 3 | 4 | finite automaton TreeTokenizer { 5 | ignored root WhiteSpace = [' ' '\n' '\r' '\t']+; 6 | root PAR_LEFT = '('; 7 | root PAR_RIGHT = ')'; 8 | root LEAF = ['a'-'z' 'A'-'Z' '_' '0' - '9']+; 9 | } 10 | 11 | LL(2) parser TreeParser with ambiguity_resolved_by_precedence on TreeTokenizer { 12 | root category Node; 13 | 14 | production EmptyNode : Node { 15 | flag isReallyEmpty; 16 | } = PAR_LEFT ()@flag:isReallyEmpty PAR_RIGHT; 17 | 18 | production BranchingNode : Node { 19 | Node list nodes; 20 | } = PAR_LEFT (Node@push:nodes)+ PAR_RIGHT; 21 | 22 | production Leaf : Node { 23 | raw text; 24 | } = LEAF@capture:text; 25 | } -------------------------------------------------------------------------------- /astir/Tests/Test13/Test13.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Test13", "Test13.vcxproj", "{A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}" 7 | EndProject 8 |
Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Debug|x64.ActiveCfg = Debug|x64 17 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Debug|x64.Build.0 = Debug|x64 18 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Debug|x86.ActiveCfg = Debug|Win32 19 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Debug|x86.Build.0 = Debug|Win32 20 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Release|x64.ActiveCfg = Release|x64 21 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Release|x64.Build.0 = Release|x64 22 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Release|x86.ActiveCfg = Release|Win32 23 | {A15E7B3B-5C5C-4A9B-8EB7-DEDCB4AC8027}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {7FC0859B-8724-43F4-83C1-9FF8391317A0} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/Tests/Test13/Test13.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Resource Files 20 | 21 | 22 | 23 | 24 | Resource Files 25 | 26 | 27 | 28 | 29 | Source Files 30 | 31 | 32 | Source Files 33 | 34 | 35 | Source Files 36 | 37 | 38 | Source Files 39 | 40 | 41 | Source Files 42 | 43 | 44 | 45 | 46 | Header 
Files 47 | 48 | 49 | Header Files 50 | 51 | 52 | Header Files 53 | 54 | 55 | Header Files 56 | 57 | 58 | Header Files 59 | 60 | 61 | Header Files 62 | 63 | 64 | Header Files 65 | 66 | 67 | Header Files 68 | 69 | 70 | Header Files 71 | 72 | 73 | Header Files 74 | 75 | 76 | -------------------------------------------------------------------------------- /astir/Tests/Test13/Test13.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | $(ProjectDir) 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/Tests/Test13/input.txt: -------------------------------------------------------------------------------- 1 | ( 2 | L 3 | ( 4 | () 5 | ( 6 | ( 7 | L 8 | ( 9 | () 10 | L 11 | ) 12 | ) 13 | () 14 | ) 15 | ) 16 | ) -------------------------------------------------------------------------------- /astir/Tests/Test13/main.cpp: -------------------------------------------------------------------------------- 1 | #include "Output/TreeTokenizer.h" 2 | #include "Output/TreeParser.h" 3 | 4 | int main() { 5 | TextFileStream tfs("input.txt"); 6 | 7 | TreeTokenizer::TreeTokenizer treeTokenizer; 8 | auto primaryStreamProcessed = treeTokenizer.processStreamWithIgnorance(tfs); 9 | 10 | ListProductionStream lps(primaryStreamProcessed); 11 | 12 | TreeParser::TreeParser treeParser; 13 | auto secondaryStreamProcessed = treeParser.parse(lps); 14 | 15 | return 0; 16 | } -------------------------------------------------------------------------------- /astir/Tests/Uncategorized/C99Tokenizer.astir: -------------------------------------------------------------------------------- 1 | nondeterministic finite automaton C99Tokenizer with individual_string_literals { 2 | pattern OCTDIGIT = ["0"-"7"]; 3 | pattern DECDIGIT = ["0"-"9"]; 4 | patter HEXDIGIT = ["a"-"f" "A"-"F" "0"-"9"]; 5 | pattern LETTER = ["a"-"z" "A"-"Z" "_"]; 6 | pattern EXPONENT_PART = ["E" "e"]["+" "-"]?D+; 7 | 
pattern FLOAT_SPECIFIER = ("f" | "F" | "l" | "L"); 8 | pattern INTEGER_SPECFIER = ("u" | "U" | "l" | "L")*; 9 | 10 | terminal production KW_AUTO = "auto"; 11 | terminal production KW_BREAK = "break"; 12 | terminal production KW_CASE = "case"; 13 | terminal production KW_CHAR = "char"; 14 | terminal production KW_CONST = "const"; 15 | terminal production KW_CONTINUE = "continue"; 16 | terminal production KW_DEFAULT = "default"; 17 | terminal production KW_DO = "do"; 18 | terminal production KW_DOUBLE = "double"; 19 | terminal production KW_ELSE = "else"; 20 | terminal production KW_ENUM = "enum"; 21 | terminal production KW_EXTERN = "extern"; 22 | terminal production KW_FLOAT = "float"; 23 | terminal production KW_FOR = "for"; 24 | terminal production KW_GOTO = "goto"; 25 | terminal production KW_IF = "if"; 26 | terminal production KW_INT = "int"; 27 | terminal production KW_LONG = "long"; 28 | terminal production KW_REGISTER = "register"; 29 | terminal production KW_RETURN = "return"; 30 | terminal production KW_SHORT = "short"; 31 | terminal production KW_SIGNED = "signed"; 32 | terminal production KW_SIZEOF = "sizeof"; 33 | terminal production KW_STATIC = "static"; 34 | terminal production KW_STRUCT = "struct"; 35 | terminal production KW_SWITCH = "switch"; 36 | terminal production KW_TYPEDEF = "typedef"; 37 | terminal production KW_UNION = "union"; 38 | terminal production KW_UNSIGNED = "unsigned"; 39 | terminal production KW_VOID = "void"; 40 | terminal production KW_VOLATILE = "volatile"; 41 | terminal production KW_WHILE = "while"; 42 | 43 | terminal production IDENTIFIER = LETTER(LETTER|DECDIGIT)*; 44 | terminal production NUMERIC_CONSTANT = 45 | 0[xX]{HEXDIGIT+ INTEGER_SPECFIER? 46 | | "0" OCTDIGIT+ INTEGER_SPECFIER? 47 | | DECDIGIT+ INTEGER_SPECFIER? 48 | | "L"? "'" ("\" . | [^"\" "'"])+ "'" 49 | | DECDIGIT+ EXPONENT_PART FLOAT_SPECIFIER? 50 | | DECDIGIT* "." DECDIGIT+ (EXPONENT_PART)? FLOAT_SPECIFIER? 51 | | DECDIGIT+ "." DECDIGIT* (EXPONENT_PART)? 
FLOAT_SPECIFIER? 52 | ; 53 | 54 | terminal production NUMERIC_CONSTANT = 55 | "L"? "\"" ("\" . | [^"\" "\""])* "\"" 56 | ; 57 | 58 | terminal production ELLIPSIS = "..."; 59 | terminal production RIGHT_ASSIGN = ">>="; 60 | terminal production LEFT_ASSIGN = "<<="; 61 | terminal production ADD_ASSIGN = "+="; 62 | terminal production SUB_ASSIGN = "-="; 63 | terminal production MUL_ASSIGN = "*="; 64 | terminal production DIV_ASSIGN = "/="; 65 | terminal production MOD_ASSIGN = "%="; 66 | terminal production AND_ASSIGN = "&="; 67 | terminal production XOR_ASSIGN = "^="; 68 | terminal production OR_ASSIGN = "|="; 69 | terminal production SHIFT_RIGHT = ">>"; 70 | terminal production SHIFT_LEFT = "<<"; 71 | terminal production UNOP_INC = "++"; 72 | terminal production UNOP_DEC = "--"; 73 | terminal production OP_PTR = "->"; 74 | terminal production OP_STAR = "*"; 75 | 76 | terminal production LOGICAL_AND = "&&"; 77 | terminal production LOGICAL_OR = "||"; 78 | terminal production LOGICAL_NOT = "!"; 79 | terminal production COMP_LESS = "<" 80 | terminal production COMP_GREATER = ">" 81 | terminal production COMP_LEQ = "<=" 82 | terminal production COMP_GEQ = ">="; 83 | terminal production COMP_EQ = "=="; 84 | terminal production COMP_NEQ = "!="; 85 | terminal production SEMICOLON = ";"; 86 | 87 | terminal production CURLY_OPEN = ("{" | "<%"); 88 | terminal production CURLY_CLOSE = (")" | "%>"); 89 | terminal production COMMA = ","; 90 | terminal production COLON = ":"; 91 | terminal production EQUALS = "="; 92 | terminal production PAR_OPEN = "("; 93 | terminal production PAR_CLOSE = ")"; 94 | terminal production SQUARE_OPEN = ("["|"<:"); 95 | terminal production SQUARE_CLOSE = ("]"|":>"); 96 | terminal production DOT = "."; 97 | 98 | terminal production BITWISE_AND = "&"; 99 | terminal production BITWISE_OR = "|"; 100 | terminal production BITWISE_NOT = "~"; 101 | terminal production BITWISE_XOR = "^"; 102 | terminal production OP_MINUS = "-"; 103 | terminal production 
OP_PLUS = "+"; 104 | terminal production OP_DIV = "/"; 105 | terminal production OP_MOD = "%"; 106 | terminal production OP_OR = "|"; 107 | terminal production OP_QUESTIONMARK = "?"; 108 | 109 | terminal pattern _SPACE = [" " "\t" "\v" "\n" "\f"]; 110 | terminal pattern _IGNORE = .; 111 | } 112 | -------------------------------------------------------------------------------- /astir/Tests/Uncategorized/ExampleParser.apar: -------------------------------------------------------------------------------- 1 | recursive_descent parser ExampleParser follows ExampleTokenizer { 2 | category declaration { 3 | identifier item name; 4 | identifier list interfaces; 5 | flag isAlsoDefinition; 6 | string _randomText; 7 | }; 8 | 9 | production functionSpecification : declaration = 10 | keyword_function identifier@set:name "(" functionArgument*@parameters ")" ";" 11 | | keyword_function identifier@set:name "(" functionArgument*@parameters ")" "{"@flag:isAlsoDefinition functionBodyStatement*@statements "}" 12 | ; 13 | } -------------------------------------------------------------------------------- /astir/Tests/Uncategorized/ExampleTokenizer.astir: -------------------------------------------------------------------------------- 1 | nondeterministic finite automaton ExampleTokenizer { 2 | terminal pattern digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"; 3 | 4 | category Number { 5 | flag isNegative; 6 | }; 7 | 8 | category Integer : Number { 9 | raw value; 10 | }; 11 | 12 | production PositiveInteger : Integer = (digit@append:value)+; 13 | production NegativeInteger : Integer = "-"@flag:isNegative (digit@append:value)+; 14 | 15 | production Float : Number { 16 | PositiveInteger item mantissaWholes; 17 | PositiveInteger item mantissaDecimals; 18 | PositiveInteger item exponent; 19 | } = 20 | ("-"@flag:isNegative)? PositiveInteger@set:mantissaWholes "." PositiveInteger@set:mantissaDecimals (("e"|"E") PositiveInteger@set:exponent)? 
21 | ; 22 | 23 | pattern identifierStartingCharacter = ["a"-"z" "A"-"Z" "_"]; 24 | pattern identifierCharacter = identifierStartingCharacter | digit; 25 | 26 | production Identifier { 27 | raw name; 28 | } = (identifierStartingCharacter@append:name) (identifierCharacter@append:name)*; 29 | 30 | production Operator = 31 | "+" 32 | | "-" 33 | | "*" 34 | | "\" 35 | ; 36 | } -------------------------------------------------------------------------------- /astir/Token.cpp: -------------------------------------------------------------------------------- 1 | #include "Token.h" 2 | 3 | #include 4 | 5 | std::string Token::toString() const { 6 | return std::string("type ") + typeString() + ", string \'" + string + "\', location " + locationString(); 7 | } 8 | 9 | void Token::setLocation(const FileLocation& loc) { 10 | m_fileLocation = loc; 11 | } 12 | 13 | std::string Token::typeString() const { 14 | return Token::convertTypeToString(this->type); 15 | } 16 | 17 | std::string Token::toHumanString() const { 18 | std::string ret = typeString() + " ('"; 19 | if (type == TokenType::STRING) { 20 | ret += std::string("\"") + string + "\""; 21 | } else { 22 | ret += string; 23 | } 24 | ret += "')"; 25 | 26 | return ret; 27 | } 28 | 29 | const FileLocation& Token::location() const { 30 | return m_fileLocation; 31 | } 32 | 33 | std::string Token::convertTypeToString(TokenType type) { 34 | switch (type) { 35 | case TokenType::KW_USES: 36 | return "KW_USES"; 37 | 38 | case TokenType::KW_WITH: 39 | return "KW_WITH"; 40 | case TokenType::KW_ON: 41 | return "KW_ON"; 42 | 43 | case TokenType::KW_FINITE: 44 | return "KW_FINITE"; 45 | case TokenType::KW_AUTOMATON: 46 | return "KW_AUTOMATON"; 47 | case TokenType::KW_LL: 48 | return "KW_LL"; 49 | case TokenType::KW_PARSER: 50 | return "KW_PARSER"; 51 | 52 | case TokenType::KW_PRODUCTIONS_TERMINAL_BY_DEFAULT: 53 | return "KW_PRODUCTIONS_TERMINAL_BY_DEFAULT"; 54 | case TokenType::KW_PRODUCTIONS_NONTERMINAL_BY_DEFAULT: 55 | return 
"KW_PRODUCTIONS_NONTERMINAL_BY_DEFAULT"; 56 | case TokenType::KW_PRODUCTIONS_ROOT_BY_DEFAULT: 57 | return "KW_PRODUCTIONS_ROOT_BY_DEFAULT"; 58 | case TokenType::KW_PRODUCTIONS_NONROOT_BY_DEFAULT: 59 | return "KW_PRODUCTIONS_NONROOT_BY_DEFAULT"; 60 | case TokenType::KW_CATEGORIES_ROOT_BY_DEFAULT: 61 | return "KW_CATEGORIES_ROOT_BY_DEFAULT"; 62 | case TokenType::KW_CATEGORIES_NONROOT_BY_DEFAULT: 63 | return "KW_CATEGORIES_NONROOT_BY_DEFAULT"; 64 | case TokenType::KW_AMBIGUITY_DISALLOWED: 65 | return "KW_AMBIGUITY_DISALLOWED"; 66 | case TokenType::KW_AMBIGUITY_RESOLVED_BY_PRECEDENCE: 67 | return "KW_AMBIGUITY_RESOLVED_BY_PRECEDENCE"; 68 | 69 | case TokenType::KW_ROOT: 70 | return "KW_ROOT"; 71 | case TokenType::KW_IGNORED: 72 | return "KW_IGNORED"; 73 | case TokenType::KW_TERMINAL: 74 | return "KW_TERMINAL"; 75 | case TokenType::KW_NONTERMINAL: 76 | return "KW_NONTERMINAL"; 77 | case TokenType::KW_CATEGORY: 78 | return "KW_CATEGORY"; 79 | case TokenType::KW_PRODUCTION: 80 | return "KW_PRODUCTION"; 81 | case TokenType::KW_PATTERN: 82 | return "KW_PATTERN"; 83 | case TokenType::KW_REGEX: 84 | return "KW_REGEX"; 85 | 86 | case TokenType::KW_ITEM: 87 | return "KW_ITEM"; 88 | case TokenType::KW_LIST: 89 | return "KW_LIST"; 90 | case TokenType::KW_RAW: 91 | return "KW_RAW"; 92 | 93 | case TokenType::KW_FLAG: 94 | return "KW_FLAG"; 95 | case TokenType::KW_UNFLAG: 96 | return "KW_UNFLAG"; 97 | 98 | case TokenType::KW_CAPTURE: 99 | return "KW_CAPTURE"; 100 | case TokenType::KW_EMPTY: 101 | return "KW_EMPTY"; 102 | case TokenType::KW_APPEND: 103 | return "KW_APPEND"; 104 | case TokenType::KW_PREPEND: 105 | return "KW_PREPEND"; 106 | 107 | case TokenType::KW_SET: 108 | return "KW_SET"; 109 | case TokenType::KW_UNSET: 110 | return "KW_UNSET"; 111 | 112 | case TokenType::KW_PUSH: 113 | return "KW_PUSH"; 114 | case TokenType::KW_POP: 115 | return "KW_POP"; 116 | case TokenType::KW_CLEAR: 117 | return "KW_CLEAR"; 118 | 119 | case TokenType::IDENTIFIER: 120 | return "IDENTIFIER"; 121 
| case TokenType::STRING: 122 | return "STRING"; 123 | case TokenType::NUMBER: 124 | return "NUMBER"; 125 | 126 | case TokenType::PAR_LEFT: 127 | return "PAR_LEFT"; 128 | case TokenType::PAR_RIGHT: 129 | return "PAR_RIGHT"; 130 | case TokenType::SQUARE_LEFT: 131 | return "SQUARE_LEFT"; 132 | case TokenType::SQUARE_RIGHT: 133 | return "SQUARE_RIGHT"; 134 | case TokenType::CURLY_LEFT: 135 | return "CURLY_LEFT"; 136 | case TokenType::CURLY_RIGHT: 137 | return "CURLY_RIGHT"; 138 | 139 | case TokenType::OP_COLON: 140 | return "OP_COLON"; 141 | case TokenType::OP_EQUALS: 142 | return "OP_EQUALS"; 143 | case TokenType::OP_LEFTARR: 144 | return "OP_LEFTARR"; 145 | case TokenType::OP_SEMICOLON: 146 | return "OP_SEMICOLON"; 147 | case TokenType::OP_COMMA: 148 | return "OP_COMMA"; 149 | case TokenType::OP_DOT: 150 | return "OP_DOT"; 151 | case TokenType::OP_CARET: 152 | return "OP_CARET"; 153 | case TokenType::OP_DOLLAR: 154 | return "OP_DOLLAR"; 155 | 156 | case TokenType::OP_STAR: 157 | return "OP_STAR"; 158 | case TokenType::OP_PLUS: 159 | return "OP_PLUS"; 160 | case TokenType::OP_QM: 161 | return "OP_QM"; 162 | case TokenType::OP_OR: 163 | return "OP_OR"; 164 | case TokenType::OP_FWDSLASH: 165 | return "OP_FWDSLASH"; 166 | 167 | case TokenType::OP_AMPERSAND: 168 | return "OP_AMPERSAND"; 169 | case TokenType::OP_DASH: 170 | return "OP_DASH"; 171 | case TokenType::OP_AT: 172 | return "OP_AT"; 173 | 174 | case TokenType::EOS: 175 | return "EOS"; 176 | 177 | default: 178 | throw Exception("Unrecognized token type: " + std::to_string((unsigned long)type)); 179 | } 180 | } -------------------------------------------------------------------------------- /astir/Token.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "Exception.h" 6 | #include "IFileLocalizable.h" 7 | 8 | enum class TokenType { 9 | KW_USES, 10 | 11 | KW_ON, 12 | KW_WITH, 13 | 14 | KW_FINITE, 15 | KW_AUTOMATON, 16 | KW_LL, 17 | 
KW_PARSER, 18 | 19 | KW_PRODUCTIONS_TERMINAL_BY_DEFAULT, 20 | KW_PRODUCTIONS_NONTERMINAL_BY_DEFAULT, 21 | KW_PRODUCTIONS_ROOT_BY_DEFAULT, 22 | KW_PRODUCTIONS_NONROOT_BY_DEFAULT, 23 | KW_CATEGORIES_ROOT_BY_DEFAULT, 24 | KW_CATEGORIES_NONROOT_BY_DEFAULT, 25 | KW_AMBIGUITY_DISALLOWED, 26 | KW_AMBIGUITY_RESOLVED_BY_PRECEDENCE, 27 | 28 | KW_IGNORED, 29 | KW_ROOT, 30 | KW_TERMINAL, 31 | KW_NONTERMINAL, 32 | KW_CATEGORY, 33 | KW_PRODUCTION, 34 | KW_PATTERN, 35 | KW_REGEX, 36 | 37 | KW_ITEM, 38 | KW_LIST, 39 | KW_RAW, 40 | 41 | KW_FLAG, 42 | KW_UNFLAG, 43 | KW_CAPTURE, 44 | KW_EMPTY, 45 | KW_APPEND, 46 | KW_PREPEND, 47 | KW_SET, 48 | KW_UNSET, 49 | KW_PUSH, 50 | KW_POP, 51 | KW_CLEAR, 52 | 53 | IDENTIFIER, 54 | STRING, 55 | NUMBER, 56 | 57 | PAR_LEFT, 58 | PAR_RIGHT, 59 | SQUARE_LEFT, 60 | SQUARE_RIGHT, 61 | CURLY_LEFT, 62 | CURLY_RIGHT, 63 | 64 | OP_COLON, 65 | OP_EQUALS, 66 | OP_LEFTARR, 67 | OP_SEMICOLON, 68 | OP_COMMA, 69 | OP_DOT, 70 | OP_CARET, 71 | OP_DOLLAR, 72 | 73 | OP_STAR, 74 | OP_PLUS, 75 | OP_QM, 76 | OP_OR, 77 | OP_FWDSLASH, 78 | 79 | OP_AMPERSAND, 80 | OP_DASH, 81 | OP_AT, 82 | 83 | EOS /* end of stream wiseapples */ 84 | }; 85 | 86 | struct Token : public IFileLocalizable { 87 | TokenType type; 88 | std::string string; 89 | 90 | Token() 91 | : m_fileLocation(1, 1), type(TokenType::IDENTIFIER), string() {} 92 | 93 | void setLocation(const FileLocation& loc); 94 | std::string typeString() const; 95 | std::string toString() const; 96 | std::string toHumanString() const; 97 | 98 | const FileLocation& location() const override; 99 | 100 | static std::string convertTypeToString(TokenType type); 101 | private: 102 | FileLocation m_fileLocation; 103 | }; -------------------------------------------------------------------------------- /astir/astir.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 
16.0.30320.27 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "astir", "astir.vcxproj", "{0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Debug|x64.ActiveCfg = Debug|x64 17 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Debug|x64.Build.0 = Debug|x64 18 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Debug|x86.ActiveCfg = Debug|Win32 19 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Debug|x86.Build.0 = Debug|Win32 20 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Release|x64.ActiveCfg = Release|x64 21 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Release|x64.Build.0 = Release|x64 22 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Release|x86.ActiveCfg = Release|Win32 23 | {0D69953E-D0A0-47B6-833D-3EF2CF3EFC99}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {9D859AF5-E3A0-455C-AAE5-4D03FB1BDAA7} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /astir/astir.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | "Tests/Test06/Test06.alex" --outputDirectory="Tests/Test06/Output" 5 | WindowsLocalDebugger 6 | 7 | -------------------------------------------------------------------------------- /astir/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "LexicalAnalyzer.h" 7 | #include "SyntacticAnalyzer.h" 8 | 
#include "CppGenerationVisitor.h" 9 | 10 | #include "DimCli/libs/dimcli/cli.h" 11 | 12 | void printTokenList(const std::list& tokenList); 13 | 14 | #include "TestingSwitch.h" 15 | int main(int argc, char* argv[]) { 16 | #ifndef TESTING 17 | Dim::Cli cli; 18 | cli.versionOpt("1.0.0"); 19 | auto& grammarFilePath = cli.opt("").desc("The path to the containing the grammar specification that is to be processed"); 20 | auto& outputDirectoryPath = cli.opt("outputDirectory", ".").desc("The directory where the generated files are meant to go."); 21 | if (!cli.parse(std::cerr, argc, argv)) 22 | return cli.exitCode(); 23 | 24 | std::fstream grammarFile(*grammarFilePath); 25 | 26 | try { 27 | LexicalAnalyzer lexicalAnalyzer; 28 | std::cout << "Tokenizing grammar file" << std::endl; 29 | auto tokenList = lexicalAnalyzer.process(grammarFile); 30 | 31 | SyntacticAnalyzer syntacticAnalyzer; 32 | std::cout << "Parsing grammar file" << std::endl; 33 | std::shared_ptr syntacticTree = syntacticAnalyzer.process(tokenList); 34 | 35 | std::cout << "Semantically processing the grammar" << std::endl; 36 | syntacticTree->initialize(); 37 | 38 | CppGenerationVisitor generationVisitor(*outputDirectoryPath); 39 | generationVisitor.setup(); 40 | std::cout << "Generating output code" << std::endl; 41 | generationVisitor.visit(syntacticTree.get()); 42 | } catch (const Exception& exception) { 43 | std::cerr << "Error: " << exception.what() << std::endl; 44 | } 45 | 46 | #else 47 | std::list> testsToRun = { 48 | { "Test01", "Test01" }, 49 | { "Test02", "Test02" }, 50 | { "Test03", "Test03" }, 51 | { "Test04", "Test04" }, 52 | { "Test05", "Test05" }, 53 | { "Test06", "Test06" }, 54 | { "Test07", "Test07" }, 55 | { "Test08", "Test08" }, 56 | { "Test09", "Test09" }, 57 | { "Test10", "Test10" }, 58 | { "Test11", "Test11" }, 59 | { "Test12", "Test12" }, 60 | { "Test13", "Test13" }, 61 | { "Hello Binary", "BinaryRecognizer" }, 62 | { "Hello Binary", "BinaryRecognizerRefactored" }, 63 | { "Hello Binary", 
"BinaryTokenizer" }, 64 | }; 65 | for (const auto& folderFilePair : testsToRun) { 66 | std::fstream inputFile("Tests/" + folderFilePair.first + "/" + folderFilePair.second + ".astir"); 67 | 68 | LexicalAnalyzer analyzer; 69 | auto tokenList = analyzer.process(inputFile); 70 | // printTokenList(tokenList); 71 | 72 | SyntacticAnalyzer parser; 73 | std::shared_ptr syntacticTree = parser.process(tokenList); 74 | syntacticTree->initialize(); 75 | 76 | CppGenerationVisitor generationVisitor("Tests/" + folderFilePair.first + "/Output"); 77 | generationVisitor.setup(); 78 | generationVisitor.visit(syntacticTree.get()); 79 | } 80 | #endif 81 | 82 | return 0; 83 | } 84 | 85 | void printTokenList(const std::list& tokenList) { 86 | for (const Token& token : tokenList) { 87 | std::cout << "[" << token.locationString() << "] " 88 | << token.typeString() 89 | << ": \"" << token.string << "\"" 90 | << std::endl 91 | ; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/.nojekyll -------------------------------------------------------------------------------- /docs/_coverpage.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ![logo](_media/tornado-2.4.png) 4 | 5 | # Astir 1.0.0 6 | 7 | > An OOP-based context-free grammar parser generator for C++ 8 | 9 | [GitHub](https://github.com/lexected/astir/) 10 | [Get Started](#about) 11 | 12 | ![color](#f0f0f0) -------------------------------------------------------------------------------- /docs/_media/tornado-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/_media/tornado-1.png 
-------------------------------------------------------------------------------- /docs/_media/tornado-2.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/_media/tornado-2.0.png -------------------------------------------------------------------------------- /docs/_media/tornado-2.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/_media/tornado-2.1.png -------------------------------------------------------------------------------- /docs/_media/tornado-2.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/_media/tornado-2.2.png -------------------------------------------------------------------------------- /docs/_media/tornado-2.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/_media/tornado-2.3.png -------------------------------------------------------------------------------- /docs/_media/tornado-2.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/docs/_media/tornado-2.4.png -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | - [About](about.md) 2 | 3 | - Getting started 4 | 5 | - [Setting up](setting_up.md) 6 | - ["Hello Binary!"](hello_binary.md) 7 | - [Something regular](something_regular.md) 8 | - [Something context-free](something_context-free.md) 9 | 10 | - Reference 11 | 12 | - 
[Command-line interface](command-line_interface.md) 13 | - [Language](language.md) 14 | - [Generation](generation.md) 15 | - [Output interface](output_interface.md) 16 | -------------------------------------------------------------------------------- /docs/about.md: -------------------------------------------------------------------------------- 1 | # About 2 | 3 | ## What is it? 4 | 5 | Astir is a parser generator -- it takes a grammar specification on input and produces code that, when run, recognizes lexical and syntactical structures on its own input. We refer to this code as *parser code* or just *parser*. This parser code can then be extended to process the structures recognized in some meaningful way (e.g. compiled). 6 | 7 | As a parser generator it is *output-*, rather than *input-*centred. Traditionally, parser generators have been designed with generality (in terms of the breadth of the variety of grammars their output could recognize) and parsing performance in mind. While the development of Astir actively eyed both of these characteristics, it was also designed with extreme care to make sure that 8 | 9 | - a single grammar specification could be used to generate **human-friendly** (where possible) parsers in **multiple languages**, 10 | - the output of any generated parser made proper and appropriately extensive use of the **object-oriented** paradigm, 11 | - the output parsers themselves were **not limited to** using **just one parsing algorithm** or one "true" type and structure of input. 12 | 13 | Astir thus comes with its own grammar-specification language, which 14 | 15 | - makes **full use** of **regular expressions**, 16 | - exhaustively captures **inheritance** and **population** relationships among grammar productions, 17 | - allows for parser output construction in a completely **target-language-neutral** way. 18 | 19 | ## What is it good for?
20 | 21 | The generator output can, depending on its configuration, process an arbitrary raw byte stream or a(n only very lightly restricted) generic target-language-structure stream. This output parser can then recognize any context-free grammar within the limitations of the pre-selected target algorithmic machinery. 22 | 23 | To learn more about Astir's limitations please see the code generation reference. 24 | 25 | ## How is it different? 26 | 27 | You might be familiar with tools such as *lex*, *bison*, or *ANTLR*, all of which generate code for parsers of some feasible regular or context-free languages. To reiterate some of the points made above, in this context, Astir allows you to 28 | 29 | - write object-oriented context-free grammars, 30 | - use just one grammar file for all target languages, and 31 | - modularly draw on a database of various parsing algorithms in the quest to efficiently parse intrinsically complex syntactic structures. -------------------------------------------------------------------------------- /docs/command-line_interface.md: -------------------------------------------------------------------------------- 1 | # Command-line interface 2 | 3 | Astir uses [DimCli](https://github.com/gknowles/dimcli) for its command-line interface. 4 | 5 | The following is the output of `astir --help` 6 | 7 | ```bash 8 | usage: astir [OPTIONS] grammarFilePath 9 | grammarFilePath The path to the containing the grammar specification that is 10 | to be processed 11 | 12 | Options: 13 | --outputDirectory=STRING The directory where the generated files are meant 14 | to go. (default: .) 15 | 16 | --help Show this message and exit. 17 | --version Show version and exit.
18 | ``` -------------------------------------------------------------------------------- /docs/generation.md: -------------------------------------------------------------------------------- 1 | # Generation 2 | 3 | After the lexical, syntactical, and semantic analysis of the grammar-specification file, the generation phase follows. This phase still involves a number of semantic checks, but because these are now always dependent on the machine type, we have lumped them together with the actual internal representation and final output generation. 4 | 5 | ## Finite automata 6 | Finite automata are the simplest (in terms of their inner working, not in terms of their construction) machines supported by Astir. They can parse regular languages specified through the use of the entire spectrum of Astir's regular expressions, and produce both terminal and non-terminal output. Finite automata are further often used by parsers as dependency machines to perform selective regular lookahead where not supported by the machine. 7 | 8 | It should be noted that finite automata do not support *reference recursion*, e.g. the following, albeit a perfectly valid context-free grammar for a regular language, will result in an error 9 | 10 | ```astir 11 | production Atom = 'A'; 12 | production Tail = Body | empty; 13 | production Body = Atom Tail; 14 | ``` 15 | 16 | due to the reference recursion on the path `Body-Tail-Body` or `Tail-Body-Tail` (the path cited in the error will depend on the order in which the statements are processed). 17 | 18 | ### Input and dependency machines 19 | Finite automata accept only terminal input, be it raw or otherwise already processed input (say from another finite automaton). They cannot have any machine dependencies (due to the rather simplistic nature of the DFA transition tables).
20 | 21 | ### Machine attribute defaults 22 | The following are the defaults for finite automaton machine attributes 23 | 24 | * `ProductionsTerminalByDefault` is `true` 25 | * `ProductionsRootByDefault` is `true` 26 | * `CategoriesRootByDefault` is `false` 27 | * `AmbiguityResolvedByPrecedence` is set to `false` 28 | 29 | ### Internal NFA 30 | After the basic semantic checks the finite automaton generator constructs an internal non-deterministic finite automaton. Originally (in one of the first completed commits) Thompson's construction was used before the NFA was converted to a DFA and code generation began. This has since been changed to a custom process that only distantly resembles Thompson's construction. Furthermore, the NFA-to-DFA conversion algorithm significantly deviates from the textbook one due to the presence of actions on transitions and states and the need for backtracking. While all epsilon-transitions are eventually eliminated with the calculation of epsilon-closures and concentration of epsilon-transition actions at accumulated states, the resulting finite automaton is still not strictly a DFA due to the multiple paths that the machine might have to take when trying to match an alternative and then resorting to backtracking. We call the result of the conversion process a pseudo-DFA (or better, Astir's DFA). 31 | 32 | ### Output generation 33 | The output generated from the pseudo-DFA is mainly in the form of tables. There is the transition table, state finality table, transition-action table, and the state-action table, all of which are run by a generated boilerplate. The generated boilerplate has been written to be easily readable and contains numerous comments explaining why things are done in the way they are. Feel free to check out the generated code for more information; we promise it won't be too painful.
34 | 35 | ## LL(k) and LL(finite) parsers 36 | The LL(finite) parsers (or their semantically restricted versions, LL(k) parsers for all k greater than one) are predictive parsers parsing left-to-right the left-most derivation with arbitrary but necessarily finite lookahead. 37 | 38 | LL(k) and LL(finite) parsers are one and the same thing in Astir at the generation level, with the only difference being that the LL(k) parsers will trigger an error anytime more than k units of lookahead are needed to disambiguate between two alternatives. 39 | 40 | ### Input and dependency machines 41 | LL(finite) parsers can accept arbitrary input, and reference arbitrary dependency machines. 42 | 43 | ### Machine attribute defaults 44 | The following are the defaults for LL(finite) parser machine attributes 45 | 46 | * `ProductionsTerminalByDefault` is `false` 47 | * `ProductionsRootByDefault` is `false` 48 | * `CategoriesRootByDefault` is `false` 49 | * `AmbiguityResolvedByPrecedence` is set to `false` 50 | 51 | ### Internal lookahead register 52 | The lookahead register computation of Astir's LL(finite) parsers roughly follows the LL(1) FIRST-and-FOLLOW strategy with the obvious dynamicized extension. The lookahead register is computed for all non-terminals (in the sense of LL(k) parsing theory) and then used extensively during the generation process. 53 | 54 | ### Output generation 55 | LL(finite) parsers are generated as predictive recursive-descent parsers, in which every type-forming machine component has a separate parsing function. Patterns and regexes are generated in place, and any referenced components of dependency machines are parsed just-in-time with cached-lookahead. -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 12 | 13 | 14 | Astir 15 | 16 | 17 | 18 | 19 | 20 | 25 | 26 | 27 |
28 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/output_interface.md: -------------------------------------------------------------------------------- 1 | # Output Interface 2 | 3 | The user of the generated output interfaces with the classes through the `Machine` and, if applicable, `Parser` classes, which can be found in the repository under the `/Resources` folder. The code is well-documented and written in a clear style that allows direct comprehension. 4 | 5 | [Have a look.](https://github.com/lexected/astir) -------------------------------------------------------------------------------- /docs/setting_up.md: -------------------------------------------------------------------------------- 1 | # Setting up 2 | 3 | In order to use Astir you need to have its binaries present on your computer. Astir has been written in C++ and as such it needs to be compiled at some point before it is used. 4 | 5 | ## Cloning the repository with submodules 6 | Astir has no dependencies aside from one compile-time dependency library, [DimCli](https://github.com/gknowles/dimcli), which is also included in the repository as a [git submodule](https://git-scm.com/book/en/v2/Git-Tools-Submodules). Hence, in order to have a successful compilation you need to initialize the submodules prior to compiling. That can be done by either specifying `--recursive` when cloning the repository or by running the following in the local copy. The following are equivalent 7 | 8 | **Recursive clone** 9 | ```git 10 | git clone --recursive https://github.com/lexected/astir.git 11 | ``` 12 | 13 | **Clone and submodule initialization** 14 | ```git 15 | git clone https://github.com/lexected/astir.git 16 | git submodule init 17 | git submodule update 18 | ``` 19 | 20 | ## General Pre-requisites 21 | Astir is written in pure C++17, so you will need a compiler that supports that standard.
Those are, for example: 22 | 23 | * MSVC that came with Visual Studio 2017 version `15.3` or higher. We test on VS 2019 version `16.6.5`. 24 | * GCC 7 or higher. We test on GCC 9 version `9.3.0`. 25 | * Clang 5 or higher. We test on Clang `10.0.0`. 26 | 27 | The platform-specific compilation files can be generated by CMake. For that we require CMake 3.15 or higher. We test on `3.16.3`. 28 | 29 | ## Compilation 30 | 31 | ### Windows 32 | If you are running on a Windows machine, there are three options available to you. You can 33 | 34 | + download the compiled binaries from the [GitHub Releases](https://github.com/lexected/astir/releases) page of the repository, 35 | + clone the source code and compile it using the native Visual Studio solution; that is to do 36 | ```git 37 | git clone --recursive https://github.com/lexected/astir.git 38 | ``` 39 | followed by a double-click on the `.sln` file to open the solution, and clicking `Build Solution`. 40 | + clone the source code, run CMake to generate the build files, and then compile the sources; that is to do 41 | ```bash 42 | git clone --recursive https://github.com/lexected/astir 43 | cd astir 44 | mkdir Build 45 | cd Build 46 | cmake --configure -S ../ -B . 47 | cmake --build . 48 | ``` 49 | 50 | Once done you can test the compilation result with 51 | 52 | ```powershell 53 | ./astir.exe --help 54 | ``` 55 | 56 | Remember that in order to have a working installation the current working directory of the `astir.exe` executable will *need to contain* a copy of the `/astir/Resources` folder as well (root `/` meant with respect to the repository root). 57 | 58 | ### Linux 59 | The only option for Linux machines so far is to use `CMake` and subsequently `make`. For that you will obviously need `build-essential`. 60 | 61 | ```bash 62 | git clone --recursive https://github.com/lexected/astir 63 | cd astir 64 | mkdir Build 65 | cd Build 66 | cmake --configure -S ../ -B . 67 | cmake --build .
68 | ``` 69 | 70 | Once done you can test the binary with 71 | 72 | ```powershell 73 | ./astir --help 74 | ``` 75 | 76 | Remember that in order to have a working installation the current working directory of the `astir` executable will *need to contain* a copy of the `/astir/Resources` folder as well (root `/` meant with respect to the repository root). 77 | 78 | ### MacOSX 79 | Similarly to Linux, the only option so far is to use `CMake`: 80 | 81 | ```bash 82 | git clone --recursive https://github.com/lexected/astir 83 | cd astir 84 | mkdir Build 85 | cd Build 86 | cmake --configure -S ../ -B . 87 | cmake --build . 88 | ``` 89 | 90 | You can then test the output binary with 91 | 92 | ```powershell 93 | ./astir --help 94 | ``` 95 | 96 | Remember that in order to have a working installation the current working directory of the `astir` executable will **need to contain** a copy of the `/astir/Resources` folder as well (root `/` meant with respect to the repository root). 97 | 98 | ## Installation 99 | For astir to work one also **needs to** copy the `/astir/Resources` folder (root `/` meant with respect to the repository root) into the executable's current working directory. That completes the installation. -------------------------------------------------------------------------------- /releases/astir-v1.0.0-Winx64.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/releases/astir-v1.0.0-Winx64.zip -------------------------------------------------------------------------------- /releases/astir-v1.0.0-Winx86.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lexected/astir/6eec0d6e24c498a6ff874c0bb33928651a688e80/releases/astir-v1.0.0-Winx86.zip --------------------------------------------------------------------------------