├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── compiler_runner.sh ├── lexical_analyzer ├── CMakeLists.txt ├── lexical_analyzer_generator │ ├── finite_automata │ │ ├── dfa.cpp │ │ ├── dfa.h │ │ ├── dfa_state.cpp │ │ ├── dfa_state.h │ │ ├── finite_automata.cpp │ │ ├── finite_automata.h │ │ ├── nfa.cpp │ │ ├── nfa.h │ │ ├── nfa_state.cpp │ │ ├── nfa_state.h │ │ ├── state.cpp │ │ ├── state.h │ │ └── util │ │ │ ├── util.cpp │ │ │ └── util.h │ ├── lexical_analyzer_generator.cpp │ ├── lexical_analyzer_generator.h │ └── nfa_tools │ │ ├── char_range.cpp │ │ ├── char_range.h │ │ ├── char_set.cpp │ │ ├── char_set.h │ │ ├── lexical_rules.cpp │ │ ├── lexical_rules.h │ │ ├── regex_processor.cpp │ │ └── regex_processor.h ├── lexical_analyzer_runner.sh └── lexical_tokenizer │ ├── lexical_tokenizer.cpp │ ├── lexical_tokenizer.h │ └── token.h ├── main.cpp ├── semantic_analyzer ├── CMakeLists.txt ├── README.md ├── intermediate_code_generation │ ├── README.md │ └── semantic_rules │ │ ├── java_bytecode.h │ │ ├── three_address_code.h │ │ └── zeros_ones_counter.h └── main.cpp ├── syntax_analyzer ├── CMakeLists.txt ├── context_free_grammar │ ├── cfg.cpp │ ├── cfg.h │ ├── cfg_production.cpp │ ├── cfg_production.h │ ├── cfg_rule.cpp │ ├── cfg_rule.h │ ├── cfg_symbol.cpp │ ├── cfg_symbol.h │ └── util │ │ ├── first_set.cpp │ │ ├── first_set.h │ │ ├── follow_set.cpp │ │ └── follow_set.h ├── main.cpp ├── parsing_table.cpp ├── parsing_table.h ├── predictive_parser.cpp ├── predictive_parser.h └── syntax_analyzer_runner.sh └── tests ├── catch_main.cpp ├── lexical_analyzer ├── regression │ ├── test_0 │ │ ├── compiler.log │ │ ├── rules.txt │ │ ├── symbol-table.txt │ │ ├── test_0.txt │ │ ├── token-file.txt │ │ └── transition_table.txt │ ├── test_1 │ │ ├── compiler.log │ │ ├── symbol-table.txt │ │ ├── test_1.txt │ │ └── token-file.txt │ ├── test_2 │ │ ├── compiler.log │ │ ├── symbol-table.txt │ │ ├── test_2.txt │ │ └── token-file.txt │ ├── test_3 │ │ ├── compiler.log │ │ ├── symbol-table.txt │ │ ├── test_3.txt │ │ └── token-file.txt │ ├── test_4 │ │ ├── compiler.log │ │ ├── symbol-table.txt │ │ ├── test_4.txt │ │ └── token-file.txt │ ├── test_5 │ │ ├── compiler.log │ │ ├── symbol-table.txt │ │ ├── test_5.txt │ │ └── token-file.txt │ └── test_6 │ │ ├── compiler.log │ │ ├── symbol-table.txt │ │ ├── test_6.txt │ │ └── token-file.txt └── unit │ └── test_1.cpp ├── lib └── catch.hpp ├── semantic_analyzer └── unit │ ├── java_bytecode.bnf │ ├── test_1.cpp │ ├── three_address_code.bnf │ └── zeros_ones_counter.bnf └── syntax_analyzer ├── TA ├── grammar.txt ├── grammar_modified.txt ├── test1.txt └── test2.txt ├── regression ├── test_0 │ ├── actions_output.log │ ├── cfg.bnf │ ├── code.txt │ ├── debug_stack.log │ ├── rules.txt │ └── transition_table.txt └── test_1 │ ├── actions_output.log │ ├── code.txt │ ├── debug_stack.log │ ├── ll1_cfg.bnf │ ├── rules.txt │ └── transition_table.txt └── unit ├── cfg_parser_tests.cpp ├── cfg_single_line_ll1.bnf ├── cfg_tests.cpp ├── complex_left_rec.bnf ├── first_follow_test.bnf ├── first_set_tests.cpp ├── follow_set_tests.cpp ├── general_test.bnf ├── invalid_ll1.bnf ├── left_rec_left_fact.bnf ├── ll1_cfg_with_synch.bnf ├── ll_1_tests.cpp ├── parsing_table_tests.cpp ├── predictive_parser_tests.cpp ├── ps_cfg_multi_line.bnf ├── ps_cfg_single_line.bnf └── ready_ll1_cfg.bnf /.gitignore: -------------------------------------------------------------------------------- 1 | # output and make files 2 | CMakeCache.txt 3 | Makefile 4 | cmake_install.cmake 5 | compiler.cbp 6 | 
CMakeFiles/ 7 | 8 | # macOS 9 | .DS_Store 10 | 11 | # vscode 12 | .vscode/ 13 | 14 | # Clion 15 | cmake-build-debug 16 | .idea/ 17 | 18 | # cmake 19 | build/ 20 | 21 | # ctest 22 | Testing/ 23 | 24 | # Prerequisites 25 | *.d 26 | 27 | # Compiled Object files 28 | *.slo 29 | *.lo 30 | *.o 31 | *.obj 32 | 33 | # Precompiled Headers 34 | *.gch 35 | *.pch 36 | 37 | # Compiled Dynamic libraries 38 | *.so 39 | *.dylib 40 | *.dll 41 | 42 | # Fortran module files 43 | *.mod 44 | *.smod 45 | 46 | # Compiled Static libraries 47 | *.lai 48 | *.la 49 | *.a 50 | *.lib 51 | 52 | # Executables 53 | *.exe 54 | *.out 55 | *.app 56 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | # 4 | # Build Dependencies 5 | # 6 | before_script: 7 | 8 | # 9 | # Build Folder 10 | # 11 | - mkdir build 12 | - cd build 13 | 14 | # # 15 | # # Update / Install CMake 16 | # # 17 | # - | 18 | # if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then 19 | # mkdir -p external/cmake 20 | # pushd external/cmake 21 | # wget https://cmake.org/files/v3.8/cmake-3.8.0-Linux-x86_64.sh 22 | # chmod +x cmake-*-Linux-x86_64.sh 23 | # ./cmake-*-Linux-x86_64.sh --exclude-subdir --skip-license 24 | # export PATH="${PWD}/bin:$PATH" 25 | # popd 26 | # else 27 | # if ! brew ls --version cmake &>/dev/null; then brew update; brew install cmake; fi 28 | # fi 29 | 30 | # 31 | # Build Matrix 32 | # 33 | matrix: 34 | include: 35 | # 36 | # G++ 5 37 | # 38 | - os: linux 39 | env: 40 | - TEST="G++ 5" 41 | addons: 42 | apt: 43 | sources: 44 | - ubuntu-toolchain-r-test 45 | packages: 46 | - gcc-5 47 | - g++-5 48 | script: 49 | - cmake -DCMAKE_CXX_COMPILER="g++-5" -DBUILD_REG_TEST=ON -DBUILD_UNIT_TEST=ON .. 50 | - make 51 | - make test 52 | after_failure: 53 | - cat /home/travis/build/yakout/compiler/build/Testing/Temporary/LastTest.log 54 | - cat /home/travis/build/yakout/compiler/build/actions_output.log 55 | 56 | 57 | # # 58 | # # G++ 6 59 | # # 60 | # - os: linux 61 | # env: 62 | # - TEST="G++ 6" 63 | # addons: 64 | # apt: 65 | # sources: 66 | # - ubuntu-toolchain-r-test 67 | # packages: 68 | # - gcc-6 69 | # - g++-6 70 | # script: 71 | # - cmake -DCMAKE_CXX_COMPILER="g++-6" .. 72 | # - make 73 | # - make test 74 | 75 | # # 76 | # # Clang 3.8 77 | # # 78 | # - os: linux 79 | # env: 80 | # - TEST="Clang 3.8" 81 | # addons: 82 | # apt: 83 | # sources: 84 | # - ubuntu-toolchain-r-test 85 | # - llvm-toolchain-trusty-4.0 86 | # packages: 87 | # - clang-3.8 88 | # - gcc-6 89 | # - g++-6 90 | # script: 91 | # - cmake -DCMAKE_C_COMPILER=clang-3.8 -DCMAKE_CXX_COMPILER=clang++-3.8 .. 92 | # - make 93 | # - make test 94 | 95 | # # 96 | # # Clang 3.9 97 | # # 98 | # - os: linux 99 | # env: 100 | # - TEST="Clang 3.9" 101 | # addons: 102 | # apt: 103 | # sources: 104 | # - ubuntu-toolchain-r-test 105 | # - llvm-toolchain-trusty-3.9 106 | # packages: 107 | # - clang-3.9 108 | # - gcc-6 109 | # - g++-6 110 | # script: 111 | # - cmake -DCMAKE_C_COMPILER=clang-3.9 -DCMAKE_CXX_COMPILER=clang++-3.9 .. 112 | # - make 113 | # - make test 114 | 115 | # # 116 | # # Clang 4.0 117 | # # 118 | # - os: linux 119 | # env: 120 | # - TEST="Clang 4.0" 121 | # addons: 122 | # apt: 123 | # sources: 124 | # - ubuntu-toolchain-r-test 125 | # - llvm-toolchain-trusty-4.0 126 | # packages: 127 | # - clang-4.0 128 | # - gcc-6 129 | # - g++-6 130 | # script: 131 | # - cmake -DCMAKE_C_COMPILER=clang-4.0 -DCMAKE_CXX_COMPILER=clang++-4.0 .. 
132 | # - make 133 | # - make test -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | project (compiler) 3 | 4 | set (CMAKE_CXX_STANDARD 11) 5 | 6 | # ------------------------------------------------------------------------------ 7 | # Compiler flags 8 | # ------------------------------------------------------------------------------ 9 | 10 | # set (CMAKE_CXX_COMPILER g++-7) 11 | 12 | #set (GCC_COMPILE_FLAGS "-Wall -Wextra -g") 13 | 14 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS}") 15 | 16 | option (BUILD_REG_TEST "Determines whether to build regression tests or not" OFF) 17 | 18 | # ------------------------------------------------------------------------------ 19 | # Build 20 | # ------------------------------------------------------------------------------ 21 | 22 | add_subdirectory (lexical_analyzer) 23 | add_subdirectory (syntax_analyzer) 24 | add_subdirectory (semantic_analyzer) 25 | 26 | add_executable (compiler main.cpp) 27 | target_link_libraries (compiler liblex libsyntax) 28 | 29 | # ------------------------------------------------------------------------------ 30 | # Valgrind 31 | # ------------------------------------------------------------------------------ 32 | 33 | set (MEMORYCHECK_COMMAND_OPTIONS "${MEMORYCHECK_COMMAND_OPTIONS} --leak-check=full") 34 | set (MEMORYCHECK_COMMAND_OPTIONS "${MEMORYCHECK_COMMAND_OPTIONS} --trace-children=yes") 35 | set (MEMORYCHECK_COMMAND_OPTIONS "${MEMORYCHECK_COMMAND_OPTIONS} --error-exitcode=1") 36 | 37 | # ------------------------------------------------------------------------------ 38 | # Code Coverage (not used yet) 39 | # ------------------------------------------------------------------------------ 40 | 41 | # set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeModules) 42 | 43 | # include(CodeCoverage) 44 | # setup_target_for_coverage(cov ${TEST_RUNNER} coverage) 45 | 46 | # ------------------------------------------------------------------------------ 47 | # Tests 48 | # ------------------------------------------------------------------------------ 49 | 50 | # Regression Tests 51 | 52 | if (BUILD_REG_TEST) 53 | enable_testing () 54 | 55 | # LEXICAL ANALYZER REGRESION TEST 56 | add_test (reg_test_0_lexical_analyzer_run compiler --lex -g 57 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_0/rules.txt 58 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_0/test_0.txt) 59 | add_test (reg_test_0_lexical_analyzer_compare_token_file ${CMAKE_COMMAND} -E compare_files 60 | ${PROJECT_SOURCE_DIR}/build/token-file.txt 61 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_0/token-file.txt) 62 | add_test (reg_test_0_lexical_analyzer_compare_symbol_table ${CMAKE_COMMAND} -E compare_files 63 | ${PROJECT_SOURCE_DIR}/build/symbol-table.txt 64 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_0/symbol-table.txt) 65 | add_test (reg_test_0_lexical_analyzer_compare_log ${CMAKE_COMMAND} -E compare_files 66 | ${PROJECT_SOURCE_DIR}/build/compiler.log 67 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_0/compiler.log) 68 | 69 | macro (do_test test_num) 70 | add_test (reg_test_${test_num}_lexical_analyzer_run compiler --lex 71 | ${PROJECT_SOURCE_DIR}/build/transition_table.txt 72 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_${test_num}/test_${test_num}.txt) 73 | 74 | add_test 
(reg_test_${test_num}_lexical_analyzer_compare_token_file ${CMAKE_COMMAND} 75 | -E compare_files ${PROJECT_SOURCE_DIR}/build/token-file.txt 76 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_${test_num}/token-file.txt) 77 | 78 | add_test (reg_test_${test_num}_lexical_analyzer_compare_symbol_table ${CMAKE_COMMAND} 79 | -E compare_files ${PROJECT_SOURCE_DIR}/build/symbol-table.txt 80 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_${test_num}/symbol-table.txt) 81 | 82 | add_test (reg_test_${test_num}_lexical_analyzer_compare_log ${CMAKE_COMMAND} 83 | -E compare_files ${PROJECT_SOURCE_DIR}/build/compiler.log 84 | ${PROJECT_SOURCE_DIR}/tests/lexical_analyzer/regression/test_${test_num}/compiler.log) 85 | endmacro (do_test) 86 | 87 | do_test (1) 88 | do_test (2) 89 | do_test (3) 90 | do_test (4) 91 | do_test (5) 92 | do_test (6) 93 | 94 | # SYNTAX ANALYZER REGRESION TEST (not ll1 grammar) 95 | add_test (reg_test_0_syntax_analyzer_run compiler --parse 96 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_0/transition_table.txt 97 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_0/code.txt 98 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_0/cfg.bnf) 99 | # add_test (reg_test_0_syntax_analyzer_compare_debug_stack ${CMAKE_COMMAND} -E compare_files 100 | # ${PROJECT_SOURCE_DIR}/build/debug_stack.log 101 | # ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_0/debug_stack.log) 102 | # add_test (reg_test_0_syntax_analyzer_compare_actions_output ${CMAKE_COMMAND} -E compare_files 103 | # ${PROJECT_SOURCE_DIR}/build/actions_output.log 104 | # ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_0/actions_output.log) 105 | 106 | # SYNTAX ANALYZER REGRESION TEST (ll1 grammar) 107 | add_test (reg_test_1_syntax_analyzer_run compiler --parse 108 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_1/transition_table.txt 109 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_1/code.txt 110 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_1/ll1_cfg.bnf) 111 | add_test (reg_test_1_syntax_analyzer_compare_debug_stack ${CMAKE_COMMAND} -E compare_files 112 | ${PROJECT_SOURCE_DIR}/build/debug_stack.log 113 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_1/debug_stack.log) 114 | add_test (reg_test_1_syntax_analyzer_compare_actions_output ${CMAKE_COMMAND} -E compare_files 115 | ${PROJECT_SOURCE_DIR}/build/actions_output.log 116 | ${PROJECT_SOURCE_DIR}/tests/syntax_analyzer/regression/test_1/actions_output.log) 117 | 118 | endif (BUILD_REG_TEST) 119 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ahmed Yakout 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /compiler_runner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Checks if "build" directory exists or not. If it exists then it will 4 | # not create another one to avoid unnecessary warning messages else it will create one. 5 | if [ ! -d "build" ]; then 6 | mkdir build 7 | fi 8 | 9 | cd build/ 10 | 11 | # Checks if "CMakeCache.txt" file exists or not. If it exists then it will remove it 12 | # to avoid confusion between multiple building modes. 13 | #if [ -f "CMakeCache.txt" ]; then 14 | # rm CMakeCache.txt 15 | #fi 16 | 17 | # Running cmake on the CMakeLists.txt in the "compiler" (Root) directory. 18 | 19 | # 1- With the option of building regression tests turned ON 20 | # and the option of building unit tests truned ON also. 21 | #cmake -DBUILD_REG_TEST=ON -DBUILD_UNIT_TEST=ON .. 22 | 23 | # 2- With the option of building unit tests turned OFF explicitly. 24 | cmake -DBUILD_REG_TEST=ON -DBUILD_UNIT_TEST=ON .. 25 | 26 | # 3- With both options (Building unit tests as well as regression tests) turned off 27 | # implicitly. NOTING that this option will work correctly as long as we remove 28 | # CMakeCache.txt everytime because if we build this module with -DBUILD_UNIT_TEST=ON 29 | # for example this value will be cached and if we didn't turn this option off 30 | # explicity using -DBUILD_UNIT_TEST=OFF cmake will use the cached value and will 31 | # build the tests accordingly even if we don't need them in our build. 32 | #cmake .. 33 | 34 | make 35 | 36 | # "ctest" or "make test" can be used and both will work correctly as long as we remove 37 | # CMakeCache.txt but "make test" will work correctly either we remove it or not as it 38 | # doesn't use cached test results (Dangerous! maybe misleading) as ctest; It re-builds 39 | # modified tests individually so their results are always up-to-date. 
40 | make test 41 | 42 | #ctest -------------------------------------------------------------------------------- /lexical_analyzer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | 3 | set (CMAKE_CXX_STANDARD 11) 4 | 5 | # ------------------------------------------------------------------------------ 6 | # Compiler flags 7 | # ------------------------------------------------------------------------------ 8 | 9 | #set (GCC_COMPILE_FLAGS "-Wall -Wextra -g") 10 | 11 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS}") 12 | 13 | option (BUILD_UNIT_TEST "Determines whether to build unit tests or not" OFF) 14 | 15 | # ------------------------------------------------------------------------------ 16 | # Build 17 | # ------------------------------------------------------------------------------ 18 | 19 | 20 | set (HEADER_FILES 21 | lexical_analyzer_generator/finite_automata/finite_automata.h 22 | lexical_analyzer_generator/finite_automata/state.h 23 | lexical_analyzer_generator/finite_automata/nfa.h 24 | lexical_analyzer_generator/finite_automata/dfa.h 25 | lexical_analyzer_generator/finite_automata/nfa_state.h 26 | lexical_analyzer_generator/finite_automata/dfa_state.h 27 | lexical_analyzer_generator/nfa_tools/char_set.h 28 | lexical_analyzer_generator/nfa_tools/char_range.h 29 | lexical_analyzer_generator/nfa_tools/lexical_rules.h 30 | lexical_analyzer_generator/nfa_tools/regex_processor.h 31 | lexical_analyzer_generator/lexical_analyzer_generator.h 32 | lexical_analyzer_generator/finite_automata/util/util.h 33 | lexical_tokenizer/lexical_tokenizer.h 34 | lexical_tokenizer/token.h) 35 | 36 | set (SOURCE_FILES 37 | lexical_analyzer_generator/finite_automata/finite_automata.cpp 38 | lexical_analyzer_generator/finite_automata/state.cpp 39 | lexical_analyzer_generator/finite_automata/nfa_state.cpp 40 | lexical_analyzer_generator/finite_automata/dfa_state.cpp 41 | lexical_analyzer_generator/finite_automata/nfa.cpp 42 | lexical_analyzer_generator/finite_automata/dfa.cpp 43 | lexical_analyzer_generator/nfa_tools/char_set.cpp 44 | lexical_analyzer_generator/nfa_tools/char_range.cpp 45 | lexical_analyzer_generator/nfa_tools/lexical_rules.cpp 46 | lexical_analyzer_generator/nfa_tools/regex_processor.cpp 47 | lexical_analyzer_generator/lexical_analyzer_generator.cpp 48 | lexical_analyzer_generator/finite_automata/util/util.cpp 49 | lexical_tokenizer/lexical_tokenizer.cpp) 50 | 51 | add_library (liblex SHARED ${SOURCE_FILES} ${HEADER_FILES}) 52 | 53 | # ------------------------------------------------------------------------------ 54 | # Tests 55 | # ------------------------------------------------------------------------------ 56 | 57 | # Unit Tests 58 | 59 | if (BUILD_UNIT_TEST) 60 | enable_testing () 61 | add_library (catch_lex INTERFACE) 62 | target_include_directories (catch_lex INTERFACE ../tests/lib/) 63 | add_library (unit_test_catch_lexical STATIC ../tests/catch_main.cpp) 64 | target_link_libraries (unit_test_catch_lexical catch_lex) 65 | target_link_libraries (unit_test_catch_lexical liblex) 66 | 67 | macro (do_test test_name) 68 | add_executable (unit_${test_name}_lexical 69 | ../tests/lexical_analyzer/unit/${test_name}.cpp) 70 | target_link_libraries (unit_${test_name}_lexical unit_test_catch_lexical) 71 | add_test (unit_${test_name}_lexical_analyzer unit_${test_name}_lexical) 72 | endmacro (do_test) 73 | 74 | do_test (test_1) 75 | endif (BUILD_UNIT_TEST) 
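For reference, a new unit test registered through the do_test macro above (say, a hypothetical do_test (test_2) entry) would be one Catch translation unit under tests/lexical_analyzer/unit/, linked against catch_main.cpp, which is expected to provide Catch's main(). A minimal sketch follows — the file name, include path, and rules-file path are assumptions and may need adjusting to the directory the test runner executes from:

// Hypothetical tests/lexical_analyzer/unit/test_2.cpp (sketch, not part of the repository)
#include "catch.hpp"
#include "../../../lexical_analyzer/lexical_analyzer_generator/lexical_analyzer_generator.h"

TEST_CASE("generator builds a combined NFA from a rules file", "[lexical_analyzer_generator]")
{
    lexical_analyzer_generator generator;
    // Assumed location of one of the regression-test rule files; adjust the
    // relative path to the test's working directory.
    auto combined_nfa = generator.get_lexical_analyzer_file(
            "../tests/lexical_analyzer/regression/test_0/rules.txt");

    REQUIRE(combined_nfa != nullptr);                              // an NFA was produced
    REQUIRE(combined_nfa->get_start_state() != nullptr);           // it has a start state
    REQUIRE_FALSE(combined_nfa->get_acceptance_states().empty());  // and accepting states
}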
-------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/dfa.h: -------------------------------------------------------------------------------- 1 | #ifndef DFA_H 2 | #define DFA_H 3 | 4 | 5 | #include "finite_automata.h" 6 | #include "dfa_state.h" 7 | #include "nfa.h" 8 | 9 | 10 | class dfa : public fa { 11 | public: 12 | explicit dfa(std::shared_ptr start_state, 13 | std::vector> acceptance_states, int total_states); 14 | dfa(); 15 | explicit dfa(std::shared_ptr combined_nfa); 16 | void dfs (std::shared_ptr state, std::vector &visited, 17 | std::shared_ptr vis, bool, std::shared_ptr) override; 18 | 19 | void add_state(std::shared_ptr s); 20 | const std::vector> &get_dfa_states() const; 21 | std::shared_ptr get_unmarked_state(); 22 | bool contains(std::shared_ptr s); 23 | const std::shared_ptr &get_alphabet() const; 24 | void set_alphabet(const std::shared_ptr &alphabet); 25 | std::shared_ptr minimize(); 26 | void draw_trans_table(); 27 | 28 | private: 29 | std::vector> dfa_states; 30 | std::shared_ptr alphabet; 31 | }; 32 | 33 | 34 | #endif // DFA_H 35 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/dfa_state.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "dfa_state.h" 3 | 4 | dfa_state::dfa_state (int id, state_type type, std::shared_ptr st_ip) 5 | : state (id, type, st_ip) 6 | { 7 | dfa_state::marked = false; 8 | dfa_state::state_input = std::make_shared(char_set()); 9 | dfa_state::priority = INT_MAX; 10 | } 11 | 12 | dfa_state::dfa_state(std::set> nfa_states, state_id id) { 13 | // nfa states composing dfa state. 14 | dfa_state::composing_nfa_states = nfa_states; 15 | 16 | // Setting dfa_state state_input 17 | dfa_state::state_input = std::make_shared(char_set()); 18 | for (const auto &state : dfa_state::composing_nfa_states) 19 | { 20 | auto nfa_state_characters = state->get_char_set()->get_characters(); 21 | auto nfa_state_ranges = state->get_char_set()->get_ranges(); 22 | for (auto c : nfa_state_characters) 23 | { 24 | dfa_state::state_input->add_character(c.first); 25 | } 26 | for (const auto &range : nfa_state_ranges) 27 | { 28 | dfa_state::state_input->add_range(range->get_lower(),range->get_upper()); 29 | } 30 | } 31 | 32 | // Setting id. 33 | dfa_state::id = id; 34 | 35 | // Initializing token_class 36 | dfa_state::token_class = std::string(); 37 | 38 | // Determining type. ASSUMPTION: state can't be in more than one type simultaneously. 
39 | dfa_state::type = INTERMEDIATE; 40 | for (const auto &curr : nfa_states) 41 | { 42 | if (curr->get_type() == START) 43 | { 44 | dfa_state::type = START; 45 | break; 46 | } 47 | else if (curr->get_type() == ACCEPTANCE) 48 | { 49 | dfa_state::type = ACCEPTANCE; 50 | break; 51 | } 52 | } 53 | dfa_state::marked = false; 54 | dfa_state::priority = INT_MAX; 55 | } 56 | 57 | void dfa_state::insert_transition (std::string input, std::shared_ptr const& state) 58 | { 59 | transitions[input] = std::static_pointer_cast(state); 60 | } 61 | 62 | std::shared_ptr dfa_state::get_next_state(char input) 63 | { 64 | std::string key = state_input->get_string(input); 65 | if (key.empty() && input != '\0') 66 | return nullptr; 67 | return transitions[key]; 68 | } 69 | 70 | std::map> dfa_state::get_transitions() 71 | { 72 | return transitions; 73 | } 74 | 75 | const std::set> &dfa_state::get_composing_nfa_states() const { 76 | return composing_nfa_states; 77 | } 78 | 79 | bool dfa_state::is_marked() const { 80 | return marked; 81 | } 82 | 83 | void dfa_state::set_marked(bool marked) { 84 | dfa_state::marked = marked; 85 | } 86 | 87 | bool dfa_state::equals(std::shared_ptr s) { 88 | return dfa_state::composing_nfa_states == s->get_composing_nfa_states(); 89 | } 90 | 91 | std::shared_ptr dfa_state::copy() { 92 | return std::make_shared(*this); 93 | } 94 | 95 | std::shared_ptr dfa_state::get_next_state(std::string input) { 96 | return transitions[input]; 97 | } 98 | 99 | void dfa_state::set_transitions(const std::map> &transitions) { 100 | dfa_state::transitions = transitions; 101 | } 102 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/dfa_state.h: -------------------------------------------------------------------------------- 1 | #ifndef DFA_STATE_H 2 | #define DFA_STATE_H 3 | 4 | #include "state.h" 5 | #include "nfa_state.h" 6 | #include 7 | 8 | class dfa_state : public state { 9 | private: 10 | std::map > transitions; 11 | std::set> composing_nfa_states; 12 | bool marked; 13 | public: 14 | dfa_state (int, state_type, std::shared_ptr); 15 | explicit dfa_state(std::set> nfa_states, state_id id); 16 | 17 | void insert_transition (std::string input, std::shared_ptr const& state) override; 18 | void set_transitions(const std::map> &transitions); 19 | std::shared_ptr get_next_state (char input); 20 | std::shared_ptr get_next_state (std::string input); 21 | std::map > get_transitions(); 22 | const std::set> &get_composing_nfa_states() const; 23 | bool is_marked() const; 24 | void set_marked(bool marked); 25 | bool equals(std::shared_ptr s); 26 | std::shared_ptr copy() override; 27 | }; 28 | 29 | 30 | #endif // DFA_STATE_H 31 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/finite_automata.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "finite_automata.h" 8 | #include "nfa_state.h" 9 | 10 | fa::fa(std::shared_ptr start_state, std::vector> acceptance_states, int total_states) 11 | { 12 | fa::start_state = std::move(start_state); 13 | fa::acceptance_states = std::move(acceptance_states); 14 | fa::total_states = total_states; 15 | } 16 | 17 | fa::fa(const fa& fa_to_copy) 18 | : start_state(fa_to_copy.get_start_state()->copy()), 19 | acceptance_states(), 20 | total_states(fa_to_copy.get_total_states()) 
21 | { 22 | 23 | } 24 | 25 | fa::fa() 26 | { 27 | fa::total_states = 0; 28 | } 29 | 30 | std::string exec(const char* cmd) 31 | { 32 | std::array buffer{}; 33 | std::string result; 34 | std::shared_ptr pipe(popen(cmd, "r"), pclose); 35 | if (!pipe) throw std::runtime_error("popen() failed!"); 36 | while (!feof(pipe.get())) { 37 | if (fgets(buffer.data(), 128, pipe.get()) != nullptr) 38 | result += buffer.data(); 39 | } 40 | return result; 41 | } 42 | 43 | void fa::visualize() 44 | { 45 | std::shared_ptr visualizer(new std::ofstream()); 46 | visualizer->open("fsm.dot"); 47 | *visualizer << 48 | "digraph finite_state_machine {\n" 49 | "\trankdir=LR;\n" 50 | "\tsize=\"500,300\"\n" 51 | "\tnode [shape = doublecircle]; "; 52 | for (const auto &s : acceptance_states) 53 | { 54 | *visualizer << s->get_id() << " "; 55 | } 56 | *visualizer << ";\n" 57 | "\tnode [shape = none] \"\";\n" 58 | "\tnode [shape = circle];\n" 59 | "\"\" -> " << start_state->get_id() << "\n"; 60 | 61 | 62 | int MAX_DFS = 1000; 63 | std::vector visited(MAX_DFS); 64 | dfs(start_state, visited, visualizer, false, nullptr); 65 | *visualizer << "}\n"; 66 | visualizer->close(); 67 | 68 | #ifdef __linux__ 69 | exec("dot -Tpng -O fsm.dot"); 70 | #elif _WIN32 71 | system ("\"C:\\\\Program Files (x86)\\\\Graphviz2.38\\\\bin\\\\dot\" -Tpng -O fsm.dot"); 72 | #elif __APPLE__ 73 | exec("dot -Tpng -O fsm.dot"); 74 | exec("open fsm.dot.png"); 75 | #endif 76 | } 77 | 78 | const std::shared_ptr &fa::get_start_state() const 79 | { 80 | return start_state; 81 | } 82 | 83 | const std::vector> &fa::get_acceptance_states() const 84 | { 85 | return acceptance_states; 86 | } 87 | 88 | int fa::get_total_states() const 89 | { 90 | return total_states; 91 | } 92 | 93 | 94 | void fa::set_start_state(std::shared_ptr new_state) 95 | { 96 | start_state = new_state; 97 | } 98 | 99 | void fa::set_acceptance_states(std::vector> new_acceptance_states) 100 | { 101 | acceptance_states = new_acceptance_states; 102 | } 103 | 104 | void fa::set_total_states(int total_states) 105 | { 106 | fa::total_states = total_states; 107 | } 108 | 109 | void fa::add_acceptance_state(std::shared_ptr s) 110 | { 111 | fa::acceptance_states.push_back(s); 112 | } 113 | 114 | void fa::update_acceptance_states() 115 | { 116 | std::vector visted(static_cast(1000)); // TODO 117 | dfs(start_state, visted, nullptr, true, nullptr); 118 | } 119 | 120 | int fa::get_max_id() { 121 | return max_id; 122 | } 123 | 124 | void fa::set_acceptance_states_priority(int pri) 125 | { 126 | for (auto state : acceptance_states) 127 | { 128 | state->set_priority(pri); 129 | } 130 | } 131 | 132 | void fa::set_acceptance_states_token_class(std::string token_class) 133 | { 134 | for (auto state : acceptance_states) 135 | { 136 | state->set_token_class(token_class); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/finite_automata.h: -------------------------------------------------------------------------------- 1 | #ifndef FINITE_AUTOMATA_H 2 | #define FINITE_AUTOMATA_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "state.h" 9 | 10 | class fa 11 | { 12 | protected: 13 | std::shared_ptr start_state; 14 | std::vector> acceptance_states; 15 | int total_states; 16 | int max_id; 17 | 18 | public: 19 | explicit fa(std::shared_ptr start_state, 20 | std::vector> acceptance_states, int total_states); 21 | fa(const fa&); 22 | fa(); 23 | 24 | virtual void dfs (std::shared_ptr state, 
std::vector &visited, 25 | std::shared_ptr vis, bool update_acceptance_states, 26 | std::shared_ptr alphabet) = 0; 27 | void visualize(); 28 | 29 | /** 30 | * This function traverse the graph using dfs(,,,update_states = true) 31 | * and updates the acceptance states vector. 32 | */ 33 | void update_acceptance_states(); 34 | 35 | // getters 36 | int get_total_states() const; 37 | const std::vector> &get_acceptance_states() const; 38 | const std::shared_ptr &get_start_state() const; 39 | int get_max_id(); 40 | /** 41 | * @brief returns the alphabet of the nfa. 42 | * @details returns set of all intpu characters to states in nfa. 43 | * @return char_set of all characters and ranges. 44 | */ 45 | // setters 46 | void set_start_state(std::shared_ptr); 47 | void set_acceptance_states(std::vector>); 48 | void add_acceptance_state(std::shared_ptr); 49 | void set_total_states(int total_states); 50 | void set_acceptance_states_priority(int); 51 | void set_acceptance_states_token_class(std::string); 52 | }; 53 | 54 | 55 | #endif // FINITE_AUTOMATA_H 56 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/nfa.h: -------------------------------------------------------------------------------- 1 | #ifndef NFA_H 2 | #define NFA_H 3 | 4 | 5 | #include "finite_automata.h" 6 | #include "nfa_state.h" 7 | 8 | class nfa : public fa { 9 | public: 10 | explicit nfa(std::shared_ptr start_state, 11 | std::vector> acceptance_states, int total_states); 12 | explicit nfa(std::shared_ptr st_ip, int id1, int id2); 13 | explicit nfa(std::shared_ptr c_s); 14 | nfa(); 15 | void dfs (std::shared_ptr state, std::vector &visited, 16 | std::shared_ptr vis, bool update_acceptance_states, 17 | std::shared_ptr alphabet) override; 18 | void unify(std::shared_ptr, bool unifiy_acceptance_states = true); 19 | void concat(std::shared_ptr); 20 | void plus(); 21 | void star(); 22 | 23 | std::shared_ptr copy(); 24 | void renamify(state_id starting_id); 25 | static std::shared_ptr build_epsilon_transition(); 26 | 27 | std::shared_ptr get_alphabet(); 28 | }; 29 | 30 | 31 | #endif // NFA_H 32 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/nfa_state.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "nfa_state.h" 3 | 4 | #include 5 | #include 6 | 7 | nfa_state::nfa_state (int id, state_type type, std::shared_ptr st_ip) 8 | : state (id, type, std::move(st_ip)) 9 | { 10 | nfa_state::priority = INT_MAX; 11 | } 12 | 13 | nfa_state::nfa_state(const nfa_state & s) 14 | : state::state(s), transitions() 15 | { 16 | nfa_state::priority = INT_MAX; 17 | } 18 | 19 | void nfa_state::insert_transition (std::string input, std::shared_ptr const& state) 20 | { 21 | transitions[input].push_back(std::static_pointer_cast(state)); 22 | } 23 | 24 | const std::map>>& 25 | nfa_state::get_transitions() const 26 | { 27 | return transitions; 28 | } 29 | 30 | std::vector> nfa_state::get_next_state(char input) 31 | { 32 | std::string key = state_input->get_string(input); 33 | if (key.empty() && input != '\0') 34 | return std::vector>(); 35 | return transitions[key]; 36 | } 37 | 38 | std::vector> nfa_state::get_next_state(std::string input) { 39 | return transitions[input]; 40 | } 41 | 42 | std::shared_ptr nfa_state::copy() 43 | { 44 | std::shared_ptr nfa_copy(new nfa_state(*this)); 45 | return nfa_copy; 46 | } 47 | 48 | 49 
| -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/nfa_state.h: -------------------------------------------------------------------------------- 1 | #ifndef NFA_STATE_H 2 | #define NFA_STATE_H 3 | 4 | 5 | #include "state.h" 6 | 7 | class nfa_state : public state { 8 | private: 9 | std::map >> transitions; 10 | public: 11 | nfa_state (int id, state_type type, std::shared_ptr st_ip); 12 | nfa_state (const nfa_state&); 13 | void insert_transition (std::string input, std::shared_ptr const& state) override; 14 | std::shared_ptr copy() override; 15 | 16 | // getters 17 | std::vector> get_next_state (char input); 18 | std::vector> get_next_state (std::string input); 19 | const std::map >>& get_transitions() const; 20 | }; 21 | 22 | 23 | #endif // NFA_STATE_H 24 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/state.cpp: -------------------------------------------------------------------------------- 1 | #include "state.h" 2 | 3 | #include 4 | 5 | state::state (state_id _id, state_type _type, std::shared_ptr st_ip) 6 | : id(_id), type(_type), state_input(std::move(st_ip)) 7 | { 8 | 9 | } 10 | 11 | state::state() { 12 | 13 | } 14 | 15 | state::state(const state& s) 16 | : id(s.get_id()), type(s.get_type()), state_input(s.get_char_set()) 17 | { 18 | 19 | } 20 | 21 | const state_id& state::get_id() const 22 | { 23 | return id; 24 | } 25 | 26 | const state_type& state::get_type() const 27 | { 28 | return type; 29 | } 30 | 31 | const std::shared_ptr state::get_char_set() const 32 | { 33 | return state_input; 34 | } 35 | 36 | const std::string state::get_token_class() const { 37 | return token_class; 38 | } 39 | 40 | const int state::get_token_class_priority () const { 41 | return priority; 42 | } 43 | 44 | void state::set_type(state_type new_type) { 45 | type = new_type; 46 | } 47 | 48 | void state::set_id(state_id new_id) { 49 | id = new_id; 50 | } 51 | 52 | void state::set_char_set (std::shared_ptr ch_set) { 53 | state_input = ch_set; 54 | } 55 | 56 | void state::set_token_class (std::string new_token_class) 57 | { 58 | token_class = new_token_class; 59 | } 60 | 61 | void state::set_priority (int pri) 62 | { 63 | priority = pri; 64 | } 65 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/state.h: -------------------------------------------------------------------------------- 1 | #ifndef STATE_H 2 | #define STATE_H 3 | 4 | #include "../nfa_tools/lexical_rules.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define EPSILON "" 11 | typedef unsigned int state_id; 12 | 13 | enum state_type 14 | { 15 | ACCEPTANCE, 16 | START, 17 | INTERMEDIATE 18 | }; 19 | 20 | class state 21 | { 22 | protected: 23 | state_id id; 24 | state_type type; 25 | std::shared_ptr state_input; 26 | std::string token_class; 27 | int priority; 28 | 29 | public: 30 | state (state_id id, state_type type, std::shared_ptr st_ip); 31 | state (const state&); 32 | state (); 33 | 34 | virtual void insert_transition (std::string input, std::shared_ptr const& state) = 0; 35 | /** 36 | * makes a copy of the state which used in copying nfa. 37 | * @return shared_ptr to a copy state. 
38 | */ 39 | virtual std::shared_ptr copy() = 0; 40 | 41 | // getters 42 | const state_id& get_id() const; 43 | const state_type& get_type() const; 44 | const std::shared_ptr get_char_set() const; 45 | const std::string get_token_class () const; 46 | const int get_token_class_priority () const; 47 | // setters 48 | void set_type (state_type); 49 | void set_id (state_id); 50 | void set_char_set (std::shared_ptr); 51 | void set_token_class (std::string); 52 | void set_priority (int); 53 | }; 54 | 55 | #endif // STATE_H 56 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/util/util.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by awalid on 3/26/18. 3 | // 4 | 5 | #include 6 | #include 7 | #include "../nfa_state.h" 8 | #include "../dfa_state.h" 9 | 10 | bool same_group(const std::shared_ptr &s1, const std::shared_ptr &s2, 11 | const std::string &inp, std::set>> partition) 12 | { 13 | auto dest_state_1 = s1->get_next_state(inp); 14 | auto dest_state_2 = s2->get_next_state(inp); 15 | if (dest_state_1 == nullptr && dest_state_2 == nullptr) { 16 | return true; 17 | } 18 | if (dest_state_1 == nullptr || dest_state_2 == nullptr) { 19 | return false; 20 | } 21 | for (auto grp : partition) 22 | { 23 | bool found_s1, found_s2; 24 | found_s1 = found_s2 = false; 25 | for (const auto &state : grp) 26 | { 27 | if (state->get_id() == dest_state_1->get_id()) 28 | found_s1 = true; 29 | if (state->get_id() == dest_state_2->get_id()) 30 | found_s2 = true; 31 | } 32 | if (found_s1 && found_s2) 33 | return true; 34 | } 35 | return false; 36 | } 37 | 38 | bool same_group(const std::shared_ptr &s1, const std::shared_ptr &s2, 39 | const char inp, std::set>> partition) 40 | { 41 | auto dest_state_1 = s1->get_next_state(inp); 42 | auto dest_state_2 = s2->get_next_state(inp); 43 | if (dest_state_1 == nullptr && dest_state_2 == nullptr) { 44 | return true; 45 | } 46 | if (dest_state_1 == nullptr || dest_state_2 == nullptr) { 47 | return false; 48 | } 49 | for (auto grp : partition) 50 | { 51 | bool found_s1, found_s2; 52 | found_s1 = found_s2 = false; 53 | for (const auto &state : grp) 54 | { 55 | if (state->get_id() == dest_state_1->get_id()) 56 | found_s1 = true; 57 | if (state->get_id() == dest_state_2->get_id()) 58 | found_s2 = true; 59 | } 60 | if (found_s1 && found_s2) 61 | return true; 62 | } 63 | return false; 64 | } 65 | 66 | namespace util 67 | { 68 | std::set> e_closure(const std::set> nfa_states) 69 | { 70 | std::set> reachable_states = nfa_states; 71 | std::stack> nfa_states_stack; 72 | std::map visited; 73 | for (const auto &state : nfa_states) 74 | { 75 | nfa_states_stack.push(state); 76 | visited[state->get_id()] = true; 77 | } 78 | while (!nfa_states_stack.empty()) 79 | { 80 | auto curr_nfa_state = static_cast &&>(nfa_states_stack.top()); 81 | nfa_states_stack.pop(); 82 | std::vector> vec = curr_nfa_state->get_next_state('\0'); 83 | for (const auto &curr : vec) 84 | { 85 | if (!visited[curr->get_id()]) 86 | { 87 | visited[curr->get_id()] = true; 88 | reachable_states.insert(curr); 89 | nfa_states_stack.push(curr); 90 | } 91 | } 92 | } 93 | return reachable_states; 94 | } 95 | 96 | std::set> move(const std::set> nfa_states, char inp) { 97 | std::set> reachable_states; 98 | for (const auto &state : nfa_states) 99 | { 100 | std::vector> curr_reached = state->get_next_state(inp); 101 | for (const auto &curr : curr_reached) 102 | { 103 | 
reachable_states.insert(curr); 104 | } 105 | } 106 | return reachable_states; 107 | } 108 | 109 | std::set> move(std::set> nfa_states, std::string inp) 110 | { 111 | std::set> reachable_states; 112 | for (const auto &state : nfa_states) 113 | { 114 | std::vector> curr_reached = state->get_next_state(inp); 115 | for (const auto &curr : curr_reached) 116 | { 117 | reachable_states.insert(curr); 118 | } 119 | } 120 | return reachable_states; 121 | } 122 | 123 | std::set>> 124 | make_partition(std::set>> partition, 125 | const std::shared_ptr &alphabet) 126 | { 127 | std::set>> new_partition; 128 | std::map partitioned; 129 | for (auto group : partition) 130 | { 131 | for (const auto &state : group) 132 | { 133 | if (partitioned[state->get_id()]) 134 | { 135 | continue; 136 | } 137 | std::set> new_group; 138 | new_group.insert(state); 139 | partitioned[state->get_id()] = true; 140 | for (const auto &s : group) 141 | { 142 | if (!partitioned[s->get_id()]) 143 | { 144 | bool same_grp = true; 145 | for (auto inp : alphabet->get_characters()) 146 | { 147 | if (!same_group(state, s, inp.first, partition)) 148 | { 149 | same_grp = false; 150 | break; 151 | } 152 | } 153 | if (same_grp) 154 | { 155 | for (const auto &range : alphabet->get_ranges()) 156 | { 157 | if (!same_group(state, s, range->get_range_string(), partition)) 158 | { 159 | same_grp = false; 160 | break; 161 | } 162 | } 163 | } 164 | if (same_grp && state->get_token_class() == s->get_token_class()) 165 | { 166 | new_group.insert(s); 167 | partitioned[s->get_id()] = true; 168 | } 169 | } 170 | } 171 | new_partition.insert(new_group); 172 | } 173 | } 174 | return new_partition; 175 | } 176 | } 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/finite_automata/util/util.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by awalid on 3/26/18. 3 | // 4 | 5 | #include 6 | #include "../nfa_state.h" 7 | #include "../dfa_state.h" 8 | 9 | #ifndef COMPILER_UTIL_H 10 | #define COMPILER_UTIL_H 11 | 12 | namespace util 13 | { 14 | std::set> e_closure(std::set> nfa_states); 15 | std::set> move(std::set> nfa_states, char inp); 16 | std::set> move(std::set> nfa_states, std::string inp); 17 | std::set>> make_partition(std::set>> partition, 18 | const std::shared_ptr &alphabet); 19 | } 20 | 21 | #endif //COMPILER_UTIL_H 22 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/lexical_analyzer_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "lexical_analyzer_generator.h" 2 | 3 | //#include 4 | 5 | #include 6 | #include 7 | 8 | #define PUNCT_CLAUSE_START '[' 9 | #define PUNCT_CLAUSE_END ']' 10 | #define KEYWORD_CLAUSE_START '{' 11 | #define KEYWORD_CLAUSE_END '}' 12 | #define EXPRESSION_ASSIGN ':' 13 | #define DEFINITION_ASSIGN '=' 14 | 15 | #define ESCAPE '\\' 16 | 17 | std::string trim(std::string const& str) 18 | { 19 | if(str.empty()) 20 | return str; 21 | 22 | std::size_t firstScan = str.find_first_not_of(' '); 23 | std::size_t first = firstScan == std::string::npos ? 
str.length() : firstScan; 24 | std::size_t last = str.find_last_not_of(' '); 25 | return str.substr(first, last-first+1); 26 | } 27 | 28 | std::vector read_file (std::string rules_file) 29 | { 30 | std::vector file_lines; 31 | std::ifstream infile (rules_file); 32 | if (infile.is_open()) 33 | { 34 | std::string line; 35 | while ( getline (infile,line) ) 36 | { 37 | file_lines.push_back (trim(line)); 38 | } 39 | infile.close(); 40 | } 41 | return file_lines; 42 | } 43 | 44 | 45 | 46 | std::shared_ptr build_punctations_nfa (std::string full_line, int order) 47 | { 48 | std::string line; 49 | for (char c : full_line) 50 | { 51 | if (c != '\0' && c != SPACE && c != PUNCT_CLAUSE_START && c != PUNCT_CLAUSE_END) 52 | line += c; 53 | } 54 | std::shared_ptr punct_nfa; 55 | bool first_nfa = true; 56 | for (int i = 0; i < line.length(); i++) 57 | { 58 | std::shared_ptr cur_punct_nfa; 59 | if (line[i] == ESCAPE && i >= line.length() - 2) 60 | { 61 | //// TODO : ERROR ex : [\] or [ 62 | } 63 | else if (line[i] == ESCAPE && i <= line.length() - 2) 64 | { 65 | std::shared_ptr c_s(new char_set(line[i + 1])); 66 | std::shared_ptr 67 | p_nfa(new nfa(c_s)); 68 | cur_punct_nfa = p_nfa; 69 | cur_punct_nfa->set_acceptance_states_priority(order); 70 | cur_punct_nfa->set_acceptance_states_token_class(std::string() + line[i + 1]); 71 | i++; 72 | } 73 | else 74 | { 75 | std::shared_ptr c_s(new char_set(line[i])); 76 | std::shared_ptr p_nfa(new nfa(c_s)); 77 | cur_punct_nfa = p_nfa; 78 | cur_punct_nfa->set_acceptance_states_priority(order); 79 | cur_punct_nfa->set_acceptance_states_token_class(std::string() + line[i]); 80 | } 81 | if (first_nfa) 82 | { 83 | first_nfa = false; 84 | punct_nfa = cur_punct_nfa; 85 | } 86 | else 87 | { 88 | punct_nfa->unify(cur_punct_nfa, false); 89 | } 90 | } 91 | if (line[line.length() - 1] != PUNCT_CLAUSE_END) 92 | { 93 | //// TODO : ERROR 94 | } 95 | return punct_nfa; 96 | } 97 | 98 | std::shared_ptr build_keywords_nfa (std::string line, int order) 99 | { 100 | if (line[line.length() - 1] != KEYWORD_CLAUSE_END 101 | || line.length() <= 2) 102 | { 103 | //// TODO : Error 104 | } 105 | std::shared_ptr keywords_nfa; 106 | bool first_nfa = true; 107 | std::istringstream iss(trim(line.substr(1,line.length() - 2))); 108 | while (iss) { 109 | std::string word; 110 | iss >> word; 111 | if (word.length() == 0) continue; 112 | std::shared_ptr c_s0(new char_set(word[0])); 113 | std::shared_ptr nfa0(new nfa(c_s0)); 114 | for (int i = 1; i < word.length(); i++) 115 | { 116 | std::shared_ptr c_s(new char_set(word[i])); 117 | std::shared_ptr nfa1(new nfa(c_s)); 118 | nfa0->concat(nfa1); 119 | } 120 | nfa0->set_acceptance_states_priority(order); 121 | nfa0->set_acceptance_states_token_class(word); 122 | if (first_nfa) 123 | { 124 | keywords_nfa = nfa0; 125 | first_nfa = false; 126 | } 127 | else 128 | { 129 | keywords_nfa->unify(nfa0, false); 130 | } 131 | } 132 | return keywords_nfa; 133 | } 134 | 135 | std::shared_ptr build_regex_nfa (std::string lhs, std::string rhs, 136 | std::map > &sym_table, 138 | int order) 139 | { 140 | regular_expression regex = {lhs, rhs}; 141 | std::shared_ptr reg_def_nfa = evaluate_regex(regex, sym_table); 142 | sym_table[lhs] = reg_def_nfa; 143 | reg_def_nfa->set_acceptance_states_priority(order); 144 | reg_def_nfa->set_acceptance_states_token_class(lhs); 145 | return reg_def_nfa; 146 | } 147 | 148 | 149 | std::shared_ptr build_combined_nfa (std::vector rules_file_lines) 150 | { 151 | std::map > sym_table; 152 | std::shared_ptr combined_nfa; 153 | bool first_nfa = 
true; 154 | bool is_def; 155 | int order = 1; 156 | std::vector, bool>> nfas; 157 | for (auto line : rules_file_lines) 158 | { 159 | std::shared_ptr cur_nfa; 160 | if (line[0] == PUNCT_CLAUSE_START) 161 | { 162 | cur_nfa = build_punctations_nfa (line, order - 100); 163 | nfas.push_back({cur_nfa, false}); 164 | } 165 | else if (line[0] == KEYWORD_CLAUSE_START) 166 | { 167 | cur_nfa = build_keywords_nfa (line, order - 100); 168 | nfas.push_back({cur_nfa, false}); 169 | } 170 | else 171 | { 172 | bool invalid_line = false; 173 | for (int i = 0; i < line.length(); i++) 174 | { 175 | if (line[i] == DEFINITION_ASSIGN) 176 | { 177 | is_def = true; 178 | cur_nfa = build_regex_nfa (trim(line.substr(0, i)), trim(line.substr(i+1)), 179 | sym_table, order); 180 | nfas.push_back({cur_nfa, true}); 181 | break; 182 | } 183 | else if (line[i] == EXPRESSION_ASSIGN) 184 | { 185 | cur_nfa = build_regex_nfa (trim(line.substr(0, i)), trim(line.substr(i+1)), 186 | sym_table, order); 187 | nfas.push_back({cur_nfa, false}); 188 | break; 189 | } 190 | } 191 | if (invalid_line) 192 | { 193 | //// TODO : Error 194 | } 195 | } 196 | order++; 197 | } 198 | 199 | 200 | for (auto const& n : nfas) 201 | { 202 | std::shared_ptr cur_nfa = n.first; 203 | // cur_nfa->visualize(); 204 | bool is_def = n.second; 205 | 206 | if (first_nfa && !is_def) 207 | { 208 | combined_nfa = cur_nfa; 209 | first_nfa = false; 210 | } 211 | else 212 | { 213 | if (is_def){} 214 | // combined_nfa->unify(cur_nfa, false); 215 | else 216 | combined_nfa->unify(cur_nfa, false); 217 | // combined_nfa->visualize(); 218 | } 219 | } 220 | return combined_nfa; 221 | } 222 | 223 | 224 | std::shared_ptr lexical_analyzer_generator::get_lexical_analyzer_file (std::string rules_file) 225 | { 226 | // SetConsoleOutputCP( CP_UTF8 ); 227 | std::vector rules_file_lines = read_file (rules_file); 228 | std::shared_ptr combined_nfa = build_combined_nfa(rules_file_lines); 229 | 230 | // MOVE THIS LOGIC FROM HERE. 
231 | for (auto s : combined_nfa->get_acceptance_states()) { 232 | if(s->get_type() != ACCEPTANCE) 233 | { 234 | s->set_type(ACCEPTANCE); 235 | } 236 | } 237 | // combined_nfa->visualize(); 238 | return combined_nfa; 239 | } 240 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/lexical_analyzer_generator.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXICAL_ANALYZER_GENERATOR_H 2 | #define LEXICAL_ANALYZER_GENERATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nfa_tools/lexical_rules.h" 11 | #include "nfa_tools/regex_processor.h" 12 | #include "finite_automata/nfa.h" 13 | #include "finite_automata/dfa.h" 14 | 15 | std::shared_ptr build_combined_nfa (std::vector rules_file_lines); 16 | 17 | std::shared_ptr build_regex_nfa (std::string lhs, std::string rhs, 18 | std::map > &sym_table, 20 | int order); 21 | 22 | std::shared_ptr build_keywords_nfa (std::string line, int order); 23 | 24 | std::shared_ptr build_punctations_nfa (std::string line, int order); 25 | 26 | 27 | class lexical_analyzer_generator 28 | { 29 | public: 30 | std::shared_ptr get_lexical_analyzer_file(std::string rules_file); 31 | }; 32 | 33 | #endif // LEXICAL_ANALYZER_GENERATOR_H 34 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/nfa_tools/char_range.cpp: -------------------------------------------------------------------------------- 1 | #include "char_range.h" 2 | 3 | char_range::char_range(char l, char u) 4 | { 5 | lower = l; 6 | upper = u; 7 | } 8 | 9 | char_range::char_range(const char_range & c_r): lower(c_r.get_lower()), upper(c_r.get_upper()){ 10 | 11 | } 12 | 13 | bool char_range::is_in_range(char c) 14 | { 15 | return c >= lower && c <= upper; 16 | } 17 | 18 | std::string char_range::get_range_string() 19 | { 20 | return std::string() + lower + "-" + upper; 21 | } 22 | 23 | const char char_range::get_upper() const 24 | { 25 | return upper; 26 | } 27 | 28 | const char char_range::get_lower() const 29 | { 30 | return lower; 31 | } 32 | 33 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/nfa_tools/char_range.h: -------------------------------------------------------------------------------- 1 | #ifndef CHAR_RANGE_H 2 | #define CHAR_RANGE_H 3 | 4 | #include 5 | 6 | class char_range 7 | { 8 | private: 9 | char lower; 10 | char upper; 11 | 12 | public: 13 | explicit char_range(char, char); 14 | explicit char_range(const char_range&); 15 | bool is_in_range (char c); 16 | std::string get_range_string (); 17 | const char get_lower() const ; 18 | const char get_upper() const ; 19 | }; 20 | 21 | #endif // CHAR_RANGE_H 22 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/nfa_tools/char_set.cpp: -------------------------------------------------------------------------------- 1 | #include "char_set.h" 2 | #include "../finite_automata/state.h" 3 | 4 | std::string char_set::get_string(char input) { 5 | if (characters.count(input) != 0) { 6 | return std::string("") + input; 7 | } 8 | for (auto range : ranges) { 9 | if (range->is_in_range(input)) { 10 | return range->get_range_string(); 11 | } 12 | } 13 | 14 | return EPSILON; // empty char_set i.e epsilon transition 15 | } 16 | 17 | char_set::char_set(): ranges(), characters() { 
18 | 19 | } 20 | 21 | char_set::char_set(char c) : ranges(), characters() 22 | { 23 | characters[c] = true; 24 | } 25 | 26 | char_set::char_set(const char_set & c_s) 27 | : ranges(), characters(c_s.get_characters()) 28 | { 29 | for (auto const& r : c_s.get_ranges()) 30 | { 31 | ranges.push_back(std::shared_ptr(new char_range(*r))); 32 | } 33 | } 34 | 35 | void char_set::add_character(char c) { 36 | characters[c] = true; 37 | } 38 | 39 | void char_set::add_range(char l, char u) { 40 | ranges.push_back(std::shared_ptr(new char_range(l, u))); 41 | } 42 | 43 | const std::map &char_set::get_characters() const 44 | { 45 | return characters; 46 | } 47 | 48 | const std::vector > &char_set::get_ranges() const 49 | { 50 | return ranges; 51 | } 52 | 53 | bool char_set::is_empty() { 54 | return characters.empty() && ranges.empty(); 55 | } 56 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/nfa_tools/char_set.h: -------------------------------------------------------------------------------- 1 | #ifndef CHAR_SET_H 2 | #define CHAR_SET_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "char_range.h" 9 | 10 | class char_set 11 | { 12 | private: 13 | std::map characters; 14 | std::vector> ranges; 15 | public: 16 | char_set(); 17 | char_set(char); 18 | char_set(const char_set&); 19 | void add_character(char); 20 | void add_range(char, char); 21 | 22 | // getters 23 | std::string get_string (char input); 24 | bool is_empty(); 25 | const std::map &get_characters() const; 26 | const std::vector > &get_ranges() const; 27 | }; 28 | 29 | #endif // CHAR_SET_H 30 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/nfa_tools/lexical_rules.cpp: -------------------------------------------------------------------------------- 1 | #include "lexical_rules.h" 2 | 3 | lexical_rules::lexical_rules () 4 | { 5 | } 6 | 7 | void lexical_rules::add_punct_char (char punct) 8 | { 9 | punctuations.push_back (punct); 10 | } 11 | 12 | void lexical_rules::add_keyword (std::string keyword) 13 | { 14 | keywords.push_back (keyword); 15 | } 16 | 17 | void lexical_rules::add_reg_def (regular_definition definition) 18 | { 19 | definitions.push_back (definition); 20 | } 21 | 22 | void lexical_rules::add_regex (regular_expression regex) 23 | { 24 | expressions.push_back (regex); 25 | } 26 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_generator/nfa_tools/lexical_rules.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXICAL_RULES_H 2 | #define LEXICAL_RULES_H 3 | 4 | #include "char_set.h" 5 | 6 | #include 7 | #include 8 | 9 | struct regular_definition 10 | { 11 | std::string name; 12 | char_set sequence; 13 | }; 14 | 15 | struct regular_expression 16 | { 17 | std::string name; 18 | std::string rhs; 19 | }; 20 | 21 | class lexical_rules 22 | { 23 | private: 24 | std::vector definitions; 25 | std::vector expressions; 26 | std::vector keywords; 27 | std::vector punctuations; 28 | public: 29 | lexical_rules (); 30 | void add_punct_char (char punct); 31 | void add_keyword (std::string keyword); 32 | void add_regex (regular_expression regex); 33 | void add_reg_def (regular_definition definition); 34 | 35 | }; 36 | 37 | #endif // LEXICAL_RULES_H 38 | -------------------------------------------------------------------------------- 
/lexical_analyzer/lexical_analyzer_generator/nfa_tools/regex_processor.h: -------------------------------------------------------------------------------- 1 | #ifndef REGEX_PROCESSOR_H 2 | #define REGEX_PROCESSOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../finite_automata/nfa.h" 9 | #include "lexical_rules.h" 10 | 11 | 12 | enum STACK_OPERATOR { 13 | CONCAT, 14 | UNION, 15 | STAR, 16 | PLUS, 17 | LEFT_PAREN, 18 | }; 19 | 20 | /// Operators 21 | #define UNION_SYMBOL '|' 22 | #define STAR_SYMBOL '*' 23 | #define PLUS_SYMBOL '+' 24 | #define LEFT_PAREN_SYMBOL '(' 25 | #define RIGHT_PAREN_SYMBOL ')' 26 | 27 | /// Range separator 28 | #define RANGE_SEP '-' 29 | /// Parsing characters 30 | #define EPS '\0' 31 | #define SPACE ' ' 32 | #define ESC '\\' 33 | #define LAMBDA 'L' 34 | 35 | std::shared_ptr evaluate_regex (regular_expression regex, 36 | std::map > &sym_table); 38 | 39 | #endif // REGEX_PROCESSOR_H 40 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_analyzer_runner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Checks if "build" directory exists or not. If it exists then it will 4 | # not create another one to avoid unnecessary warning messages else it will create one. 5 | if [ ! -d "build" ]; then 6 | mkdir build 7 | fi 8 | 9 | cd build/ 10 | 11 | # Checks if "CMakeCache.txt" file exists or not. If it exists then it will remove it 12 | # to avoid confusion between multiple building modes. 13 | #if [ -f "CMakeCache.txt" ]; then 14 | # rm CMakeCache.txt 15 | #fi 16 | 17 | # Running cmake on the CMakeLists.txt in the "lexical_analyzer" (Parent) directory. 18 | 19 | # 1- With the option of building unit tests turned ON. 20 | cmake -DBUILD_UNIT_TEST=ON .. 21 | 22 | # 2- With the option of building unit tests turned OFF explicitly. 23 | #cmake -DBUILD_UNIT_TEST=OFF .. 24 | 25 | # 3- With the option of building unit tests turned OFF implicitly. 26 | # NOTING that this option will work correctly as long as we remove CMakeCache.txt 27 | # everytime because if we build this module with -DBUILD_UNIT_TEST=ON this value will be 28 | # cached and if we didn't turn this option off explicity using -DBUILD_UNIT_TEST=OFF 29 | # cmake will use the cached value and will build the tests accordingly even if we don't 30 | # need them in our build. 31 | #cmake .. 32 | 33 | make 34 | 35 | # "ctest" or "make test" can be used and both will work correctly as long as we remove 36 | # CMakeCache.txt but "make test" will work correctly either we remove it or not as it 37 | # doesn't use cached test results (Dangerous! maybe misleading) as ctest; It re-builds 38 | # modified tests individually so their results are always up-to-date. 
39 | make test 40 | 41 | #ctest -------------------------------------------------------------------------------- /lexical_analyzer/lexical_tokenizer/lexical_tokenizer.h: -------------------------------------------------------------------------------- 1 | #ifndef LEXICAL_TOKENIZER_H 2 | #define LEXICAL_TOKENIZER_H 3 | 4 | #include "../lexical_analyzer_generator/finite_automata/dfa_state.h" 5 | #include "../lexical_analyzer_generator/finite_automata/dfa.h" 6 | #include "token.h" 7 | #include 8 | #include 9 | 10 | struct acceptance_state { 11 | int state_id; 12 | std::string token_class; 13 | }; 14 | 15 | class lexical_tokenizer { 16 | private: 17 | int start_state_id; 18 | int total_states; 19 | int matcher_pos; 20 | int prev_matcher_pos; 21 | std::string input_str; 22 | std::shared_ptr dfa_ptr; 23 | std::vector acceptance_states_info; 24 | std::vector transition_table_inputs; 25 | std::shared_ptr parse_lexical_analyzer_machine (char *transition_table_file); 26 | public: 27 | lexical_tokenizer (char *transition_table_file, char *code_file); 28 | lexical_tokenizer (std::shared_ptr &, char *code_file); 29 | int get_next_token (token &); 30 | const std::shared_ptr get_dfa() const; 31 | }; 32 | 33 | #endif // LEXICAL_ANALYZER_H 34 | -------------------------------------------------------------------------------- /lexical_analyzer/lexical_tokenizer/token.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_TOKEN_H 2 | #define COMPILER_TOKEN_H 3 | 4 | #include 5 | 6 | struct token { 7 | std::string lexeme; 8 | std::string token_class; 9 | int str_pos; 10 | }; 11 | 12 | #endif //COMPILER_TOKEN_H 13 | -------------------------------------------------------------------------------- /semantic_analyzer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | 3 | set (CMAKE_CXX_STANDARD 11) 4 | 5 | # ------------------------------------------------------------------------------ 6 | # Compiler flags 7 | # ------------------------------------------------------------------------------ 8 | 9 | #set (GCC_COMPILE_FLAGS "-Wall -Wextra -g") 10 | 11 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS}") 12 | 13 | option (BUILD_UNIT_TEST "Determines whether to build unit tests or not" OFF) 14 | 15 | # ------------------------------------------------------------------------------ 16 | # Build 17 | # ------------------------------------------------------------------------------ 18 | 19 | set (HEADER_FILES 20 | intermediate_code_generation/semantic_rules/zeros_ones_counter.h 21 | intermediate_code_generation/semantic_rules/three_address_code.h 22 | intermediate_code_generation/semantic_rules/java_bytecode.h) 23 | 24 | set (SOURCE_FILES) 25 | 26 | add_library (libsemantic SHARED ${SOURCE_FILES} ${HEADER_FILES}) 27 | set_target_properties(libsemantic PROPERTIES LINKER_LANGUAGE CXX) 28 | 29 | add_executable(semantic_main main.cpp) 30 | target_link_libraries(semantic_main libsyntax) 31 | target_link_libraries(semantic_main libsemantic) 32 | 33 | 34 | 35 | # ------------------------------------------------------------------------------ 36 | # Tests 37 | # ------------------------------------------------------------------------------ 38 | 39 | # Unit Tests 40 | 41 | if (BUILD_UNIT_TEST) 42 | enable_testing () 43 | add_library (catch_semantic INTERFACE) 44 | target_include_directories (catch_semantic INTERFACE ../tests/lib/) 45 | add_library (unit_test_catch_semantic STATIC 
../tests/catch_main.cpp) 46 | target_link_libraries (unit_test_catch_semantic catch_semantic) 47 | target_link_libraries (unit_test_catch_semantic libsemantic) 48 | target_link_libraries (unit_test_catch_semantic libsyntax) 49 | 50 | macro (do_test test_name) 51 | add_executable (unit_${test_name}_semantic 52 | ../tests/semantic_analyzer/unit/${test_name}.cpp) 53 | target_link_libraries (unit_${test_name}_semantic unit_test_catch_semantic) 54 | add_test (unit_${test_name}_semantic_analyzer unit_${test_name}_semantic) 55 | endmacro (do_test) 56 | 57 | 58 | do_test(test_1) 59 | 60 | endif (BUILD_UNIT_TEST) 61 | -------------------------------------------------------------------------------- /semantic_analyzer/README.md: -------------------------------------------------------------------------------- 1 | > The semantic analyzer uses the syntax tree and the information in the symbol table to check the source program for semantic consistency with the language definition. It also gathers type information and saves it in either the syntax tree or the symbol table, for subsequent use during intermediate-code generation. 2 | 3 | 4 | * Use a recursive-descent parser with one function for each nonterminal. The function for nonterminal A receives the inherited attributes of A as arguments and returns the synthesized attributes of A. 5 | 6 | * Generate code on the fly, using a recursive-descent parser. 7 | 8 | * Implement an SDT in conjunction with an LL-parser. The attributes are kept on the parsing stack, and the rules fetch the needed attributes from known locations on the stack. 9 | 10 | 11 | ## Syntax-Directed Translation 12 | > * We associate information with a language construct by attaching attributes to the grammar symbol(s) representing the construct. A syntax-directed definition specifies the values of attributes by associating semantic rules with the grammar productions. For example, an infix-to-postfix translator might have a production and rule 13 | > * The most general approach to syntax-directed translation is to construct a parse tree or a syntax tree, and then to compute the values of attributes at the nodes of the tree by visiting the nodes of the tree. In many cases, translation can be done during parsing, without building an explicit tree. We shall therefore study a class of syntax-directed translations called "L-attributed translations" (L for left-to-right), which encompass virtually all translations that can be performed during parsing. We also study a smaller class, called "S-attributed translations" (S for synthesized), which can be performed easily in connection with a bottom-up parse. 14 | 15 | 16 | ## Syntax-Directed Definitions 17 | > A syntax-directed definition (SDD) is a context-free grammar together with attributes and rules. Attributes are associated with grammar symbols and rules are associated with productions. If X is a symbol and a is one of its attributes, then we write X.a to denote the value of a at a particular parse-tree node labeled X. If we implement the nodes of the parse tree by records or objects, then the attributes of X can be implemented by data fields in the records that represent the nodes for X. Attributes may be of any kind: numbers, types, table references, or strings, for instance. The strings may even be long sequences of code, say code in the intermediate language used by a compiler. 
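A minimal sketch of what such a stack-based action looks like with the `cfg_symbol` API used in this project. The production `T -> 'num' #T`, the attribute name `val`, and the function name `record_T` are illustrative only; they are not taken from the rules shipped under `intermediate_code_generation/semantic_rules/`:

```cpp
#include <vector>
#include "../../../syntax_analyzer/context_free_grammar/cfg_symbol.h" // same relative path the shipped semantic_rules headers use

// T -> 'num' #T : when the synthesized record #T reaches the top of the parser
// stack it carries the matched lexeme under "lexval" (stored there by the
// predictive parser); copy it one slot down so the record below can read T.val.
void record_T(std::vector<cfg_symbol> &stack)
{
    stack[stack.size() - 2].add_attribute("val",
        stack.back().get_attribute("lexval").front());
}
```

The parser invokes such a function through `cfg_symbol::get_action()` whenever an `ACTION` or `SYNTHESISED` symbol reaches the top of the stack, which is exactly the "attributes are kept on the parsing stack" scheme described above.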
-------------------------------------------------------------------------------- /semantic_analyzer/intermediate_code_generation/README.md: -------------------------------------------------------------------------------- 1 | > * In the process of translating a source program into target code, a compiler may construct one or more intermediate representations, which can have a variety of forms. Syntax trees are a form of intermediate representation; they are commonly used during syntax and semantic analysis. 2 | > * After syntax and semantic analysis of the source program, many compilers generate an explicit low-level or machine-like intermediate representation, which we can think of as a program for an abstract machine. This intermediate representation should have two important properties: it should be easy to produce and it should be easy to translate into the target machine. 3 | -------------------------------------------------------------------------------- /semantic_analyzer/intermediate_code_generation/semantic_rules/three_address_code.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_THREE_ADDRESS_CODE_H 2 | #define COMPILER_THREE_ADDRESS_CODE_H 3 | 4 | #include <string> 5 | #include <vector> 6 | #include <fstream> 7 | 8 | 9 | namespace three_address_code 10 | { 11 | // ********************************************************* 12 | // ************** DECLARATIONS ***************************** 13 | // ********************************************************* 14 | 15 | /** 16 | * the generated code. 17 | */ 18 | std::vector<std::string> code_base; 19 | std::ofstream code_file("code.asm"); 20 | 21 | /** 22 | * to generate labels. 23 | */ 24 | int next_instr = 0; 25 | 26 | std::string gen_label (); 27 | void gen_code(std::string code); 28 | 29 | void backpatch(std::vector<std::string> lists, std::string jump_target); 30 | std::vector<std::string> merge(std::vector<std::string> list1, std::vector<std::string> list2); 31 | 32 | // ********************************************************* 33 | // ********************** UTILITIES ************************ 34 | // ********************************************************* 35 | 36 | void backpatch(std::vector<std::string> lists, std::string jump_target) 37 | { 38 | for (auto list : lists) 39 | { 40 | code_base[std::atoi(list.c_str())] += jump_target; 41 | } 42 | } 43 | 44 | /** 45 | * merge two lists. 46 | */ 47 | std::vector<std::string> merge(std::vector<std::string> list1, std::vector<std::string> list2) 48 | { 49 | std::vector<std::string> res; 50 | res.reserve(list1.size() + list2.size()); // preallocate memory 51 | res.insert(res.end(), list1.begin(), list1.end()); 52 | res.insert(res.end(), list2.begin(), list2.end()); 53 | return res; 54 | } 55 | 56 | 57 | /** 58 | * generates label from the current value of next_instruction (PC). 59 | */ 60 | std::string gen_label() 61 | { 62 | return std::to_string(next_instr++) + ": "; 63 | } 64 | 65 | /** 66 | * generates code.
67 | */ 68 | void gen_code(std::string code) 69 | { 70 | // line by line 71 | code_base.push_back(gen_label() + code); 72 | } 73 | 74 | void write_code() 75 | { 76 | for (auto c : code_base) 77 | { 78 | code_file << c << std::endl; 79 | } 80 | code_file.close(); 81 | } 82 | 83 | 84 | 85 | // ********************************************************* 86 | // ********************** ACTIONS ************************** 87 | // ********************************************************* 88 | 89 | void finalize_action(std::vector &stack) 90 | { 91 | write_code(); 92 | } 93 | 94 | // =================== B -> true ================ 95 | void B_action_true(std::vector &stack) 96 | { 97 | int top = static_cast(stack.size() - 1); 98 | stack[top - 1].add_attribute("truelist", std::to_string(next_instr)); 99 | gen_code("goto "); 100 | } 101 | 102 | // =================== B -> false ================ 103 | void B_action_false(std::vector &stack) 104 | { 105 | int top = static_cast(stack.size() - 1); 106 | stack[top - 1].add_attribute("falselist", std::to_string(next_instr)); 107 | gen_code("goto "); 108 | } 109 | 110 | // =================== B -> E1 #E1 relop E2 #E2 @B ================ 111 | void E1_record_relop(std::vector &stack) 112 | { 113 | int top = static_cast(stack.size() - 1); 114 | stack[top - 4].add_attribute("E1.addr", stack.back().get_attribute("addr")); 115 | } 116 | 117 | void E2_record_relop(std::vector &stack) 118 | { 119 | int top = static_cast(stack.size() - 1); 120 | stack[top - 1].add_attribute("E2.addr", stack.back().get_attribute("addr")); 121 | } 122 | 123 | void B_action_relop(std::vector &stack) 124 | { 125 | int top = static_cast(stack.size() - 1); 126 | stack[top - 1].add_attribute("truelist", std::to_string(next_instr)); 127 | stack[top - 1].add_attribute("falselist", std::to_string(next_instr + 1)); 128 | 129 | gen_code( 130 | "if " 131 | + stack.back().get_attribute("E1.addr").front() 132 | + " relop " 133 | + stack.back().get_attribute("E2.addr").front() 134 | + " goto " 135 | ); 136 | 137 | gen_code( 138 | "goto " 139 | ); 140 | } 141 | 142 | // =================== B -> B1 #B1 || M #M B2 #B2 @B ================ 143 | 144 | void B1_record_or(std::vector &stack) 145 | { 146 | int top = static_cast(stack.size() - 1); 147 | 148 | stack[top - 7].add_attribute("B1.truelist", stack.back().get_attribute("truelist")); 149 | stack[top - 7].add_attribute("B1.falselist", stack.back().get_attribute("falselist")); 150 | } 151 | 152 | void M_record_or(std::vector &stack) 153 | { 154 | int top = static_cast(stack.size() - 1); 155 | 156 | stack[top - 3].add_attribute("M.instr", stack.back().get_attribute("instr")); 157 | } 158 | 159 | void B2_record_or(std::vector &stack) 160 | { 161 | int top = static_cast(stack.size() - 1); 162 | 163 | stack[top - 2].add_attribute("B2.truelist", stack.back().get_attribute("truelist")); 164 | stack[top - 7].add_attribute("B2.falselist", stack.back().get_attribute("falselist")); 165 | } 166 | 167 | void B_action_or(std::vector &stack) 168 | { 169 | int top = static_cast(stack.size() - 1); 170 | 171 | backpatch(stack[top - 1].get_attribute("B1.falselist"), stack.back().get_attribute("M.instr").front()); 172 | 173 | stack[top - 1].add_attribute( 174 | "truelist", 175 | merge(stack[top - 1].get_attribute("B1.truelist"), 176 | stack[top - 1].get_attribute("B2.truelist")) 177 | ); 178 | 179 | stack[top - 1].add_attribute( 180 | "falselist", stack[top - 1].get_attribute("B2.falselist") 181 | ); 182 | } 183 | 184 | 185 | // =================== S -> if ( B #B ) M #M 
S1 #S1 @S ================ 186 | 187 | void B_record_if(std::vector &stack) 188 | { 189 | int top = static_cast(stack.size() - 1); 190 | stack[top - 6].add_attribute("B.truelist", stack.back().get_attribute("truelist")); 191 | stack[top - 6].add_attribute("B.falselist", stack.back().get_attribute("falselist")); 192 | } 193 | 194 | void M_record_if(std::vector &stack) 195 | { 196 | int top = static_cast(stack.size() - 1); 197 | stack[top - 3].add_attribute("M.instr", stack.back().get_attribute("instr")); 198 | } 199 | 200 | void S1_record_if(std::vector &stack) 201 | { 202 | int top = static_cast(stack.size() - 1); 203 | 204 | stack[top - 1].add_attribute("S1.nextlist", stack.back().get_attribute("nextlist")); 205 | stack[top - 1].add_attribute("S1.code", stack.back().get_attribute("code")); 206 | } 207 | 208 | void S_action_if(std::vector &stack) 209 | { 210 | int top = static_cast(stack.size() - 1); 211 | 212 | backpatch(stack.back().get_attribute("B.truelist"), stack.back().get_attribute("M.instr").front()); 213 | stack[top - 1].add_attribute( 214 | "nextlist", 215 | merge(stack.back().get_attribute("S1.nextlist"), stack.back().get_attribute("B.falselist")) 216 | ); 217 | gen_code(stack.back().get_attribute("S1.code").front()); 218 | } 219 | 220 | 221 | 222 | // =================== E -> 'num' ================ 223 | 224 | void E_action_num(std::vector &stack) 225 | { 226 | int top = static_cast(stack.size() - 1); 227 | stack[top - 1].add_attribute("addr", "num"); 228 | } 229 | 230 | 231 | // =================== M -> '\L' ================ 232 | 233 | void M_action_eps(std::vector &stack) 234 | { 235 | int top = static_cast(stack.size() - 1); 236 | stack[top - 1].add_attribute("instr", std::to_string(next_instr)); 237 | 238 | } 239 | 240 | // =================== S -> 'assign' ================ 241 | 242 | void S_action_assign(std::vector &stack) 243 | { 244 | int top = static_cast(stack.size() - 1); 245 | 246 | stack[top - 1].add_attribute("code", "assign"); 247 | } 248 | 249 | 250 | } // namespace three_address_code 251 | 252 | 253 | 254 | #endif //COMPILER_THREE_ADDRESS_CODE_H 255 | -------------------------------------------------------------------------------- /semantic_analyzer/intermediate_code_generation/semantic_rules/zeros_ones_counter.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_SEMANTIC_RULES_H 2 | #define COMPILER_SEMANTIC_RULES_H 3 | 4 | #include 5 | #include "../../../syntax_analyzer/context_free_grammar/cfg_symbol.h" 6 | 7 | namespace zeros_ones_counter { 8 | 9 | int zeros = 0; 10 | int ones = 0; 11 | 12 | void record_A(std::vector &stack) { 13 | // S{print A.n1, print A.n0} 14 | zeros = std::atoi(stack.back().get_attribute("n0").front().c_str()); 15 | ones = std::atoi(stack.back().get_attribute("n1").front().c_str()); 16 | 17 | std::cout << "number of zeros = " << zeros << std::endl; 18 | std::cout << "number of ones = " << ones << std::endl; 19 | } 20 | 21 | 22 | void record_B(std::vector &stack) { 23 | // stack[top - 1].n0 = n0, stack[top - 1].n1 = n1 24 | stack[stack.size() - 2].add_attribute("n0", stack.back().get_attribute("n0").front()); 25 | stack[stack.size() - 2].add_attribute("n1", stack.back().get_attribute("n1").front()); 26 | } 27 | 28 | void record_B1_0(std::vector &stack) { 29 | // S{[top - 1].n0 = B1.n0 + 1, [top - 1].n1 = B1.n1} 30 | stack[stack.size() - 2]. 
31 | add_attribute("n0", std::to_string(std::atoi(stack.back().get_attribute("n0").front().c_str()) + 1)); 32 | stack[stack.size() - 2].add_attribute("n1", stack.back().get_attribute("n1").front()); 33 | } 34 | 35 | void record_B1_1(std::vector &stack) { 36 | stack[stack.size() - 2]. 37 | add_attribute("n1", std::to_string(std::atoi(stack.back().get_attribute("n1").front().c_str()) + 1)); 38 | stack[stack.size() - 2].add_attribute("n0", stack.back().get_attribute("n0").front()); 39 | } 40 | 41 | 42 | void action_1(std::vector &stack) { 43 | // @{[top - 1].n0 = 0, [top - 1].n1 = 0} 44 | stack[stack.size() - 2].add_attribute("n0", "0"); 45 | stack[stack.size() - 2].add_attribute("n1", "0"); 46 | } 47 | } 48 | 49 | #endif //COMPILER_SEMANTIC_RULES_H 50 | -------------------------------------------------------------------------------- /syntax_analyzer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | 3 | set (CMAKE_CXX_STANDARD 11) 4 | 5 | # ------------------------------------------------------------------------------ 6 | # Compiler flags 7 | # ------------------------------------------------------------------------------ 8 | 9 | #set (GCC_COMPILE_FLAGS "-Wall -Wextra -g") 10 | 11 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COMPILE_FLAGS}") 12 | 13 | option (BUILD_UNIT_TEST "Determines whether to build unit tests or not" OFF) 14 | 15 | # ------------------------------------------------------------------------------ 16 | # Build 17 | # ------------------------------------------------------------------------------ 18 | 19 | 20 | set (HEADER_FILES 21 | context_free_grammar/cfg_production.h 22 | context_free_grammar/cfg_rule.h 23 | context_free_grammar/cfg_symbol.h 24 | context_free_grammar/cfg.h 25 | context_free_grammar/util/first_set.h 26 | context_free_grammar/util/follow_set.h 27 | parsing_table.h 28 | predictive_parser.h 29 | context_free_grammar/util/follow_set.h 30 | ) 31 | 32 | set (SOURCE_FILES 33 | context_free_grammar/cfg_production.cpp 34 | context_free_grammar/cfg_rule.cpp 35 | context_free_grammar/cfg_symbol.cpp 36 | context_free_grammar/cfg.cpp 37 | context_free_grammar/util/first_set.cpp 38 | context_free_grammar/util/follow_set.cpp 39 | parsing_table.cpp 40 | predictive_parser.cpp 41 | ) 42 | 43 | add_library (libsyntax SHARED ${SOURCE_FILES} ${HEADER_FILES}) 44 | 45 | add_executable(syntax_main main.cpp) 46 | target_link_libraries(syntax_main libsyntax) 47 | 48 | # ------------------------------------------------------------------------------ 49 | # Tests 50 | # ------------------------------------------------------------------------------ 51 | 52 | # Unit Tests 53 | 54 | if (BUILD_UNIT_TEST) 55 | enable_testing () 56 | add_library (catch_syntax INTERFACE) 57 | target_include_directories (catch_syntax INTERFACE ../tests/lib/) 58 | add_library (unit_test_catch_syntax STATIC ../tests/catch_main.cpp) 59 | target_link_libraries (unit_test_catch_syntax catch_syntax) 60 | target_link_libraries (unit_test_catch_syntax libsyntax) 61 | 62 | macro (do_test test_name) 63 | add_executable (unit_${test_name}_syntax 64 | ../tests/syntax_analyzer/unit/${test_name}.cpp) 65 | target_link_libraries (unit_${test_name}_syntax unit_test_catch_syntax) 66 | add_test (unit_${test_name}_syntax_analyzer unit_${test_name}_syntax) 67 | endmacro (do_test) 68 | 69 | do_test (cfg_parser_tests) 70 | do_test (cfg_tests) 71 | do_test (ll_1_tests) 72 | do_test (first_set_tests) 73 | do_test (follow_set_tests) 74 | 
do_test (predictive_parser_tests) 75 | do_test (parsing_table_tests) 76 | endif (BUILD_UNIT_TEST) 77 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_CFG_H 2 | #define COMPILER_CFG_H 3 | 4 | #include "cfg_rule.h" 5 | #include "util/first_set.h" 6 | #include "util/follow_set.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define EPS "\\L" 15 | 16 | /** 17 | * utility function used in left factoring 18 | */ 19 | int longest_common_prefix(std::vector prods); 20 | /** 21 | * substitues rule1 in rule2 22 | * 23 | * @return the new resulting rule 24 | */ 25 | cfg_rule substitue(cfg_rule rule1, cfg_rule rule2); 26 | 27 | class cfg { 28 | private: 29 | std::unordered_set non_terminals; 31 | std::unordered_set terminals; 33 | cfg_symbol start_symbol; 34 | std::unordered_map grammar; 36 | std::unordered_map , cfg_symbol::hasher 37 | , cfg_symbol::comparator> cfg_symbol_productions; 38 | 39 | std::unordered_map &)>&> functions; 40 | 41 | void process_first_set(int prod_symbol_index, std::shared_ptr first_set, 42 | cfg_production *prod); 43 | void process_follow_set(cfg_symbol non_terminal, std::shared_ptr follow_set); 44 | 45 | void parse_rule (std::string &, bool); 46 | 47 | void update_rule (std::vector &); 48 | 49 | public: 50 | cfg (); 51 | explicit cfg (std::string); 52 | 53 | void parse (std::string &); 54 | const std::unordered_map, cfg_symbol::hasher, cfg_symbol::comparator> & 55 | get_cfg_symbol_productions() const; 56 | 57 | void print_cfg_symbol_productions_map(); 58 | void set_cfg_symbol_productions( 59 | const std::unordered_map, cfg_symbol::hasher, cfg_symbol::comparator> &cfg_symbol_productions); 60 | const std::unordered_map &get_grammar() const; 61 | void set_non_terminals(const std::unordered_set &); 63 | void 64 | set_grammar(const std::unordered_map &); 65 | std::shared_ptr get_first_set (); 66 | std::shared_ptr get_follow_set (); 67 | void add_rule (cfg_symbol &, std::vector &); 68 | void add_rule (cfg_rule &); 69 | void set_terminals(const std::unordered_set &terminals); 71 | void set_start_symbol(const cfg_symbol &start_symbol); 72 | void add_function(std::string name, std::function &)> func); 73 | 74 | /** Grammar Correction **/ 75 | void left_factor (); 76 | void remove_left_recursion (); 77 | 78 | /** Getters **/ 79 | std::unordered_set get_non_terminals (); 81 | std::unordered_set get_terminals (); 83 | cfg_symbol get_start_symbol (); 84 | std::vector get_rules (); 85 | 86 | bool is_last_symbol(int pos, cfg_production production); 87 | 88 | int get_next_sym_pos(int pos, cfg_production production); 89 | }; 90 | 91 | #endif //COMPILER_CFG_H -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg_production.cpp: -------------------------------------------------------------------------------- 1 | #include "cfg_production.h" 2 | #include "cfg.h" 3 | 4 | cfg_production::cfg_production () 5 | : lhs_symbol (), production_symbols () 6 | { 7 | 8 | } 9 | 10 | cfg_production::cfg_production (cfg_symbol & symbol 11 | , std::vector & symbols) 12 | : lhs_symbol (symbol), production_symbols (symbols) 13 | { 14 | } 15 | 16 | void cfg_production::set_lhs_symbol (cfg_symbol & lhs_symbol) { 17 | cfg_production::lhs_symbol = lhs_symbol; 18 | } 19 | void cfg_production::add_symbol (cfg_symbol & sym) { 20 | 
production_symbols.push_back (sym); 21 | } 22 | 23 | cfg_symbol cfg_production::get_lhs_symbol () const 24 | { 25 | return lhs_symbol; 26 | } 27 | 28 | std::vector cfg_production::get_symbols () const 29 | { 30 | return production_symbols; 31 | } 32 | 33 | std::string cfg_production::to_string() 34 | { 35 | if (production_symbols.empty()) 36 | return ""; 37 | std::string name(lhs_symbol.get_name()); 38 | name += " -> "; 39 | for (cfg_symbol &s : production_symbols) 40 | { 41 | if (s.get_name() == EPS) 42 | name += "\\L "; 43 | else name += s.get_name() + " "; 44 | } 45 | 46 | return name.substr(0, name.size() - 1); // remove last space 47 | } 48 | 49 | std::string cfg_production::get_rhs_as_string() 50 | { 51 | if (production_symbols.empty()) 52 | return ""; 53 | std::string name(""); 54 | for (cfg_symbol &s : production_symbols) 55 | { 56 | if (s.get_name() == EPS) 57 | name += "\\L "; 58 | else name += s.get_name() + " "; 59 | } 60 | 61 | return name.substr(0, name.size() - 1); // remove last space 62 | } 63 | 64 | void cfg_production::pop_first_symbol() 65 | { 66 | production_symbols.erase(production_symbols.begin()); 67 | } 68 | 69 | int cfg_production::find(cfg_symbol symbol) 70 | { 71 | for (int i = 0; i < production_symbols.size(); i++) 72 | { 73 | if (production_symbols[i] == symbol) 74 | { 75 | return i; 76 | } 77 | } 78 | 79 | return -1; 80 | } 81 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg_production.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_CFG_PRODUCTION_H 2 | #define COMPILER_CFG_PRODUCTION_H 3 | 4 | #include "cfg_symbol.h" 5 | #include 6 | 7 | class cfg_production 8 | { 9 | private: 10 | /** Example: A -> B a e A 11 | * lhs_symbol: A 12 | * production_symbols: {B, a, e, A} 13 | */ 14 | cfg_symbol lhs_symbol; 15 | std::vector production_symbols; 16 | public: 17 | cfg_production (); 18 | explicit cfg_production (cfg_symbol &, std::vector &); 19 | 20 | void set_lhs_symbol (cfg_symbol &); 21 | void add_symbol (cfg_symbol &); 22 | 23 | /** 24 | * removes the first symbol in the production. 25 | */ 26 | void pop_first_symbol(); 27 | /** 28 | * checks if the production has the given symbol. 29 | * @returns index of the symbol if found, otherwise returns -1. 
30 | */ 31 | int find(cfg_symbol); 32 | 33 | /** Getters **/ 34 | virtual cfg_symbol get_lhs_symbol () const; 35 | virtual std::string to_string(); 36 | virtual std::string get_rhs_as_string(); 37 | virtual std::vector get_symbols () const; 38 | }; 39 | 40 | #endif //COMPILER_CFG_PRODUCTION_H 41 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg_rule.cpp: -------------------------------------------------------------------------------- 1 | #include "cfg_rule.h" 2 | 3 | cfg_rule::cfg_rule () 4 | : lhs_symbol (), productions () { 5 | 6 | } 7 | 8 | cfg_rule::cfg_rule (cfg_symbol & symbol 9 | , std::vector & prods) 10 | : lhs_symbol (symbol), productions (prods) { 11 | 12 | } 13 | 14 | const cfg_symbol & cfg_rule::get_lhs_symbol () const { 15 | return lhs_symbol; 16 | } 17 | 18 | std::vector & cfg_rule::get_productions () 19 | { 20 | return productions; 21 | } 22 | 23 | void cfg_rule::empty_productions () 24 | { 25 | productions.clear(); 26 | } 27 | 28 | void cfg_rule::set_productions (std::vector & new_productions) 29 | { 30 | productions = new_productions; 31 | } 32 | 33 | std::string cfg_rule::to_string() { 34 | std::string s = ""; 35 | s += lhs_symbol.get_name() + " -> "; 36 | s += productions[0].get_rhs_as_string(); 37 | 38 | for (int i = 1; i < productions.size(); i++) { 39 | s += " | "; 40 | s += productions[i].get_rhs_as_string(); 41 | } 42 | 43 | return s; 44 | } 45 | 46 | void cfg_rule::add_productions (std::vector & new_productions) { 47 | productions.insert(productions.end(), new_productions.begin(), new_productions.end()); 48 | } 49 | 50 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg_rule.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_CFG_RULE_H 2 | #define COMPILER_CFG_RULE_H 3 | 4 | #include "cfg_production.h" 5 | #include 6 | #include 7 | 8 | class cfg_rule { 9 | private: 10 | /** Example: A -> a | b c | d 11 | * lhs_symbol: A 12 | * production_symbols: {{a}, {b, c}, {d}} 13 | */ 14 | cfg_symbol lhs_symbol; 15 | std::vector productions; 16 | public: 17 | cfg_rule (); 18 | explicit cfg_rule (cfg_symbol &, std::vector &); 19 | 20 | /** Getters **/ 21 | const cfg_symbol & get_lhs_symbol () const; 22 | std::vector & get_productions (); 23 | 24 | /** 25 | * convert the rule to readable string 26 | */ 27 | std::string to_string(); 28 | /** 29 | * removes all productions. 
30 | */ 31 | void empty_productions (); 32 | 33 | void set_productions (std::vector &); 34 | void add_productions (std::vector &); 35 | }; 36 | 37 | #endif //COMPILER_CFG_RULE_H 38 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg_symbol.cpp: -------------------------------------------------------------------------------- 1 | #include "cfg_symbol.h" 2 | 3 | cfg_symbol::cfg_symbol () 4 | : name (), type () 5 | { 6 | 7 | } 8 | 9 | cfg_symbol::cfg_symbol (cfg_symbol_type symbol_type) 10 | : name (), type (symbol_type) 11 | { 12 | 13 | } 14 | 15 | cfg_symbol::cfg_symbol (const std::string & symbol_name, cfg_symbol_type symbol_type) 16 | : name (symbol_name), type (symbol_type) 17 | { 18 | 19 | } 20 | 21 | std::string cfg_symbol::get_name() const 22 | { 23 | return name; 24 | } 25 | 26 | cfg_symbol_type cfg_symbol::get_type() const 27 | { 28 | return type; 29 | } 30 | 31 | void cfg_symbol::set_action(std::function &)> action) 32 | { 33 | cfg_symbol::action = action; 34 | } 35 | 36 | void cfg_symbol::add_attribute(std::string name, std::string value) 37 | { 38 | attributes[name].push_back(value); 39 | } 40 | 41 | void cfg_symbol::add_attribute(std::string name, std::vector values) 42 | { 43 | for (auto value : values) 44 | { 45 | attributes[name].push_back(value); 46 | } 47 | } 48 | 49 | void cfg_symbol::add_inherited_attribute(std::string name, std::string value) { 50 | inherited_attributes[name].push_back(value); 51 | } 52 | 53 | std::vector cfg_symbol::get_attribute(std::string name) 54 | { 55 | return attributes[name]; 56 | } 57 | 58 | const std::function &)> &cfg_symbol::get_action() const 59 | { 60 | return action; 61 | } 62 | 63 | bool cfg_symbol::is_term_or_non_term() { 64 | return cfg_symbol::type == TERMINAL || cfg_symbol::type == NON_TERMINAL; 65 | } 66 | 67 | std::vector cfg_symbol::get_inherited_attribute(std::string name) { 68 | return inherited_attributes[name]; 69 | } 70 | 71 | bool cfg_symbol::has_inherited_attribute() { 72 | return !inherited_attributes.empty(); 73 | } 74 | 75 | std::map> cfg_symbol::get_inherited_attributes() { 76 | return inherited_attributes; 77 | } 78 | 79 | void cfg_symbol::set_inherited_attributes(std::map> inh_attr) { 80 | inherited_attributes = inh_attr; 81 | } 82 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/cfg_symbol.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_CFG_SYMBOL_H 2 | #define COMPILER_CFG_SYMBOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | enum cfg_symbol_type { 12 | TERMINAL, 13 | NON_TERMINAL, 14 | END_MARKER, 15 | SYNCH, 16 | ACTION, 17 | SYNTHESISED 18 | }; 19 | 20 | class cfg_production; 21 | 22 | class cfg_symbol { 23 | private: 24 | /** Example: A 25 | * name: A 26 | * type: NON_TERMINAL 27 | */ 28 | std::string name; 29 | cfg_symbol_type type; 30 | std::function &)> action; 31 | /** 32 | * the values of the key is vector since the attribute can have multi value 33 | * e.g falselist, truelist, nextlist ... 
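* (a "truelist", for instance, may carry several instruction indices that backpatch() in the semantic rules later fills with the same jump target)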
34 | */ 35 | std::map> attributes; 36 | std::map> inherited_attributes; 37 | 38 | public: 39 | cfg_symbol (); 40 | explicit cfg_symbol (cfg_symbol_type); 41 | explicit cfg_symbol (const std::string &, cfg_symbol_type); 42 | 43 | bool operator==(const cfg_symbol& other) const 44 | { 45 | return name == other.get_name() && type == other.get_type(); 46 | } 47 | 48 | struct comparator { 49 | bool operator () (const cfg_symbol & sym_a, const cfg_symbol & sym_b) const { 50 | if (!sym_a.get_name ().compare (sym_b.get_name ())) 51 | return true; 52 | return false; 53 | } 54 | }; 55 | 56 | struct hasher { 57 | std::size_t operator () (const cfg_symbol & sym_a) const { 58 | return std::hash () (sym_a.get_name ()); 59 | } 60 | }; 61 | 62 | 63 | void set_action(std::function &)> action); 64 | void add_attribute(std::string name, std::string value); 65 | void add_attribute(std::string name, std::vector values); 66 | void add_inherited_attribute(std::string name, std::string value); 67 | void set_inherited_attributes(std::map>); 68 | bool is_term_or_non_term(); 69 | 70 | /** Getters **/ 71 | const std::function &)> &get_action() const; 72 | std::vector get_attribute(std::string name); 73 | std::vector get_inherited_attribute(std::string name); 74 | std::map> get_inherited_attributes(); 75 | bool has_inherited_attribute(); 76 | 77 | std::string get_name () const; 78 | cfg_symbol_type get_type () const; 79 | }; 80 | 81 | #endif //COMPILER_CFG_SYMBOL_H 82 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/util/first_set.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by awalid on 4/25/18. 3 | // 4 | 5 | #include 6 | #include "first_set.h" 7 | #include "../cfg.h" 8 | 9 | first_set::first_set() = default; 10 | 11 | const std::unordered_map, pair_hasher, pair_comparator>> &first_set::get_set_map() const { 13 | return first_set::my_set; 14 | } 15 | 16 | void first_set::add_symbol(std::string key, cfg_symbol symbol, cfg_production *parent_prod) { 17 | first_set::my_set[key].insert(std::make_pair(symbol, parent_prod)); 18 | } 19 | 20 | bool first_set::has_eps(std::string symbol) { 21 | for(auto sym : first_set::my_set[symbol]) { 22 | if (sym.first.get_name() == EPS) { 23 | return true; 24 | } 25 | } 26 | return false; 27 | } 28 | 29 | bool first_set::empty(std::string key) { 30 | return first_set::my_set[key].empty(); 31 | } 32 | 33 | void first_set::print_to_console() { 34 | std::cout << "============================PRINTING FIRST SET==================================\n"; 35 | for (auto elem : first_set::my_set) { 36 | std::cout << "FIRST(" << elem.first << ") = {"; 37 | int cnt = 0; 38 | for (auto x : elem.second) { 39 | std::cout << x.first.get_name(); 40 | // if (x.second != nullptr) { 41 | // std::cout << ", " << x.second->to_string() << ">"; 42 | // } else { 43 | // std::cout << ">"; 44 | // } 45 | if (cnt != elem.second.size() - 1) { 46 | std::cout << ", "; 47 | } 48 | cnt++; 49 | } 50 | std::cout << "}\n"; 51 | } 52 | std::cout << "=================================================================================\n"; 53 | } 54 | 55 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/util/first_set.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by awalid on 4/25/18. 
3 | // 4 | 5 | #ifndef COMPILER_CFG_SET_H 6 | #define COMPILER_CFG_SET_H 7 | 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "../cfg_symbol.h" 14 | 15 | class cfg_symbol; 16 | 17 | struct pair_comparator { 18 | bool operator () (const std::pair & p1, 19 | const std::pair & p2) const { 20 | if (!p1.first.get_name ().compare (p2.first.get_name ()) && p1.second == p2.second) 21 | return true; 22 | return false; 23 | } 24 | }; 25 | 26 | struct pair_hasher { 27 | std::size_t operator () (const std::pair & p) const { 28 | return std::hash () (p.first.get_name()) ^ std::hash() (p.second); 29 | } 30 | }; 31 | 32 | class first_set { 33 | public: 34 | first_set(); 35 | 36 | void add_symbol(std::string, cfg_symbol symbol, cfg_production *parent_prod); 37 | const std::unordered_map, pair_hasher, pair_comparator>> &get_set_map() const; 39 | bool has_eps(std::string); 40 | bool empty(std::string); 41 | void print_to_console(); 42 | 43 | private: 44 | std::unordered_map, pair_hasher, pair_comparator>> my_set; 46 | }; 47 | 48 | 49 | #endif //COMPILER_CFG_SET_H 50 | -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/util/follow_set.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by awalid on 4/27/18. 3 | // 4 | 5 | #include 6 | #include "follow_set.h" 7 | #include "../cfg.h" 8 | 9 | follow_set::follow_set() = default; 10 | 11 | void follow_set::add_symbol(std::string key, cfg_symbol symbol) { 12 | follow_set::my_set[key].insert(symbol); 13 | } 14 | 15 | const std::unordered_map> & 16 | follow_set::get_set_map() const { 17 | return follow_set::my_set; 18 | } 19 | 20 | bool follow_set::has_eps(std::string symbol) { 21 | for(auto sym : follow_set::my_set[symbol]) { 22 | if (sym.get_name() == EPS) { 23 | return true; 24 | } 25 | } 26 | return false; 27 | } 28 | 29 | bool follow_set::empty(std::string key) { 30 | return follow_set::my_set[key].empty(); 31 | } 32 | 33 | void follow_set::print_to_console() { 34 | std::cout << "============================PRINTING FOLLOW SET==================================\n"; 35 | for (auto elem : follow_set::my_set) { 36 | std::cout << "FOLLOW(" << elem.first << ") = {"; 37 | int cnt = 0; 38 | for (auto x : elem.second) { 39 | std::cout << x.get_name() ; 40 | if (cnt != elem.second.size() - 1) { 41 | std::cout << ", "; 42 | } 43 | cnt++; 44 | } 45 | std::cout << "}\n"; 46 | } 47 | std::cout << "=================================================================================\n"; 48 | } -------------------------------------------------------------------------------- /syntax_analyzer/context_free_grammar/util/follow_set.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by awalid on 4/27/18. 
3 | // 4 | 5 | #ifndef COMPILER_FOLLOW_SET_H 6 | #define COMPILER_FOLLOW_SET_H 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "../cfg_symbol.h" 13 | 14 | class cfg_symbol; 15 | 16 | class follow_set { 17 | public: 18 | follow_set(); 19 | 20 | void add_symbol(std::string, cfg_symbol symbol); 21 | const std::unordered_map> &get_set_map() const; 23 | bool has_eps(std::string); 24 | bool empty(std::string); 25 | void print_to_console(); 26 | private: 27 | std::unordered_map> my_set; 28 | }; 29 | 30 | 31 | #endif //COMPILER_FOLLOW_SET_H 32 | -------------------------------------------------------------------------------- /syntax_analyzer/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "parsing_table.h" 3 | #include "predictive_parser.h" 4 | 5 | int main (int argc, char *argv[]) { 6 | 7 | // cfg cfg_ob = cfg ("../../tests/syntax_analyzer/unit/general_test.bnf"); 8 | // cfg cfg_ob = cfg ("../../tests/syntax_analyzer/unit/complex_left_rec.bnf"); 9 | // cfg cfg_ob = cfg ("../../tests/syntax_analyzer/regression/test_1/ll1_cfg.bnf"); 10 | // cfg cfg_ob = cfg ("../../tests/syntax_analyzer/unit/cfg_single_line_ll1.bnf"); 11 | // cfg cfg_ob = cfg ("../../tests/syntax_analyzer/unit/ps_cfg_single_line.bnf"); 12 | // cfg cfg_ob = cfg ("../tests/syntax_analyzer/unit/first_follow_test.bnf"); 13 | cfg cfg_ob = cfg ("../tests/syntax_analyzer/unit/ready_ll1_cfg.bnf"); 14 | 15 | std::unordered_map grammar; 16 | /** Grammar Checking. **/ 17 | grammar = cfg_ob.get_grammar (); 18 | 19 | 20 | std::cout << "++++ grammar before +++++" << std::endl; 21 | for (auto g : cfg_ob.get_grammar()) 22 | { 23 | std::cout << g.second.to_string() << std::endl; 24 | } 25 | 26 | cfg_ob.left_factor(); 27 | 28 | std::cout << "+++++ grammar after left_factoring ++++" << std::endl; 29 | for (auto g : cfg_ob.get_grammar()) 30 | { 31 | std::cout << g.second.to_string() << std::endl; 32 | } 33 | 34 | cfg_ob.remove_left_recursion(); 35 | 36 | std::cout << "+++++ grammar after remove_left_recursion ++++" << std::endl; 37 | for (auto g : cfg_ob.get_grammar()) 38 | { 39 | std::cout << g.second.to_string() << std::endl; 40 | } 41 | 42 | std::cout << "0" << std::endl; 43 | std::shared_ptr p_table = std::make_shared(cfg_ob); 44 | 45 | std::cout << "1" << std::endl; 46 | 47 | cfg_ob.print_cfg_symbol_productions_map(); 48 | cfg_ob.get_first_set()->print_to_console(); 49 | cfg_ob.get_follow_set()->print_to_console(); 50 | 51 | std::cout << "2" << std::endl; 52 | 53 | p_table->draw ("parsing_table.txt"); 54 | 55 | std::vector input_buffer {"int", 56 | "id", 57 | ";", 58 | "if", 59 | "(", 60 | "id", 61 | "relop", 62 | "num", 63 | ")", 64 | "{", 65 | "id", 66 | "assign", 67 | "num", 68 | ";", 69 | "}", "else", "{","}", 70 | // "}", 71 | "$"}; 72 | 73 | predictive_parser parser(cfg_ob.get_start_symbol(), p_table, input_buffer); 74 | parser.parse(); 75 | 76 | std::vector stack = parser.get_debug_stack(); 77 | std::vector derivation = parser.get_derivations(); 78 | 79 | parser.write_debug_stack("debug_stack.log"); 80 | parser.write_derivations("actions_output.log"); 81 | 82 | for (auto s : stack) 83 | { 84 | std::cout << s << std::endl; 85 | } 86 | 87 | std::cout << "***********" << std::endl; 88 | std::cout << "***********" << std::endl; 89 | std::cout << "***********" << std::endl; 90 | std::cout << "***********" << std::endl; 91 | 92 | for (auto d : derivation) 93 | { 94 | std::cout << d << std::endl; 95 | } 96 | } 97 | 
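// ---------------------------------------------------------------------------
// Sketch, not part of the original main() above: driving the parser with real
// lexer output instead of the hard-coded input_buffer. The helper name
// parse_source is illustrative, and the loop assumes get_next_token() keeps
// returning a non-zero value until the input is exhausted; that convention is
// an assumption, not something documented in lexical_tokenizer.h.
// ---------------------------------------------------------------------------
#include <vector>
#include "../lexical_analyzer/lexical_tokenizer/lexical_tokenizer.h"

int parse_source(char *transition_table_file, char *code_file, char *cfg_file)
{
    lexical_tokenizer lex(transition_table_file, code_file);

    std::vector<token> tokens;
    token t;
    while (lex.get_next_token(t))       // assumed termination convention
        tokens.push_back(t);

    // this constructor left-factors the grammar, removes left recursion,
    // builds the LL(1) table and appends the "$" end marker itself
    predictive_parser parser(cfg_file, tokens);
    return parser.parse();              // number of syntax errors found
}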
-------------------------------------------------------------------------------- /syntax_analyzer/parsing_table.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_PARSING_TABLE_H 2 | #define COMPILER_PARSING_TABLE_H 3 | 4 | #include "context_free_grammar/cfg.h" 5 | #include "context_free_grammar/cfg_symbol.h" 6 | #include "context_free_grammar/cfg_rule.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /** 15 | Parsing table that contains state machine to parse input program. 16 | */ 17 | 18 | struct parsing_table_comparator { 19 | bool operator()(const std::pair &p1, 20 | const std::pair &p2) const { 21 | if (!p1.first.compare (p2.first) && p1.second == p2.second) 22 | return true; 23 | return false; 24 | } 25 | }; 26 | 27 | struct parsing_table_hasher { 28 | std::size_t operator()(const std::pair &p) const { 29 | return std::hash()(p.first) ^ std::hash()(p.second); 30 | } 31 | }; 32 | 33 | class parsing_table 34 | { 35 | private: 36 | /** Grammar that the parsing table represents.*/ 37 | cfg grammar; 38 | /** First and Follow sets for the grammar above.*/ 39 | // first_follow_sets first_follow; 40 | /** Parsing Table accessed by rule (NON_TERMINAL) and string (TOKEN) to 41 | get cfg_production used or error.*/ 42 | std::unordered_map , cfg_production, 43 | parsing_table_hasher, parsing_table_comparator> table; 44 | 45 | /** 46 | * This function builds the parsing table for the previously defined grammar. 47 | * It fills the table map defined above by productions using the first and. 48 | * follow sets supplied before. 49 | */ 50 | void build(first_set, follow_set); 51 | 52 | /** 53 | * This function builds the parsing table for the previously defined grammar. 54 | */ 55 | void build(); 56 | public: 57 | /** 58 | * Constructor for parsing table. 59 | * @param cfg grammar to build parsing table for 60 | * @param first_follow_sets to be used in building parsing table 61 | */ 62 | parsing_table (cfg); 63 | parsing_table (cfg, first_set, follow_set); 64 | /** 65 | * Constructor for parsing table, that takes the map with the productions entries in it. 66 | * used for testing. 67 | */ 68 | parsing_table (std::unordered_map , cfg_production, 69 | parsing_table_hasher, parsing_table_comparator> table); 70 | /** 71 | * Default Contructor for parsing table. 72 | */ 73 | parsing_table (); 74 | /** 75 | * 76 | * This function returns the production corresponding to the given cfg_rule. 77 | * (NON_TERMINAL) and given string (TOKEN) from the parsing table. 78 | * @returns cfg_production or NULL if none found to denote Error 79 | * 80 | */ 81 | cfg_production get_production (std::string, std::string); 82 | /** 83 | * draw the parsing table into given file name. 
84 | */ 85 | void draw(std::string file_name); 86 | }; 87 | 88 | 89 | 90 | #endif //COMPILER_PARSING_TABLE_H 91 | -------------------------------------------------------------------------------- /syntax_analyzer/predictive_parser.cpp: -------------------------------------------------------------------------------- 1 | #include "predictive_parser.h" 2 | 3 | predictive_parser::predictive_parser (cfg_symbol start_symbol, std::shared_ptr ll1_table, 4 | std::vector lex_tokens) 5 | : input_buffer(lex_tokens), p_table(ll1_table), debug_stack(), output(), parser_stack() 6 | { 7 | errors_count = 0; 8 | init_stack(start_symbol); 9 | } 10 | 11 | predictive_parser::predictive_parser (cfg_symbol start_symbol, std::shared_ptr ll1_table, 12 | std::vector token_vec) 13 | : p_table(ll1_table), debug_stack(), output(), parser_stack() 14 | { 15 | errors_count = 0; 16 | init_stack(start_symbol); 17 | 18 | for (auto tok : token_vec) 19 | { 20 | input_buffer.push_back(tok.token_class); 21 | lex_values.push_back(tok.lexeme); 22 | } 23 | 24 | } 25 | 26 | predictive_parser::predictive_parser(char *cfg_file, std::vector token_vec) 27 | { 28 | errors_count = 0; 29 | for (auto tok : token_vec) 30 | { 31 | input_buffer.push_back(tok.token_class); 32 | } 33 | input_buffer.push_back("$"); 34 | 35 | cfg grammar(cfg_file); 36 | 37 | grammar.left_factor(); 38 | grammar.remove_left_recursion(); 39 | 40 | std::shared_ptr ll1_table = std::make_shared(grammar); 41 | p_table = ll1_table; 42 | init_stack(grammar.get_start_symbol()); 43 | } 44 | 45 | void predictive_parser::init_stack (cfg_symbol start_sym) 46 | { 47 | cfg_symbol end_marker("$", END_MARKER); 48 | parser_stack.push_back(end_marker); 49 | parser_stack.push_back(start_sym); 50 | } 51 | 52 | std::string predictive_parser::dump_stack () 53 | { 54 | std::stack temp_stack; 55 | std::string stack_str = ""; 56 | while (!parser_stack.empty()) 57 | { 58 | if (parser_stack.back().get_name() != "$") { 59 | stack_str += parser_stack.back().get_name() + " "; 60 | } 61 | temp_stack.push(parser_stack.back()); 62 | parser_stack.pop_back(); 63 | } 64 | 65 | while (!temp_stack.empty()) 66 | { 67 | parser_stack.push_back(temp_stack.top()); 68 | temp_stack.pop(); 69 | } 70 | 71 | return stack_str; 72 | } 73 | 74 | std::vector predictive_parser::get_debug_stack () 75 | { 76 | return debug_stack; 77 | } 78 | 79 | void predictive_parser::write_prod (cfg_production prod) 80 | { 81 | output.push_back (prod.to_string()); 82 | } 83 | 84 | 85 | std::vector predictive_parser::get_derivations () 86 | { 87 | return output; 88 | } 89 | 90 | int predictive_parser::parse() 91 | { 92 | int i = 0; 93 | while (!parser_stack.empty()) 94 | { 95 | std::string cur_token = input_buffer[i]; 96 | cfg_symbol stack_top = parser_stack.back(); 97 | debug_stack.push_back(dump_stack()); 98 | 99 | if (stack_top.get_type() == NON_TERMINAL) 100 | { 101 | cfg_production prod = p_table->get_production(stack_top.get_name(), cur_token); 102 | if (prod.get_lhs_symbol().get_type() == SYNCH) 103 | { 104 | parser_stack.pop_back(); 105 | output.push_back("SYNCH (pop_stack)"); 106 | } 107 | else if (prod.get_symbols().empty()) 108 | { 109 | // ERROR! discard curr_tok 110 | if (cur_token == "$") { 111 | output.push_back("END! 
Error: (illegal " + stack_top.get_name() + ") - discard " + cur_token); 112 | errors_count++; 113 | break; 114 | } 115 | output.push_back("Error: (illegal " + stack_top.get_name() + ") - discard " + cur_token); 116 | errors_count++; 117 | i++; 118 | } 119 | else 120 | { 121 | write_prod(prod); 122 | std::vector symbols = prod.get_symbols(); 123 | std::reverse(symbols.begin(), symbols.end()); 124 | 125 | if (prod.get_symbols().front().get_name() != EPS) 126 | { 127 | bool flag = parser_stack.back().has_inherited_attribute(); 128 | std::map> inh_attr; 129 | if (flag) { 130 | inh_attr = parser_stack.back().get_inherited_attributes(); 131 | } 132 | 133 | parser_stack.pop_back(); 134 | for (cfg_symbol sym : symbols) { 135 | parser_stack.push_back(sym); 136 | } 137 | 138 | if (flag) { 139 | parser_stack.back().set_inherited_attributes(inh_attr); 140 | } 141 | } 142 | else if (prod.get_symbols().front().get_name() == EPS && prod.get_symbols().size() > 1) 143 | { 144 | // ACTIONS AND SYNTHESISED ATTRIBUTES. 145 | parser_stack.pop_back(); 146 | for (int j = 0; j < symbols.size() - 1; ++j) { 147 | parser_stack.push_back(symbols[j]); 148 | } 149 | } 150 | else 151 | { 152 | // EPSILON PRODUCTION 153 | parser_stack.pop_back(); 154 | } 155 | } 156 | } 157 | else if (stack_top.get_type() == TERMINAL) 158 | { 159 | if (cur_token == stack_top.get_name()) 160 | { 161 | parser_stack.pop_back(); 162 | output.push_back("match: " + cur_token); 163 | if (!parser_stack.empty() && i < lex_values.size()) { 164 | parser_stack.back().add_attribute("lexval", lex_values[i]); 165 | } 166 | i++; 167 | } 168 | else 169 | { 170 | // ERROR! insert curr_tok 171 | output.push_back("Error: (missing " + stack_top.get_name() + ") - inserted."); 172 | errors_count++; 173 | parser_stack.pop_back(); 174 | } 175 | } 176 | else if (stack_top.get_type() == END_MARKER) 177 | { 178 | if (cur_token == stack_top.get_name()) 179 | { 180 | parser_stack.pop_back(); 181 | output.push_back("accept"); 182 | break; 183 | } else { 184 | output.push_back("Error: (illegal " + stack_top.get_name() + " - discard " + cur_token); 185 | errors_count++; 186 | i++; 187 | } 188 | } 189 | else if (stack_top.get_type() == SYNCH) // TODO this will never happen 190 | { 191 | if (cur_token == stack_top.get_name()) 192 | { 193 | parser_stack.pop_back(); 194 | } 195 | } 196 | else if (stack_top.get_type() == ACTION) 197 | { 198 | stack_top.get_action()(parser_stack); 199 | parser_stack.pop_back(); 200 | } 201 | else if (stack_top.get_type() == SYNTHESISED) 202 | { 203 | stack_top.get_action()(parser_stack); 204 | parser_stack.pop_back(); 205 | } 206 | } 207 | return errors_count; 208 | } 209 | 210 | void predictive_parser::write_debug_stack(std::string file_name) 211 | { 212 | std::ofstream output_file; 213 | output_file.open(file_name); 214 | 215 | for (auto s : debug_stack) 216 | { 217 | output_file << s << std::endl; 218 | } 219 | output_file.close(); 220 | } 221 | 222 | void predictive_parser::write_derivations(std::string file_name) 223 | { 224 | std::ofstream output_file; 225 | output_file.open(file_name); 226 | 227 | for (auto derivation : output) 228 | { 229 | output_file << derivation << std::endl; 230 | } 231 | 232 | output_file.close(); 233 | } 234 | -------------------------------------------------------------------------------- /syntax_analyzer/predictive_parser.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_PREDICTIVE_PARSER_H 2 | #define COMPILER_PREDICTIVE_PARSER_H 3 | 4 | #include 
"context_free_grammar/cfg_production.h" 5 | #include "parsing_table.h" 6 | #include "../lexical_analyzer/lexical_tokenizer/token.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class predictive_parser 15 | { 16 | private: 17 | std::vector parser_stack; 18 | std::vector debug_stack; // used for debugging 19 | std::vector input_buffer; 20 | std::vector lex_values; 21 | std::vector output; 22 | std::shared_ptr p_table; 23 | /** 24 | * parsing errors counter 25 | */ 26 | int errors_count; 27 | /** 28 | * Writes the action taken in each step (derivation steps) into the output vector. 29 | */ 30 | void write_prod (cfg_production prod); 31 | /** 32 | * Initialize the stack with the end marker ($) and the start symbol. 33 | */ 34 | void init_stack (cfg_symbol); 35 | /** 36 | * converts the current stack contents into string. 37 | */ 38 | std::string dump_stack(); 39 | public: 40 | /** 41 | * Takes the start_symbol, parsing table, and the input buffer to be parsed. 42 | * 43 | * @param input_buffer the sequence of tokens produced from lexical analyzer. 44 | * // TODO change the input_buffer to lexical class and call get_next_token() function on it, rather 45 | * than fetching all the source code tokens into memory. 46 | * 47 | * @param start_symbol grammer's start symbol 48 | * @param p_table The parsing table constructed from the context free grammer production rules. 49 | * 50 | */ 51 | predictive_parser (cfg_symbol start_symbol, std::shared_ptr p_table, 52 | std::vector input_buffer); 53 | 54 | /** 55 | * for testing. 56 | * @param start_symbol 57 | * @param ll1_table 58 | * @param token_vec 59 | */ 60 | predictive_parser (cfg_symbol start_symbol, std::shared_ptr ll1_table, 61 | std::vector token_vec); 62 | /** 63 | * This is called by the compiler main 64 | * @param cfg_file 65 | * @param token_vec 66 | */ 67 | predictive_parser (char *cfg_file, std::vector token_vec); 68 | /** 69 | * returns the debug stack. 70 | */ 71 | std::vector get_debug_stack (); 72 | /** 73 | * returns the derivations steps. 74 | */ 75 | std::vector get_derivations (); 76 | /** 77 | * writes the derivations steps into output file. 78 | */ 79 | void write_derivations (std::string); 80 | /** 81 | * writes the debug stack steps into output file. 82 | */ 83 | void write_debug_stack (std::string); 84 | /** 85 | * Start parsing 86 | * @returns number of errors occured while parsing. 87 | */ 88 | int parse(); 89 | 90 | }; 91 | 92 | #endif //COMPILER_PREDICTIVE_PARSER_H -------------------------------------------------------------------------------- /syntax_analyzer/syntax_analyzer_runner.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Checks if "build" directory exists or not. If it exists then it will 4 | # not create another one to avoid unnecessary warning messages else it will create one. 5 | if [ ! -d "build" ]; then 6 | mkdir build 7 | fi 8 | 9 | cd build/ 10 | 11 | # Checks if "CMakeCache.txt" file exists or not. If it exists then it will remove it 12 | # to avoid confusion between multiple building modes. 13 | #if [ -f "CMakeCache.txt" ]; then 14 | # rm CMakeCache.txt 15 | #fi 16 | 17 | # Running cmake on the CMakeLists.txt in the "syntax_analyzer" (Parent) directory. 18 | 19 | # 1- With the option of building unit tests turned ON. 20 | cmake -DBUILD_UNIT_TEST=ON .. 21 | 22 | # 2- With the option of building unit tests turned OFF explicitly. 23 | #cmake -DBUILD_UNIT_TEST=OFF .. 
24 | 25 | # 3- With the option of building unit tests turned OFF implicitly. 26 | # NOTING that this option will work correctly as long as we remove CMakeCache.txt 27 | # everytime because if we build this module with -DBUILD_UNIT_TEST=ON this value will be 28 | # cached and if we didn't turn this option off explicity using -DBUILD_UNIT_TEST=OFF 29 | # cmake will use the cached value and will build the tests accordingly even if we don't 30 | # need them in our build. 31 | #cmake .. 32 | 33 | make 34 | 35 | # "ctest" or "make test" can be used and both will work correctly as long as we remove 36 | # CMakeCache.txt but "make test" will work correctly either we remove it or not as it 37 | # doesn't use cached test results (Dangerous! maybe misleading) as ctest; It re-builds 38 | # modified tests individually so their results are always up-to-date. 39 | make test 40 | 41 | #ctest -------------------------------------------------------------------------------- /tests/catch_main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include "lib/catch.hpp" 3 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_0/compiler.log: -------------------------------------------------------------------------------- 1 | Match State Lexeme Token Class Position 2 | ----------- ------ ----------- -------- 3 | [MATCHED] int int 3 4 | [MATCHED] sum id 7 5 | [MATCHED] , , 9 6 | [MATCHED] count id 15 7 | [MATCHED] , , 17 8 | [MATCHED] pass id 22 9 | [MATCHED] , , 24 10 | [MATCHED] mnt id 29 11 | [MATCHED] ; ; 30 12 | [MATCHED] while while 36 13 | [MATCHED] ( ( 38 14 | [MATCHED] pass id 42 15 | [MATCHED] != relop 45 16 | [MATCHED] 10 num 48 17 | [MATCHED] ) ) 49 18 | [MATCHED] { { 51 19 | [MATCHED] pass id 58 20 | [MATCHED] = assign 60 21 | [MATCHED] pass id 65 22 | [MATCHED] + addop 67 23 | [MATCHED] 1 num 69 24 | [MATCHED] ; ; 71 25 | [MATCHED] } } 75 26 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_0/rules.txt: -------------------------------------------------------------------------------- 1 | letter = a-z | A-Z 2 | digit = 0-9 3 | id: letter (letter|digit)* 4 | digits = digit+ 5 | {boolean int float} 6 | num: digit+ | digit+ . 
digits ( \L | E digits) 7 | relop: \=\= | !\= | > | >\= | < | <\= 8 | assign: \= 9 | { if else while } 10 | [ ; , \( \) { } ] 11 | addop: \+ | \- 12 | mulop: \* | / -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_0/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | sum id 3 | , , 4 | count id 5 | pass id 6 | mnt id 7 | ; ; 8 | while while 9 | ( ( 10 | != relop 11 | 10 num 12 | ) ) 13 | { { 14 | = assign 15 | + addop 16 | 1 num 17 | } } 18 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_0/test_0.txt: -------------------------------------------------------------------------------- 1 | int sum , count , pass , 2 | mnt; while (pass != 10) 3 | { 4 | pass = pass + 1 ; 5 | } 6 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_0/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | id 5 | , 6 | id 7 | , 8 | id 9 | ; 10 | while 11 | ( 12 | id 13 | relop 14 | num 15 | ) 16 | { 17 | id 18 | assign 19 | id 20 | addop 21 | num 22 | ; 23 | } 24 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_0/transition_table.txt: -------------------------------------------------------------------------------- 1 | Total States: 44 2 | Start State(s): {0} 3 | Acceptance State(s): 40 4 | {1} ; 5 | {2} relop 6 | {3} assign 7 | {4} addop 8 | {5} , 9 | {6} mulop 10 | {8} ( 11 | {9} ) 12 | {10} id 13 | {11} id 14 | {12} id 15 | {13} id 16 | {14} id 17 | {15} id 18 | {16} { 19 | {17} } 20 | {18} num 21 | {19} relop 22 | {20} id 23 | {21} id 24 | {22} id 25 | {23} if 26 | {24} id 27 | {25} id 28 | {27} id 29 | {28} id 30 | {29} id 31 | {30} int 32 | {31} id 33 | {32} num 34 | {33} id 35 | {34} else 36 | {35} id 37 | {36} id 38 | {38} id 39 | {39} float 40 | {40} while 41 | {41} num 42 | {42} id 43 | {43} boolean 44 | State ! ( ) * + , - . 
/ ; < = > E a b e f h i l n o s t w { } 0-9 A-Z a-z 45 | {0} {7} {8} {9} {6} {4} {5} {4} {-} {6} {1} {2} {3} {2} {10} {10} {11} {12} {13} {10} {14} {10} {10} {10} {10} {10} {15} {16} {17} {18} {10} {10} 46 | {1} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 47 | {2} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 48 | {3} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 49 | {4} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 50 | {5} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 51 | {6} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 52 | {7} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 53 | {8} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 54 | {9} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 55 | {10} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 56 | {11} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {20} {10} {10} {10} {-} {-} {10} {10} {10} 57 | {12} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {21} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 58 | {13} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {22} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 59 | {14} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {23} {10} {10} {10} {24} {10} {10} {10} {10} {-} {-} {10} {10} {10} 60 | {15} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {25} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 61 | {16} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 62 | {17} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 63 | {18} {-} {-} {-} {-} {-} {-} {-} {26} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {18} {-} {-} 64 | {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 65 | {20} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {27} {10} {10} {10} {-} {-} {10} {10} {10} 66 | {21} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {28} {10} {10} {-} {-} {10} {10} {10} 67 | {22} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {29} {10} {10} {10} {-} {-} {10} {10} {10} 68 | {23} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 69 | {24} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} 
{10} {30} {10} {-} {-} {10} {10} {10} 70 | {25} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {31} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 71 | {26} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {32} {-} {-} 72 | {27} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {33} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 73 | {28} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {34} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 74 | {29} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {35} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 75 | {30} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 76 | {31} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {36} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 77 | {32} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {37} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {32} {-} {-} 78 | {33} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {38} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 79 | {34} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 80 | {35} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {39} {10} {-} {-} {10} {10} {10} 81 | {36} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {40} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 82 | {37} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {41} {-} {-} 83 | {38} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {42} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 84 | {39} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 85 | {40} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 86 | {41} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {41} {-} {-} 87 | {42} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {43} {10} {10} {10} {10} {-} {-} {10} {10} {10} 88 | {43} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 89 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_1/compiler.log: -------------------------------------------------------------------------------- 1 | Match State Lexeme Token Class Position 2 | ----------- ------ ----------- -------- 3 | [MATCHED] int int 3 4 | [MATCHED] int1 id 8 5 | [MATCHED] , , 9 6 | [MATCHED] int2 id 14 7 | [MATCHED] , , 15 8 | [MATCHED] int3 id 20 9 | [MATCHED] ; ; 21 10 | [MATCHED] boolean boolean 30 11 | [MATCHED] boolean1 id 39 12 | [MATCHED] , , 40 13 | [MATCHED] boolean2 id 49 14 | [MATCHED] , , 50 15 | [MATCHED] boolean3 id 59 16 | [MATCHED] ; ; 60 17 | [MATCHED] float float 67 18 | [MATCHED] float1 id 74 19 | 
[MATCHED] , , 75 20 | [MATCHED] float2 id 82 21 | [MATCHED] , , 83 22 | [MATCHED] float3 id 90 23 | [MATCHED] ; ; 91 24 | [MATCHED] while while 98 25 | [MATCHED] ( ( 99 26 | [MATCHED] bool1 id 104 27 | [MATCHED] ) ) 105 28 | [MATCHED] bool2 id 113 29 | [MATCHED] = assign 114 30 | [MATCHED] false id 119 31 | [MATCHED] ; ; 120 32 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_1/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | int1 id 3 | , , 4 | int2 id 5 | int3 id 6 | ; ; 7 | boolean boolean 8 | boolean1 id 9 | boolean2 id 10 | boolean3 id 11 | float float 12 | float1 id 13 | float2 id 14 | float3 id 15 | while while 16 | ( ( 17 | bool1 id 18 | ) ) 19 | bool2 id 20 | = assign 21 | false id 22 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_1/test_1.txt: -------------------------------------------------------------------------------- 1 | int int1, int2, int3; 2 | boolean boolean1, boolean2, boolean3; 3 | float float1, float2, float3; 4 | while(bool1) 5 | bool2=false; 6 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_1/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | id 5 | , 6 | id 7 | ; 8 | boolean 9 | id 10 | , 11 | id 12 | , 13 | id 14 | ; 15 | float 16 | id 17 | , 18 | id 19 | , 20 | id 21 | ; 22 | while 23 | ( 24 | id 25 | ) 26 | id 27 | assign 28 | id 29 | ; 30 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_2/compiler.log: -------------------------------------------------------------------------------- 1 | Match State Lexeme Token Class Position 2 | ----------- ------ ----------- -------- 3 | [MATCHED] int int 3 4 | [MATCHED] int1 id 8 5 | [MATCHED] , , 9 6 | [MATCHED] int2 id 14 7 | [MATCHED] , , 15 8 | [MATCHED] int3 id 20 9 | [MATCHED] ; ; 21 10 | [MATCHED] boolean boolean 30 11 | [MATCHED] boolean1 id 39 12 | [MATCHED] , , 40 13 | [MATCHED] boolean2 id 49 14 | [MATCHED] , , 50 15 | [MATCHED] boolean3 id 59 16 | [MATCHED] ; ; 60 17 | [MATCHED] float float 67 18 | [MATCHED] float1 id 74 19 | [MATCHED] , , 75 20 | [MATCHED] float2 id 82 21 | [MATCHED] , , 83 22 | [MATCHED] float3 id 90 23 | [MATCHED] ; ; 91 24 | [MATCHED] while while 98 25 | [MATCHED] ( ( 99 26 | [MATCHED] boolean1 id 107 27 | [MATCHED] ) ) 108 28 | [MATCHED] { { 112 29 | [MATCHED] boolean2 id 122 30 | [MATCHED] = assign 123 31 | [MATCHED] false id 128 32 | [MATCHED] ; ; 129 33 | [MATCHED] boolean3 id 139 34 | [MATCHED] = assign 140 35 | [MATCHED] true id 144 36 | [MATCHED] ; ; 145 37 | [MATCHED] if if 150 38 | [MATCHED] ( ( 152 39 | [MATCHED] boolean1 id 163 40 | [MATCHED] ) ) 164 41 | [MATCHED] boolean1 id 176 42 | [MATCHED] = assign 178 43 | [MATCHED] false id 184 44 | [MATCHED] ; ; 185 45 | [MATCHED] } } 189 46 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_2/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | int1 id 3 | , , 4 | int2 id 5 | int3 id 6 | ; ; 7 | boolean boolean 8 | boolean1 id 9 | boolean2 id 10 | boolean3 id 11 | float float 12 | float1 id 13 | float2 id 14 | float3 id 15 | while while 16 | ( ( 17 | ) ) 18 | { { 19 | = assign 20 
| false id 21 | true id 22 | if if 23 | } } 24 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_2/test_2.txt: -------------------------------------------------------------------------------- 1 | int int1, int2, int3; 2 | boolean boolean1, boolean2, boolean3; 3 | float float1, float2, float3; 4 | while(boolean1) 5 | { 6 | boolean2=false; 7 | boolean3=true; 8 | if ( 9 | boolean1) 10 | boolean1 = false; 11 | } 12 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_2/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | id 5 | , 6 | id 7 | ; 8 | boolean 9 | id 10 | , 11 | id 12 | , 13 | id 14 | ; 15 | float 16 | id 17 | , 18 | id 19 | , 20 | id 21 | ; 22 | while 23 | ( 24 | id 25 | ) 26 | { 27 | id 28 | assign 29 | id 30 | ; 31 | id 32 | assign 33 | id 34 | ; 35 | if 36 | ( 37 | id 38 | ) 39 | id 40 | assign 41 | id 42 | ; 43 | } 44 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_3/compiler.log: -------------------------------------------------------------------------------- 1 | Match State Lexeme Token Class Position 2 | ----------- ------ ----------- -------- 3 | [MATCHED] int int 3 4 | [MATCHED] int1 id 8 5 | [MATCHED] , , 9 6 | [MATCHED] int2 id 14 7 | [MATCHED] , , 15 8 | [MATCHED] int3 id 20 9 | [MATCHED] ; ; 21 10 | [MATCHED] boolean boolean 30 11 | [MATCHED] boolean1 id 39 12 | [MATCHED] , , 40 13 | [MATCHED] boolean2 id 49 14 | [MATCHED] , , 50 15 | [MATCHED] boolean3 id 64 16 | [MATCHED] ; ; 65 17 | [MATCHED] float float 72 18 | [MATCHED] float1 id 79 19 | [MATCHED] , , 80 20 | [MATCHED] float2 id 87 21 | [MATCHED] , , 88 22 | [MATCHED] float3 id 99 23 | [MATCHED] ; ; 100 24 | [MATCHED] if if 104 25 | [MATCHED] ( ( 106 26 | [MATCHED] boolean1 id 114 27 | [MATCHED] = assign 116 28 | [MATCHED] = assign 118 29 | [MATCHED] boolean2 id 127 30 | [MATCHED] ) ) 128 31 | [MATCHED] { { 132 32 | [MATCHED] float1 id 138 33 | [MATCHED] = assign 146 34 | [MATCHED] float2 id 153 35 | [MATCHED] * mulop 154 36 | [MATCHED] float1 id 160 37 | [MATCHED] ; ; 161 38 | [MATCHED] } } 172 39 | [MATCHED] if if 178 40 | [MATCHED] ( ( 185 41 | [MATCHED] boolean2 id 193 42 | [MATCHED] == relop 196 43 | [MATCHED] boolean3 id 205 44 | [MATCHED] ) ) 206 45 | [MATCHED] { { 210 46 | [MATCHED] float2 id 216 47 | [MATCHED] = assign 218 48 | [MATCHED] float3 id 225 49 | [MATCHED] * mulop 226 50 | [MATCHED] float2 id 232 51 | [MATCHED] ; ; 233 52 | [MATCHED] } } 234 53 | [MATCHED] if if 238 54 | [MATCHED] ( ( 240 55 | [MATCHED] boolea3 id 247 56 | [MATCHED] = assign 249 57 | [MATCHED] boolean1 id 258 58 | [MATCHED] ) ) 259 59 | [MATCHED] { { 263 60 | [MATCHED] float3 id 274 61 | [MATCHED] = assign 276 62 | [MATCHED] float1 id 283 63 | [MATCHED] * mulop 284 64 | [MATCHED] float3 id 290 65 | [MATCHED] ; ; 291 66 | [MATCHED] } } 296 67 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_3/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | int1 id 3 | , , 4 | int2 id 5 | int3 id 6 | ; ; 7 | boolean boolean 8 | boolean1 id 9 | boolean2 id 10 | boolean3 id 11 | float float 12 | float1 id 13 | float2 id 14 | float3 id 15 | if if 16 | ( ( 17 | = assign 18 | ) ) 19 | { { 20 | * mulop 21 | } } 22 | == relop 23 | 
boolea3 id 24 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_3/test_3.txt: -------------------------------------------------------------------------------- 1 | int int1, int2, int3; 2 | boolean boolean1, boolean2, boolean3; 3 | float float1, float2, float3; 4 | if (boolean1 = = boolean2) 5 | {float1 = float2*float1; } 6 | if (boolean2 == boolean3) 7 | {float2 = float3*float2;} 8 | if (boolea3 = boolean1) 9 | { 10 | 11 | float3 = float1*float3; 12 | 13 | } -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_3/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | id 5 | , 6 | id 7 | ; 8 | boolean 9 | id 10 | , 11 | id 12 | , 13 | id 14 | ; 15 | float 16 | id 17 | , 18 | id 19 | , 20 | id 21 | ; 22 | if 23 | ( 24 | id 25 | assign 26 | assign 27 | id 28 | ) 29 | { 30 | id 31 | assign 32 | id 33 | mulop 34 | id 35 | ; 36 | } 37 | if 38 | ( 39 | id 40 | relop 41 | id 42 | ) 43 | { 44 | id 45 | assign 46 | id 47 | mulop 48 | id 49 | ; 50 | } 51 | if 52 | ( 53 | id 54 | assign 55 | id 56 | ) 57 | { 58 | id 59 | assign 60 | id 61 | mulop 62 | id 63 | ; 64 | } 65 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_4/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | int1 id 3 | , , 4 | int2 id 5 | int3 id 6 | ; ; 7 | boolean boolean 8 | boolean1 id 9 | boolean2 id 10 | boolean3 id 11 | float float 12 | float1 id 13 | float2 id 14 | float3 id 15 | if if 16 | ( ( 17 | = assign 18 | ) ) 19 | { { 20 | * mulop 21 | } } 22 | == relop 23 | boolea3 id 24 | while while 25 | + addop 26 | - addop 27 | / mulop 28 | <= relop 29 | 0 num 30 | < relop 31 | 1423 num 32 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_4/test_4.txt: -------------------------------------------------------------------------------- 1 | int int1, int2, int3; 2 | boolean boolean1, boolean2, boolean3; 3 | float float1, float2, float3; 4 | if (boolean1 = = boolean2) 5 | {float1 = float2*float1; } 6 | if (boolean2 == boolean3) 7 | {float2 = float3*float2;} 8 | if (boolea3 = boolean1) 9 | { 10 | while (int3 + int2 - int1 / (int) float1 <= 0) 11 | { 12 | if (boolean1 < boolean3) int1 = 1423; 13 | } 14 | float3 = float1*float3; 15 | 16 | } -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_4/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | id 5 | , 6 | id 7 | ; 8 | boolean 9 | id 10 | , 11 | id 12 | , 13 | id 14 | ; 15 | float 16 | id 17 | , 18 | id 19 | , 20 | id 21 | ; 22 | if 23 | ( 24 | id 25 | assign 26 | assign 27 | id 28 | ) 29 | { 30 | id 31 | assign 32 | id 33 | mulop 34 | id 35 | ; 36 | } 37 | if 38 | ( 39 | id 40 | relop 41 | id 42 | ) 43 | { 44 | id 45 | assign 46 | id 47 | mulop 48 | id 49 | ; 50 | } 51 | if 52 | ( 53 | id 54 | assign 55 | id 56 | ) 57 | { 58 | while 59 | ( 60 | id 61 | addop 62 | id 63 | addop 64 | id 65 | mulop 66 | ( 67 | int 68 | ) 69 | id 70 | relop 71 | num 72 | ) 73 | { 74 | if 75 | ( 76 | id 77 | relop 78 | id 79 | ) 80 | id 81 | assign 82 | num 83 | ; 84 | } 85 | id 86 | assign 87 | id 88 | mulop 89 | id 90 | ; 91 | } 92 | 
-------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_5/compiler.log: -------------------------------------------------------------------------------- 1 | Match State Lexeme Token Class Position 2 | ----------- ------ ----------- -------- 3 | [MATCHED] int int 3 4 | [UNMATCHED] # 5 5 | [UNMATCHED] _ 6 6 | [MATCHED] sum id 9 7 | [MATCHED] , , 11 8 | [MATCHED] 111 num 15 9 | [MATCHED] count id 20 10 | [MATCHED] , , 22 11 | [MATCHED] pass id 27 12 | [MATCHED] , , 29 13 | [MATCHED] mnt id 33 14 | [MATCHED] ; ; 34 15 | [MATCHED] while while 40 16 | [MATCHED] ( ( 42 17 | [MATCHED] pass id 46 18 | [UNMATCHED] ! 48 19 | [UNMATCHED] @ 49 20 | [MATCHED] 10 num 52 21 | [MATCHED] ) ) 53 22 | [MATCHED] { { 55 23 | [MATCHED] pass id 64 24 | [MATCHED] = assign 66 25 | [MATCHED] pass id 71 26 | [MATCHED] + addop 73 27 | [MATCHED] 1 num 75 28 | [MATCHED] - addop 77 29 | [MATCHED] - addop 78 30 | [MATCHED] - addop 79 31 | [MATCHED] ; ; 80 32 | [MATCHED] } } 82 33 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_5/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | sum id 3 | , , 4 | 111 num 5 | count id 6 | pass id 7 | mnt id 8 | ; ; 9 | while while 10 | ( ( 11 | 10 num 12 | ) ) 13 | { { 14 | = assign 15 | + addop 16 | 1 num 17 | - addop 18 | } } 19 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_5/test_5.txt: -------------------------------------------------------------------------------- 1 | int #_sum , 111count , pass , mnt; 2 | while (pass !@ 10) 3 | { 4 | pass = pass + 1 ---; 5 | } 6 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_5/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | num 5 | id 6 | , 7 | id 8 | , 9 | id 10 | ; 11 | while 12 | ( 13 | id 14 | num 15 | ) 16 | { 17 | id 18 | assign 19 | id 20 | addop 21 | num 22 | addop 23 | addop 24 | addop 25 | ; 26 | } 27 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_6/compiler.log: -------------------------------------------------------------------------------- 1 | Match State Lexeme Token Class Position 2 | ----------- ------ ----------- -------- 3 | [MATCHED] int int 3 4 | [UNMATCHED] # 5 5 | [UNMATCHED] _ 6 6 | [MATCHED] sum id 9 7 | [MATCHED] , , 11 8 | [MATCHED] 111 num 15 9 | [MATCHED] count id 20 10 | [MATCHED] , , 22 11 | [MATCHED] pass id 27 12 | [MATCHED] , , 29 13 | [MATCHED] mnt id 33 14 | [MATCHED] ; ; 34 15 | [MATCHED] while while 40 16 | [MATCHED] ( ( 42 17 | [MATCHED] pass id 46 18 | [UNMATCHED] ! 48 19 | [UNMATCHED] @ 49 20 | [MATCHED] 10 num 52 21 | [MATCHED] ) ) 53 22 | [MATCHED] { { 55 23 | [MATCHED] while while 65 24 | [MATCHED] ( ( 67 25 | [UNMATCHED] ! 69 26 | [UNMATCHED] # 70 27 | [UNMATCHED] @ 71 28 | [MATCHED] abc id 74 29 | [UNMATCHED] _ 75 30 | [MATCHED] 1 num 76 31 | [UNMATCHED] ! 
77 32 | [MATCHED] - addop 78 33 | [MATCHED] - addop 79 34 | [MATCHED] = assign 80 35 | [MATCHED] ) ) 82 36 | [MATCHED] { { 84 37 | [MATCHED] int int 96 38 | [MATCHED] boolean boolean 104 39 | [MATCHED] float float 110 40 | [MATCHED] gloat id 116 41 | [MATCHED] r id 118 42 | [MATCHED] - addop 119 43 | [MATCHED] > relop 120 44 | [MATCHED] d id 121 45 | [MATCHED] < relop 122 46 | [UNMATCHED] \ 123 47 | [MATCHED] z id 124 48 | [MATCHED] ; ; 125 49 | [MATCHED] ; ; 126 50 | [MATCHED] ; ; 127 51 | [MATCHED] ; ; 128 52 | [MATCHED] } } 134 53 | [MATCHED] } } 136 54 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_6/symbol-table.txt: -------------------------------------------------------------------------------- 1 | int int 2 | sum id 3 | , , 4 | 111 num 5 | count id 6 | pass id 7 | mnt id 8 | ; ; 9 | while while 10 | ( ( 11 | 10 num 12 | ) ) 13 | { { 14 | abc id 15 | 1 num 16 | - addop 17 | = assign 18 | boolean boolean 19 | float float 20 | gloat id 21 | r id 22 | > relop 23 | d id 24 | < relop 25 | z id 26 | } } 27 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_6/test_6.txt: -------------------------------------------------------------------------------- 1 | int #_sum , 111count , pass , mnt; 2 | while (pass !@ 10) 3 | { 4 | while ( !#@abc_1!--= ) { 5 | int boolean float gloat r->d<\z;;;; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/regression/test_6/token-file.txt: -------------------------------------------------------------------------------- 1 | int 2 | id 3 | , 4 | num 5 | id 6 | , 7 | id 8 | , 9 | id 10 | ; 11 | while 12 | ( 13 | id 14 | num 15 | ) 16 | { 17 | while 18 | ( 19 | id 20 | num 21 | addop 22 | addop 23 | assign 24 | ) 25 | { 26 | int 27 | boolean 28 | float 29 | id 30 | id 31 | addop 32 | relop 33 | id 34 | relop 35 | id 36 | ; 37 | ; 38 | ; 39 | ; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tests/lexical_analyzer/unit/test_1.cpp: -------------------------------------------------------------------------------- 1 | #include "../../lib/catch.hpp" 2 | #include "../../../lexical_analyzer/lexical_analyzer_generator/nfa_tools/regex_processor.h" 3 | 4 | TEST_CASE("test regex_processor") 5 | { 6 | std::map > sym_table; 7 | regular_expression regex1 = {"letter", "a-z | A-Z"}; 8 | std::shared_ptr letter_nfa = evaluate_regex (regex1, sym_table); 9 | REQUIRE(letter_nfa->get_start_state()->get_id() == 0); 10 | } 11 | 12 | 13 | TEST_CASE("dummy test") 14 | { 15 | REQUIRE(1+1-2 == 0); 16 | } -------------------------------------------------------------------------------- /tests/semantic_analyzer/unit/java_bytecode.bnf: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = 2 | @INIT_CODE 3 | STATEMENT_LIST 4 | #STATEMENT_LIST_RECORD 5 | M 6 | #M_RECORD 7 | @METHOD_BODY_ACTION 8 | @FINALIZE_CODE 9 | 10 | # STATEMENT_LIST = 11 | STATMENT 12 | #STATMENT_RECORD 13 | M 14 | #M_RECORD 15 | STATEMENT_LIST1 16 | #STATEMENT_LIST1_RECORD 17 | @STATEMENT_LIST1_ACTION 18 | 19 | # STATEMENT_LIST1 = 20 | STATEMENT 21 | #STATEMENT_RECORD_STATEMENT_LIST1 22 | M 23 | #M_RECORD_STATEMENT_LIST1 24 | STATEMENT_LIST1 25 | #STATEMENT_LIST1_RECORD_STATEMENT_LIST1 26 | @STATEMENT_LIST1_ACTION_STATEMENT_LIST1 27 | | 28 | '\L' 29 | 30 | 31 | # STATEMENT = 32 | DECLARATION 33 | | 34 | IF 35 | 
#IF_RECORD 36 | | 37 | WHILE 38 | #WHILE_RECORD 39 | | 40 | ASSIGNMENT 41 | 42 | 43 | 44 | # DECLARATION = 45 | PRIMITIVE_TYPE 46 | #PRIMITIVE_TYPE_RECORD_DECLARATION 47 | 'id' 48 | @id_ACTION_DECLARATION 49 | ';' 50 | @DECLARATION_ACTION_DECLARATION 51 | 52 | 53 | # PRIMITIVE_TYPE = 54 | 'int' 55 | @int_ACTION_PRIMITIVE_TYPE 56 | | 57 | 'float' 58 | @float_ACTION_PRIMITIVE_TYPE 59 | 60 | 61 | # IF = 62 | 'if' 63 | '(' 64 | EXPRESSION 65 | #EXPRESSION_RECORD_IF 66 | ')' 67 | '{' 68 | M 69 | #M1_RECORD_IF 70 | STATEMENT 71 | #STATEMENT1_RECORD_IF 72 | @GOTO_ACTION_IF 73 | '}' 74 | 'else' 75 | '{' 76 | M 77 | #M2_RECORD_IF 78 | STATEMENT 79 | #STATEMENT2_RECORD_IF 80 | '}' 81 | @IF_ACTION_IF 82 | 83 | # WHILE = 84 | M 85 | #M1_RECORD_WHILE 86 | 'while' 87 | '(' 88 | EXPRESSION 89 | #EXPRESSION_RECORD_WHILE 90 | ')' 91 | '{' 92 | M 93 | #M2_RECORD_WHILE 94 | STATEMENT 95 | #STATEMENT_RECORD_WHILE 96 | '}' 97 | @WHILE_ACTION_WHILE 98 | 99 | # ASSIGNMENT = 100 | 'id' 101 | @id_ACTION_ASSIGNMENT 102 | '=' 103 | EXPRESSION 104 | #EXPRESSION_RECORD_ASSIGNMENT 105 | ';' 106 | @ASSIGNMENT_ACTION_ASSIGNMENT 107 | 108 | # EXPRESSION = 109 | SIMPLE_EXPRESSION 110 | #SIMPLE_EXPRESSION_RECORD_EXPRESSION 111 | EXPRESSION1 112 | #EXPRESSION1_RECORD_EXPRESSION 113 | 114 | # EXPRESSION1 = 115 | @EXPRESSION1_IN_relop 116 | 'relop' 117 | @relop_ACTION 118 | SIMPLE_EXPRESSION 119 | #SIMPLE_EXPRESSION_RECORD_EXPRESSION1 120 | @EXPRESSION1_ACTION_relop 121 | | 122 | @EXPRESSION1_IN_eps 123 | '\L' 124 | EXPRESSION1_ACTION_eps 125 | 126 | 127 | # SIMPLE_EXPRESSION = 128 | TERM 129 | #TERM_RECORD_SIMPLE_EXPRESSION 130 | SIMPLE_EXPRESSION1 131 | #SIMPLE_EXPRESSION1_RECORD_SIMPLE_EXPRESSION 132 | 133 | 134 | # SIMPLE_EXPRESSION1 = 135 | @SIMPLE_EXPRESSION1_IN_addop 136 | 'addop' 137 | @addop_ACTION 138 | TERM 139 | #TERM_RECORD_SIMPLE_EXPRESSION1 140 | @SIMPLE_EXPRESSION1_ACTION_addop 141 | #SIMPLE_EXPRESSION1_RECORD_SIMPLE_EXPRESSION1 142 | | 143 | @SIMPLE_EXPRESSION1_IN_eps 144 | '\L' 145 | @SIMPLE_EXPRESSION1_ACTION_eps 146 | 147 | 148 | # TERM = 149 | FACTOR 150 | #FACTOR_RECORD_TERM 151 | TERM1 152 | #TERM1_RECORD_TERM 153 | 154 | 155 | 156 | # TERM1 = 157 | @TERM1_IN_mulop 158 | 'mulop' 159 | @mulop_ACTION 160 | FACTOR 161 | #FACTOR_RECORD_TERM1 162 | @TERM1_ACTION_mulop 163 | TERM1 164 | #TERM1_RECORD_TERM1 165 | | 166 | @TERM1_IN_eps 167 | '\L' 168 | @TERM1_ACTION_eps 169 | 170 | # FACTOR = 171 | 'id' 172 | @id_ACTION_FACTOR 173 | @FACTOR_ACTION_FACTOR_id 174 | | 175 | 'num' 176 | @num_ACTION_FACTOR 177 | @FACTOR_ACTION_FACTOR_num 178 | | 179 | '(' 180 | SIMPLE_EXPRESSION 181 | #SIMPLE_EXPRESSION_RECORD 182 | ')' 183 | @FACTOR_ACTION_FACTOR_PAREN 184 | 185 | # M = '\L' 186 | 187 | -------------------------------------------------------------------------------- /tests/semantic_analyzer/unit/test_1.cpp: -------------------------------------------------------------------------------- 1 | #include "../../lib/catch.hpp" 2 | #include "../../../syntax_analyzer/predictive_parser.h" 3 | #include "../../../semantic_analyzer/intermediate_code_generation/semantic_rules/three_address_code.h" 4 | 5 | #include 6 | 7 | TEST_CASE ("TEST 1") { 8 | // initial stack: 9 | // A S{print A.n1, print A.n0} $ 10 | 11 | // A -> B S{stack[top - 1].n0 = n0, stack[top - 1].n1 = n1} S{print A.n1, print A.n0} 12 | // B -> 0 B1 S{[top - 1].n0 = B1.n0 + 1, [top - 1].n1 = B1.n1} 13 | // B -> 1 B1 S{[top - 1].n1 = B1.n1 + 1, [top - 1].n0 = B1.n0} 14 | // B -> '\L' @{[top - 1].n0 = 0, [top - 1].n1 = 0} 15 | 16 | 17 | cfg_symbol A("A", 
NON_TERMINAL); 18 | cfg_symbol B("B", NON_TERMINAL); 19 | cfg_symbol B1("B", NON_TERMINAL); 20 | 21 | cfg_symbol s_0("0", TERMINAL); 22 | cfg_symbol s_1("1", TERMINAL); 23 | cfg_symbol eps(EPS, TERMINAL); 24 | 25 | int ones; 26 | int zeros; 27 | 28 | // SYNTHESISE RECORDS 29 | cfg_symbol synthesize_record1("record_B", SYNTHESISED); 30 | synthesize_record1.set_action( 31 | [] (std::vector& stack) { 32 | // stack[top - 1].n0 = n0, stack[top - 1].n1 = n1 33 | stack[stack.size() - 2].add_attribute("n0", stack.back().get_attribute("n0").front()); 34 | stack[stack.size() - 2].add_attribute("n1", stack.back().get_attribute("n1").front()); 35 | } 36 | ); 37 | 38 | cfg_symbol synthesize_record2("record_A", SYNTHESISED); 39 | synthesize_record2.set_action( 40 | [&ones, &zeros] (std::vector& stack) { 41 | // S{print A.n1, print A.n0} 42 | zeros = std::atoi(stack.back().get_attribute("n0").front().c_str()); 43 | ones = std::atoi(stack.back().get_attribute("n1").front().c_str()); 44 | } 45 | ); 46 | 47 | cfg_symbol synthesize_record3("record_B1_0", SYNTHESISED); 48 | synthesize_record3.set_action( 49 | [] (std::vector& stack) { 50 | // S{[top - 1].n0 = B1.n0 + 1, [top - 1].n1 = B1.n1} 51 | stack[stack.size() - 2]. 52 | add_attribute("n0", std::to_string(std::atoi(stack.back().get_attribute("n0").front().c_str()) + 1)); 53 | stack[stack.size() - 2].add_attribute("n1", stack.back().get_attribute("n1").front()); 54 | } 55 | ); 56 | 57 | cfg_symbol synthesize_record4("record_B1_1", SYNTHESISED); 58 | synthesize_record4.set_action( 59 | [] (std::vector& stack) { 60 | stack[stack.size() - 2]. 61 | add_attribute("n1", std::to_string(std::atoi(stack.back().get_attribute("n1").front().c_str()) + 1)); 62 | stack[stack.size() - 2].add_attribute("n0", stack.back().get_attribute("n0").front()); 63 | } 64 | ); 65 | 66 | cfg_symbol action1("@action1", ACTION); 67 | action1.set_action( 68 | [] (std::vector& stack) { 69 | // @{[top - 1].n0 = 0, [top - 1].n1 = 0} 70 | stack[stack.size() - 2].add_attribute("n0", "0"); 71 | stack[stack.size() - 2].add_attribute("n1", "0"); 72 | } 73 | ); 74 | 75 | 76 | std::vector A_prod_vector; 77 | 78 | std::vector B_prod1_vector; 79 | std::vector B_prod2_vector; 80 | std::vector B_prod3_vector; 81 | 82 | A_prod_vector.push_back(B); 83 | A_prod_vector.push_back(synthesize_record1); 84 | A_prod_vector.push_back(synthesize_record2); 85 | 86 | B_prod1_vector.push_back(s_0); 87 | B_prod1_vector.push_back(B1); 88 | B_prod1_vector.push_back(synthesize_record3); 89 | 90 | 91 | B_prod2_vector.push_back(s_1); 92 | B_prod2_vector.push_back(B1); 93 | B_prod2_vector.push_back(synthesize_record4); 94 | 95 | B_prod3_vector.push_back(eps); 96 | B_prod3_vector.push_back(action1); 97 | 98 | cfg_production prod_A(A, A_prod_vector); 99 | 100 | cfg_production prod1_B(B, B_prod1_vector); 101 | cfg_production prod2_B(B, B_prod2_vector); 102 | cfg_production prod3_B(B, B_prod3_vector); 103 | 104 | 105 | std::unordered_map, cfg_production, 106 | parsing_table_hasher, parsing_table_comparator> table; 107 | 108 | table[{"A", "0"}] = prod_A; 109 | table[{"A", "1"}] = prod_A; 110 | table[{"A", "$"}] = prod_A; 111 | 112 | table[{"B", "0"}] = prod1_B; 113 | table[{"B", "1"}] = prod2_B; 114 | table[{"B", "$"}] = prod3_B; 115 | 116 | std::shared_ptr p_table = std::make_shared(table); 117 | p_table->draw("parsing_table.txt"); 118 | 119 | std::vector input_buffer{ 120 | "0", 121 | "1", 122 | "1", 123 | "1", 124 | "1", 125 | "1", 126 | "$"}; 127 | 128 | predictive_parser parser(A, p_table, input_buffer); 129 | 
parser.parse(); 130 | 131 | parser.write_derivations("actions.log"); 132 | parser.write_debug_stack("debug_stack.log"); 133 | 134 | REQUIRE(zeros == 1); 135 | REQUIRE(ones == 5); 136 | } 137 | 138 | 139 | 140 | TEST_CASE("TEST 2") 141 | { 142 | cfg cfg_ob = cfg (); 143 | // cfg_ob.add_function ("action_1", zeros_ones_counter::action_1); 144 | // cfg_ob.add_function ("record_A", zeros_ones_counter::record_A); 145 | // cfg_ob.add_function ("record_B", zeros_ones_counter::record_B); 146 | // cfg_ob.add_function ("record_B1_0", zeros_ones_counter::record_B1_0); 147 | // cfg_ob.add_function ("record_B1_1", zeros_ones_counter::record_B1_1); 148 | 149 | 150 | 151 | cfg_ob.add_function ("finalize_action", three_address_code::finalize_action); 152 | 153 | cfg_ob.add_function ("B_action_true", three_address_code::B_action_true); 154 | cfg_ob.add_function ("B_action_false", three_address_code::B_action_false); 155 | cfg_ob.add_function ("E1_record_relop", three_address_code::E1_record_relop); 156 | cfg_ob.add_function ("E2_record_relop", three_address_code::E2_record_relop); 157 | cfg_ob.add_function ("B_action_relop", three_address_code::B_action_relop); 158 | 159 | cfg_ob.add_function ("B1_record_or", three_address_code::B1_record_or); 160 | cfg_ob.add_function ("M_record_or", three_address_code::M_record_or); 161 | cfg_ob.add_function ("B2_record_or", three_address_code::B2_record_or); 162 | cfg_ob.add_function ("B_action_or", three_address_code::B_action_or); 163 | 164 | cfg_ob.add_function ("B_record_if", three_address_code::B_record_if); 165 | cfg_ob.add_function ("M_record_if", three_address_code::M_record_if); 166 | cfg_ob.add_function ("S1_record_if", three_address_code::S1_record_if); 167 | cfg_ob.add_function ("S_action_if", three_address_code::S_action_if); 168 | 169 | cfg_ob.add_function ("E_action_num", three_address_code::E_action_num); 170 | 171 | cfg_ob.add_function ("M_action_eps", three_address_code::M_action_eps); 172 | 173 | cfg_ob.add_function ("S_action_assign", three_address_code::S_action_assign); 174 | 175 | 176 | // std::string grammar_file ("../../tests/semantic_analyzer/unit/zeros_ones_counter.bnf"); 177 | std::string grammar_file ("../../tests/semantic_analyzer/unit/three_address_code.bnf"); 178 | 179 | cfg_ob.parse(grammar_file); 180 | 181 | for (auto g : cfg_ob.get_grammar()) { 182 | std::cout << g.second.to_string() << std::endl; 183 | } 184 | 185 | // cfg_ob.left_factor(); 186 | // cfg_ob.remove_left_recursion(); 187 | 188 | std::cout << "********************************" << std::endl; 189 | 190 | for (auto g : cfg_ob.get_grammar()) { 191 | std::cout << g.second.to_string() << std::endl; 192 | } 193 | 194 | std::shared_ptr p_table = std::make_shared(cfg_ob); 195 | p_table->draw("parsing_table.txt"); 196 | 197 | cfg_ob.get_first_set()->print_to_console(); 198 | cfg_ob.get_follow_set()->print_to_console(); 199 | 200 | std::vector input_buffer { 201 | "if", 202 | "(", 203 | "id", 204 | "relop", 205 | "id", 206 | "&&", 207 | "true", 208 | ")", 209 | "assign", 210 | "$"}; 211 | 212 | predictive_parser parser(cfg_ob.get_start_symbol(), p_table, input_buffer); 213 | parser.parse(); 214 | } -------------------------------------------------------------------------------- /tests/semantic_analyzer/unit/three_address_code.bnf: -------------------------------------------------------------------------------- 1 | # P = S @finalize_action 2 | # E = 'id' @E_action_num 3 | # S = 'if' '(' B #B_record_if ')' M #M_record_if S #S1_record_if @S_action_if 4 | # S = 'assign' @S_action_assign 
5 | # B = B1 #B1_record_or '&&' M #M_record_or B1 #B2_record_or @B_action_or 6 | # B1 = 'true' @B_action_true 7 | # B1 = 'false' @B_action_false 8 | # B1 = E #E1_record_relop 'relop' E #E2_record_relop @B_action_relop 9 | # M = '\L' @M_action_eps -------------------------------------------------------------------------------- /tests/semantic_analyzer/unit/zeros_ones_counter.bnf: -------------------------------------------------------------------------------- 1 | # A = B #record_B #record_A 2 | # B = '0' B #record_B1_0 3 | # B = '1' B #record_B1_1 4 | # B = '\L' @action_1 -------------------------------------------------------------------------------- /tests/syntax_analyzer/TA/grammar.txt: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT | STATEMENT_LIST STATEMENT 3 | # STATEMENT = DECLARATION 4 | | IF 5 | | WHILE 6 | | ASSIGNMENT 7 | # DECLARATION = PRIMITIVE_TYPE 'id' ';' 8 | # PRIMITIVE_TYPE = 'int' | 'float' 9 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 10 | # WHILE = 'while' '(' EXPRESSION ')' '{' STATEMENT '}' 11 | # ASSIGNMENT = 'id' 'assign' EXPRESSION ';' 12 | # EXPRESSION = SIMPLE_EXPRESSION 13 | | SIMPLE_EXPRESSION 'relop' SIMPLE_EXPRESSION 14 | # SIMPLE_EXPRESSION = TERM | SIGN TERM | SIMPLE_EXPRESSION 'addop' TERM 15 | # TERM = FACTOR | TERM 'mulop' FACTOR 16 | # FACTOR = 'id' | 'num' | '(' EXPRESSION ')' 17 | # SIGN = '+' | '-' -------------------------------------------------------------------------------- /tests/syntax_analyzer/TA/grammar_modified.txt: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT STATEMENT_LIST& 3 | # STATEMENT_LIST& = STATEMENT STATEMENT_LIST& | \L 4 | # STATEMENT = DECLARATION 5 | | IF 6 | | WHILE 7 | | ASSIGNMENT 8 | # DECLARATION = PRIMITIVE_TYPE 'id' ';' 9 | # PRIMITIVE_TYPE = 'int' | 'float' 10 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 11 | # WHILE = 'while' '(' EXPRESSION ')' '{' STATEMENT '}' 12 | # ASSIGNMENT = 'id' 'assign' EXPRESSION ';' 13 | # EXPRESSION = SIMPLE_EXPRESSION EXPRESSION& 14 | # EXPRESSION& = 'relop' SIMPLE_EXPRESSION | \L 15 | # SIMPLE_EXPRESSION = TERM SIMPLE_EXPRESSION& | SIGN TERM SIMPLE_EXPRESSION& 16 | # SIMPLE_EXPRESSION& = 'addop' TERM SIMPLE_EXPRESSION& | \L 17 | # TERM = FACTOR TERM& 18 | # TERM& = 'mulop' FACTOR TERM& | \L 19 | # FACTOR = 'id' | 'num' | '(' EXPRESSION ')' 20 | # SIGN = '+' | '-' 21 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/TA/test1.txt: -------------------------------------------------------------------------------- 1 | int count ; 2 | while (pass != 10) { 3 | pass = pass + 1 ; 4 | } 5 | if (mnt <= 0) { 6 | count = count + 1.234; 7 | } 8 | else{ 9 | sum = sum + mnt; 10 | } 11 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/TA/test2.txt: -------------------------------------------------------------------------------- 1 | int count ; 2 | while (pass != 10) { 3 | pass = pass + 1 ; 4 | 5 | if (mnt <= 0) ) { 6 | count = count + 1.234; 7 | } 8 | else{ 9 | sum = sum + mnt; 10 | } 11 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_0/actions_output.log: -------------------------------------------------------------------------------- 1 | METHOD_BODY -> 
STATEMENT_LIST 2 | STATEMENT_LIST -> DECLARATION STATEMENT_LIST' 3 | DECLARATION -> int id ; 4 | match: int 5 | match: id 6 | match: ; 7 | STATEMENT_LIST' -> STATEMENT STATEMENT_LIST' 8 | STATEMENT -> ASSIGNMENT 9 | ASSIGNMENT -> id assign EXPRESSION ; 10 | match: id 11 | match: assign 12 | EXPRESSION -> SIMPLE_EXPRESSION EXPRESSION' 13 | SIMPLE_EXPRESSION -> FACTOR TERM' SIMPLE_EXPRESSION' 14 | FACTOR -> num 15 | match: num 16 | TERM' -> \L 17 | SIMPLE_EXPRESSION' -> \L 18 | EXPRESSION' -> \L 19 | match: ; 20 | STATEMENT_LIST' -> STATEMENT STATEMENT_LIST' 21 | STATEMENT -> IF 22 | IF -> if ( EXPRESSION ) { STATEMENT } else { STATEMENT } 23 | match: if 24 | match: ( 25 | EXPRESSION -> SIMPLE_EXPRESSION EXPRESSION' 26 | SIMPLE_EXPRESSION -> FACTOR TERM' SIMPLE_EXPRESSION' 27 | FACTOR -> id 28 | match: id 29 | TERM' -> \L 30 | SIMPLE_EXPRESSION' -> \L 31 | EXPRESSION' -> relop SIMPLE_EXPRESSION 32 | match: relop 33 | SIMPLE_EXPRESSION -> FACTOR TERM' SIMPLE_EXPRESSION' 34 | FACTOR -> num 35 | match: num 36 | TERM' -> \L 37 | SIMPLE_EXPRESSION' -> \L 38 | match: ) 39 | match: { 40 | STATEMENT -> ASSIGNMENT 41 | ASSIGNMENT -> id assign EXPRESSION ; 42 | match: id 43 | match: assign 44 | EXPRESSION -> SIMPLE_EXPRESSION EXPRESSION' 45 | SIMPLE_EXPRESSION -> FACTOR TERM' SIMPLE_EXPRESSION' 46 | FACTOR -> num 47 | match: num 48 | TERM' -> \L 49 | SIMPLE_EXPRESSION' -> \L 50 | EXPRESSION' -> \L 51 | match: ; 52 | match: } 53 | Error: (missing else) - inserted. 54 | Error: (missing {) - inserted. 55 | SYNCH (pop_stack) 56 | Error: (missing }) - inserted. 57 | STATEMENT_LIST' -> \L 58 | accept 59 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_0/cfg.bnf: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT | STATEMENT_LIST STATEMENT 3 | # STATEMENT = DECLARATION | IF | WHILE | ASSIGNMENT 4 | # DECLARATION = PRIMITIVE_TYPE 'id' ';' 5 | # PRIMITIVE_TYPE = 'int' | 'float' 6 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 7 | # WHILE = 'while' '(' EXPRESSION ')' '{' STATEMENT '}' 8 | # ASSIGNMENT = 'id' 'assign' EXPRESSION ';' 9 | # EXPRESSION = SIMPLE_EXPRESSION | SIMPLE_EXPRESSION 'relop' SIMPLE_EXPRESSION 10 | # SIMPLE_EXPRESSION = TERM | SIGN TERM | SIMPLE_EXPRESSION 'addop' TERM 11 | # TERM = FACTOR | TERM 'mulop' FACTOR 12 | # FACTOR = 'id' | 'num' | '(' EXPRESSION ')' 13 | # SIGN = '+' | '-' 14 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_0/code.txt: -------------------------------------------------------------------------------- 1 | int x; 2 | x = 5; 3 | if (x > 2){ 4 | x = 0; 5 | } 6 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_0/debug_stack.log: -------------------------------------------------------------------------------- 1 | METHOD_BODY 2 | STATEMENT_LIST 3 | DECLARATION STATEMENT_LIST' 4 | int id ; STATEMENT_LIST' 5 | id ; STATEMENT_LIST' 6 | ; STATEMENT_LIST' 7 | STATEMENT_LIST' 8 | STATEMENT STATEMENT_LIST' 9 | ASSIGNMENT STATEMENT_LIST' 10 | id assign EXPRESSION ; STATEMENT_LIST' 11 | assign EXPRESSION ; STATEMENT_LIST' 12 | EXPRESSION ; STATEMENT_LIST' 13 | SIMPLE_EXPRESSION EXPRESSION' ; STATEMENT_LIST' 14 | FACTOR TERM' SIMPLE_EXPRESSION' EXPRESSION' ; STATEMENT_LIST' 15 | num TERM' SIMPLE_EXPRESSION' EXPRESSION' ; STATEMENT_LIST' 16 | 
TERM' SIMPLE_EXPRESSION' EXPRESSION' ; STATEMENT_LIST' 17 | SIMPLE_EXPRESSION' EXPRESSION' ; STATEMENT_LIST' 18 | EXPRESSION' ; STATEMENT_LIST' 19 | ; STATEMENT_LIST' 20 | STATEMENT_LIST' 21 | STATEMENT STATEMENT_LIST' 22 | IF STATEMENT_LIST' 23 | if ( EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 24 | ( EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 25 | EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 26 | SIMPLE_EXPRESSION EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 27 | FACTOR TERM' SIMPLE_EXPRESSION' EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 28 | id TERM' SIMPLE_EXPRESSION' EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 29 | TERM' SIMPLE_EXPRESSION' EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 30 | SIMPLE_EXPRESSION' EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 31 | EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 32 | relop SIMPLE_EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 33 | SIMPLE_EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 34 | FACTOR TERM' SIMPLE_EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 35 | num TERM' SIMPLE_EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 36 | TERM' SIMPLE_EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 37 | SIMPLE_EXPRESSION' ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 38 | ) { STATEMENT } else { STATEMENT } STATEMENT_LIST' 39 | { STATEMENT } else { STATEMENT } STATEMENT_LIST' 40 | STATEMENT } else { STATEMENT } STATEMENT_LIST' 41 | ASSIGNMENT } else { STATEMENT } STATEMENT_LIST' 42 | id assign EXPRESSION ; } else { STATEMENT } STATEMENT_LIST' 43 | assign EXPRESSION ; } else { STATEMENT } STATEMENT_LIST' 44 | EXPRESSION ; } else { STATEMENT } STATEMENT_LIST' 45 | SIMPLE_EXPRESSION EXPRESSION' ; } else { STATEMENT } STATEMENT_LIST' 46 | FACTOR TERM' SIMPLE_EXPRESSION' EXPRESSION' ; } else { STATEMENT } STATEMENT_LIST' 47 | num TERM' SIMPLE_EXPRESSION' EXPRESSION' ; } else { STATEMENT } STATEMENT_LIST' 48 | TERM' SIMPLE_EXPRESSION' EXPRESSION' ; } else { STATEMENT } STATEMENT_LIST' 49 | SIMPLE_EXPRESSION' EXPRESSION' ; } else { STATEMENT } STATEMENT_LIST' 50 | EXPRESSION' ; } else { STATEMENT } STATEMENT_LIST' 51 | ; } else { STATEMENT } STATEMENT_LIST' 52 | } else { STATEMENT } STATEMENT_LIST' 53 | else { STATEMENT } STATEMENT_LIST' 54 | { STATEMENT } STATEMENT_LIST' 55 | STATEMENT } STATEMENT_LIST' 56 | } STATEMENT_LIST' 57 | STATEMENT_LIST' 58 | 59 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_0/rules.txt: -------------------------------------------------------------------------------- 1 | letter = a-z | A-Z 2 | digit = 0-9 3 | id: letter (letter|digit)* 4 | digits = digit+ 5 | {boolean int float} 6 | num: digit+ | digit+ . 
digits ( \L | E digits) 7 | relop: \=\= | !\= | > | >\= | < | <\= 8 | assign: \= 9 | { if else while } 10 | [ ; , \( \) { } ] 11 | addop: \+ | \- 12 | mulop: \* | / -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_0/transition_table.txt: -------------------------------------------------------------------------------- 1 | Total States: 44 2 | Start State(s): {0} 3 | Acceptance State(s): 40 4 | {1} ; 5 | {2} relop 6 | {3} assign 7 | {4} addop 8 | {5} , 9 | {6} mulop 10 | {8} ( 11 | {9} ) 12 | {10} id 13 | {11} id 14 | {12} id 15 | {13} id 16 | {14} id 17 | {15} id 18 | {16} { 19 | {17} } 20 | {18} num 21 | {19} relop 22 | {20} id 23 | {21} id 24 | {22} id 25 | {23} if 26 | {24} id 27 | {25} id 28 | {27} id 29 | {28} id 30 | {29} id 31 | {30} int 32 | {31} id 33 | {32} num 34 | {33} id 35 | {34} else 36 | {35} id 37 | {36} id 38 | {38} id 39 | {39} float 40 | {40} while 41 | {41} num 42 | {42} id 43 | {43} boolean 44 | State ! ( ) * + , - . / ; < = > E a b e f h i l n o s t w { } 0-9 A-Z a-z 45 | {0} {7} {8} {9} {6} {4} {5} {4} {-} {6} {1} {2} {3} {2} {10} {10} {11} {12} {13} {10} {14} {10} {10} {10} {10} {10} {15} {16} {17} {18} {10} {10} 46 | {1} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 47 | {2} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 48 | {3} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 49 | {4} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 50 | {5} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 51 | {6} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 52 | {7} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 53 | {8} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 54 | {9} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 55 | {10} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 56 | {11} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {20} {10} {10} {10} {-} {-} {10} {10} {10} 57 | {12} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {21} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 58 | {13} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {22} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 59 | {14} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {23} {10} {10} {10} {24} {10} {10} {10} {10} {-} {-} {10} {10} {10} 60 | {15} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {25} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 61 | {16} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 62 | {17} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 
{-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 63 | {18} {-} {-} {-} {-} {-} {-} {-} {26} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {18} {-} {-} 64 | {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 65 | {20} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {27} {10} {10} {10} {-} {-} {10} {10} {10} 66 | {21} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {28} {10} {10} {-} {-} {10} {10} {10} 67 | {22} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {29} {10} {10} {10} {-} {-} {10} {10} {10} 68 | {23} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 69 | {24} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {30} {10} {-} {-} {10} {10} {10} 70 | {25} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {31} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 71 | {26} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {32} {-} {-} 72 | {27} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {33} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 73 | {28} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {34} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 74 | {29} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {35} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 75 | {30} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 76 | {31} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {36} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 77 | {32} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {37} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {32} {-} {-} 78 | {33} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {38} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 79 | {34} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 80 | {35} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {39} {10} {-} {-} {10} {10} {10} 81 | {36} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {40} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 82 | {37} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {41} {-} {-} 83 | {38} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {42} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 84 | {39} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 85 | {40} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 86 | {41} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {41} {-} {-} 87 
| {42} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {43} {10} {10} {10} {10} {-} {-} {10} {10} {10} 88 | {43} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 89 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_1/actions_output.log: -------------------------------------------------------------------------------- 1 | METHOD_BODY -> STATEMENT_LIST 2 | STATEMENT_LIST -> STATEMENT STATEMENT_LIST1 3 | STATEMENT -> DECLARATION 4 | DECLARATION -> PRIMITIVE_TYPE id ; 5 | PRIMITIVE_TYPE -> int 6 | match: int 7 | match: id 8 | match: ; 9 | STATEMENT_LIST1 -> STATEMENT STATEMENT_LIST1 10 | STATEMENT -> ASSIGNMENT 11 | ASSIGNMENT -> id assign EXPRESSION ; 12 | match: id 13 | match: assign 14 | EXPRESSION -> SIMPLE_EXPRESSION EXPRESSION1 15 | SIMPLE_EXPRESSION -> TERM SIMPLE_EXPRESSION1 16 | TERM -> FACTOR TERM1 17 | FACTOR -> num 18 | match: num 19 | TERM1 -> \L 20 | SIMPLE_EXPRESSION1 -> \L 21 | EXPRESSION1 -> \L 22 | match: ; 23 | STATEMENT_LIST1 -> STATEMENT STATEMENT_LIST1 24 | STATEMENT -> IF 25 | IF -> if ( EXPRESSION ) { STATEMENT } else { STATEMENT } 26 | match: if 27 | match: ( 28 | EXPRESSION -> SIMPLE_EXPRESSION EXPRESSION1 29 | SIMPLE_EXPRESSION -> TERM SIMPLE_EXPRESSION1 30 | TERM -> FACTOR TERM1 31 | FACTOR -> id 32 | match: id 33 | TERM1 -> \L 34 | SIMPLE_EXPRESSION1 -> \L 35 | EXPRESSION1 -> relop SIMPLE_EXPRESSION 36 | match: relop 37 | SIMPLE_EXPRESSION -> TERM SIMPLE_EXPRESSION1 38 | TERM -> FACTOR TERM1 39 | FACTOR -> num 40 | match: num 41 | TERM1 -> \L 42 | SIMPLE_EXPRESSION1 -> \L 43 | match: ) 44 | match: { 45 | STATEMENT -> ASSIGNMENT 46 | ASSIGNMENT -> id assign EXPRESSION ; 47 | match: id 48 | match: assign 49 | EXPRESSION -> SIMPLE_EXPRESSION EXPRESSION1 50 | SIMPLE_EXPRESSION -> TERM SIMPLE_EXPRESSION1 51 | TERM -> FACTOR TERM1 52 | FACTOR -> num 53 | match: num 54 | TERM1 -> \L 55 | SIMPLE_EXPRESSION1 -> \L 56 | EXPRESSION1 -> \L 57 | match: ; 58 | match: } 59 | Error: (missing else) - inserted. 60 | Error: (missing {) - inserted. 61 | SYNCH (pop_stack) 62 | Error: (missing }) - inserted. 
63 | STATEMENT_LIST1 -> \L 64 | accept 65 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_1/code.txt: -------------------------------------------------------------------------------- 1 | int x; 2 | x = 5; 3 | if (x > 2){ 4 | x = 0; 5 | } 6 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_1/debug_stack.log: -------------------------------------------------------------------------------- 1 | METHOD_BODY 2 | STATEMENT_LIST 3 | STATEMENT STATEMENT_LIST1 4 | DECLARATION STATEMENT_LIST1 5 | PRIMITIVE_TYPE id ; STATEMENT_LIST1 6 | int id ; STATEMENT_LIST1 7 | id ; STATEMENT_LIST1 8 | ; STATEMENT_LIST1 9 | STATEMENT_LIST1 10 | STATEMENT STATEMENT_LIST1 11 | ASSIGNMENT STATEMENT_LIST1 12 | id assign EXPRESSION ; STATEMENT_LIST1 13 | assign EXPRESSION ; STATEMENT_LIST1 14 | EXPRESSION ; STATEMENT_LIST1 15 | SIMPLE_EXPRESSION EXPRESSION1 ; STATEMENT_LIST1 16 | TERM SIMPLE_EXPRESSION1 EXPRESSION1 ; STATEMENT_LIST1 17 | FACTOR TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ; STATEMENT_LIST1 18 | num TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ; STATEMENT_LIST1 19 | TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ; STATEMENT_LIST1 20 | SIMPLE_EXPRESSION1 EXPRESSION1 ; STATEMENT_LIST1 21 | EXPRESSION1 ; STATEMENT_LIST1 22 | ; STATEMENT_LIST1 23 | STATEMENT_LIST1 24 | STATEMENT STATEMENT_LIST1 25 | IF STATEMENT_LIST1 26 | if ( EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 27 | ( EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 28 | EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 29 | SIMPLE_EXPRESSION EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 30 | TERM SIMPLE_EXPRESSION1 EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 31 | FACTOR TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 32 | id TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 33 | TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 34 | SIMPLE_EXPRESSION1 EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 35 | EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 36 | relop SIMPLE_EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 37 | SIMPLE_EXPRESSION ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 38 | TERM SIMPLE_EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 39 | FACTOR TERM1 SIMPLE_EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 40 | num TERM1 SIMPLE_EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 41 | TERM1 SIMPLE_EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 42 | SIMPLE_EXPRESSION1 ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 43 | ) { STATEMENT } else { STATEMENT } STATEMENT_LIST1 44 | { STATEMENT } else { STATEMENT } STATEMENT_LIST1 45 | STATEMENT } else { STATEMENT } STATEMENT_LIST1 46 | ASSIGNMENT } else { STATEMENT } STATEMENT_LIST1 47 | id assign EXPRESSION ; } else { STATEMENT } STATEMENT_LIST1 48 | assign EXPRESSION ; } else { STATEMENT } STATEMENT_LIST1 49 | EXPRESSION ; } else { STATEMENT } STATEMENT_LIST1 50 | SIMPLE_EXPRESSION EXPRESSION1 ; } else { STATEMENT } STATEMENT_LIST1 51 | TERM SIMPLE_EXPRESSION1 EXPRESSION1 ; } else { STATEMENT } STATEMENT_LIST1 52 | FACTOR TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ; } else { STATEMENT } STATEMENT_LIST1 53 | num TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ; } else { STATEMENT } 
STATEMENT_LIST1 54 | TERM1 SIMPLE_EXPRESSION1 EXPRESSION1 ; } else { STATEMENT } STATEMENT_LIST1 55 | SIMPLE_EXPRESSION1 EXPRESSION1 ; } else { STATEMENT } STATEMENT_LIST1 56 | EXPRESSION1 ; } else { STATEMENT } STATEMENT_LIST1 57 | ; } else { STATEMENT } STATEMENT_LIST1 58 | } else { STATEMENT } STATEMENT_LIST1 59 | else { STATEMENT } STATEMENT_LIST1 60 | { STATEMENT } STATEMENT_LIST1 61 | STATEMENT } STATEMENT_LIST1 62 | } STATEMENT_LIST1 63 | STATEMENT_LIST1 64 | 65 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_1/ll1_cfg.bnf: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT STATEMENT_LIST1 3 | # STATEMENT_LIST1 = STATEMENT STATEMENT_LIST1 | '\L' 4 | # STATEMENT = DECLARATION | IF | WHILE | ASSIGNMENT 5 | # DECLARATION = PRIMITIVE_TYPE 'id' ';' 6 | # PRIMITIVE_TYPE = 'int' | 'float' 7 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 8 | # WHILE = 'while' '(' EXPRESSION ')' '{' STATEMENT '}' 9 | # ASSIGNMENT = 'id' 'assign' EXPRESSION ';' 10 | # EXPRESSION = SIMPLE_EXPRESSION EXPRESSION1 11 | # EXPRESSION1 = 'relop' SIMPLE_EXPRESSION | '\L' 12 | # SIMPLE_EXPRESSION = TERM SIMPLE_EXPRESSION1 | SIGN TERM SIMPLE_EXPRESSION1 13 | # SIMPLE_EXPRESSION1 = 'addop' TERM SIMPLE_EXPRESSION1 | '\L' 14 | # TERM = FACTOR TERM1 15 | # TERM1 = 'mulop' FACTOR TERM1 | '\L' 16 | # FACTOR = 'id' | 'num' | '(' EXPRESSION ')' 17 | # SIGN = '+' | '-' 18 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_1/rules.txt: -------------------------------------------------------------------------------- 1 | letter = a-z | A-Z 2 | digit = 0-9 3 | id: letter (letter|digit)* 4 | digits = digit+ 5 | {boolean int float} 6 | num: digit+ | digit+ . digits ( \L | E digits) 7 | relop: \=\= | !\= | > | >\= | < | <\= 8 | assign: \= 9 | { if else while } 10 | [ ; , \( \) { } ] 11 | addop: \+ | \- 12 | mulop: \* | / -------------------------------------------------------------------------------- /tests/syntax_analyzer/regression/test_1/transition_table.txt: -------------------------------------------------------------------------------- 1 | Total States: 44 2 | Start State(s): {0} 3 | Acceptance State(s): 40 4 | {1} ; 5 | {2} relop 6 | {3} assign 7 | {4} addop 8 | {5} , 9 | {6} mulop 10 | {8} ( 11 | {9} ) 12 | {10} id 13 | {11} id 14 | {12} id 15 | {13} id 16 | {14} id 17 | {15} id 18 | {16} { 19 | {17} } 20 | {18} num 21 | {19} relop 22 | {20} id 23 | {21} id 24 | {22} id 25 | {23} if 26 | {24} id 27 | {25} id 28 | {27} id 29 | {28} id 30 | {29} id 31 | {30} int 32 | {31} id 33 | {32} num 34 | {33} id 35 | {34} else 36 | {35} id 37 | {36} id 38 | {38} id 39 | {39} float 40 | {40} while 41 | {41} num 42 | {42} id 43 | {43} boolean 44 | State ! ( ) * + , - . 
/ ; < = > E a b e f h i l n o s t w { } 0-9 A-Z a-z 45 | {0} {7} {8} {9} {6} {4} {5} {4} {-} {6} {1} {2} {3} {2} {10} {10} {11} {12} {13} {10} {14} {10} {10} {10} {10} {10} {15} {16} {17} {18} {10} {10} 46 | {1} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 47 | {2} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 48 | {3} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 49 | {4} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 50 | {5} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 51 | {6} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 52 | {7} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 53 | {8} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 54 | {9} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 55 | {10} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 56 | {11} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {20} {10} {10} {10} {-} {-} {10} {10} {10} 57 | {12} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {21} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 58 | {13} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {22} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 59 | {14} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {23} {10} {10} {10} {24} {10} {10} {10} {10} {-} {-} {10} {10} {10} 60 | {15} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {25} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 61 | {16} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 62 | {17} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 63 | {18} {-} {-} {-} {-} {-} {-} {-} {26} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {18} {-} {-} 64 | {19} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} 65 | {20} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {27} {10} {10} {10} {-} {-} {10} {10} {10} 66 | {21} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {28} {10} {10} {-} {-} {10} {10} {10} 67 | {22} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {29} {10} {10} {10} {-} {-} {10} {10} {10} 68 | {23} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 69 | {24} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} 
{10} {30} {10} {-} {-} {10} {10} {10} 70 | {25} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {31} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 71 | {26} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {32} {-} {-} 72 | {27} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {33} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 73 | {28} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {34} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 74 | {29} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {35} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 75 | {30} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 76 | {31} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {36} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 77 | {32} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {37} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {32} {-} {-} 78 | {33} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {38} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 79 | {34} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 80 | {35} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {39} {10} {-} {-} {10} {10} {10} 81 | {36} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {40} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 82 | {37} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {41} {-} {-} 83 | {38} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {42} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 84 | {39} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 85 | {40} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 86 | {41} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {41} {-} {-} 87 | {42} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {43} {10} {10} {10} {10} {-} {-} {10} {10} {10} 88 | {43} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {-} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {10} {-} {-} {10} {10} {10} 89 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/cfg_single_line_ll1.bnf: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT STATEMENT_LIST1 3 | # STATEMENT_LIST1 = STATEMENT STATEMENT_LIST1 | '\L' 4 | # STATEMENT = DECLARATION | IF | WHILE | ASSIGNMENT 5 | # DECLARATION = PRIMITIVE_TYPE 'id' ';' 6 | # PRIMITIVE_TYPE = 'int' | 'float' 7 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 8 | # WHILE = 'while' '(' EXPRESSION ')' '{' STATEMENT '}' 9 | # ASSIGNMENT = 'id' 'assign' EXPRESSION ';' 10 | # EXPRESSION = SIMPLE_EXPRESSION 
EXPRESSION1 11 | # EXPRESSION1 = 'relop' SIMPLE_EXPRESSION | '\L' 12 | # SIMPLE_EXPRESSION = TERM SIMPLE_EXPRESSION1 | SIGN TERM SIMPLE_EXPRESSION1 13 | # SIMPLE_EXPRESSION1 = 'addop' TERM SIMPLE_EXPRESSION1 | '\L' 14 | # TERM = FACTOR TERM1 15 | # TERM1 = 'mulop' FACTOR TERM1 | '\L' 16 | # FACTOR = 'id' | 'num' | '(' EXPRESSION ')' 17 | # SIGN = '+' | '-' 18 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/complex_left_rec.bnf: -------------------------------------------------------------------------------- 1 | # E = T E E '+' T 2 | # E = T E 3 | # E = T 'id' 4 | # E = E 'id' 5 | # T = T E T '*' F 6 | # T = E F 7 | # T = '\L' 8 | # F = '(' E ')' 9 | # F = 'id' 10 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/first_follow_test.bnf: -------------------------------------------------------------------------------- 1 | # S = R T 2 | # R = 's' U R 'b' | \L 3 | # U = 'u' U | \L 4 | # V = 'v' V | \L 5 | # T = V 't' T | \L -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/general_test.bnf: -------------------------------------------------------------------------------- 1 | # F = F B a | c D S | c -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/invalid_ll1.bnf: -------------------------------------------------------------------------------- 1 | # S = 'i' C 't' S E | 'a' 2 | # E = 'e' S | '\L' 3 | # C = 'b' 4 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/left_rec_left_fact.bnf: -------------------------------------------------------------------------------- 1 | # E = E T 2 | # E = E 'id' 3 | # E = 'int' 4 | # T = ',' 'id' 5 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/ll1_cfg_with_synch.bnf: -------------------------------------------------------------------------------- 1 | # S = A 'b' S | 'e' | '\L' 2 | # A = 'a' | 'c' A 'd' 3 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/ll_1_tests.cpp: -------------------------------------------------------------------------------- 1 | #include "../../lib/catch.hpp" 2 | #include "../../../syntax_analyzer/context_free_grammar/cfg.h" 3 | 4 | TEST_CASE ("left factoring test 1") { 5 | // grammar: 6 | // S -> if E then S | if E then S else S | a 7 | // E -> b 8 | 9 | // result: 10 | // S -> if E then S S' | a 11 | // S -> else S | eps 12 | // E -> b 13 | } 14 | 15 | 16 | TEST_CASE ("left factoring test 2") { 17 | // grammar: 18 | // A -> aX | aY 19 | 20 | // result: 21 | // A -> aZ 22 | // Z -> X | Y 23 | } 24 | 25 | 26 | TEST_CASE ("left factoring test 3") { 27 | // grammar: 28 | 29 | // X -> aE | IXE | (X)E 30 | // E -> IE | BXE | ϵ 31 | // I -> ++ | -- 32 | // B -> + | - | ϵ 33 | 34 | // result: 35 | // steps: 36 | // X -> aE ∣ ++XE ∣ --XE ∣ (X)E 37 | // E -> ++E ∣ --E ∣ +XE ∣ -XE ∣ XE | eps // sub x 38 | 39 | // X -> aE ∣ ++XE ∣ --XE ∣ (X)E 40 | // E -> ++E ∣ --E ∣ +XE ∣ -XE ∣ aEE ∣ ++XEE ∣ --XEE ∣ (X)EE | eps 41 | 42 | // X -> aE ∣ ++XE ∣ --XE ∣ (X)E 43 | // E -> +E1 | -E2 | aEE | (X)EE | eps 44 | // E1 -> +E | XE | +XEE 45 | // E2 -> -E | XE | -XEE 46 | // we can sub X again and keep doing so we will find that this grammar is not even LL 47 | } 48 | 49 | TEST_CASE ("longest common prefix test 1") { 50 | 51 | std::vector 
<cfg_symbol> prod1_vec; 52 | std::vector<cfg_symbol> prod2_vec; 53 | std::vector<cfg_symbol> prod3_vec; 54 | 55 | // LHS NON_TERMINALS SYMBOLS 56 | cfg_symbol S("S", NON_TERMINAL); 57 | 58 | // TERMINALS SYMBOLS 59 | cfg_symbol plus("+", TERMINAL); 60 | cfg_symbol id("id", TERMINAL); 61 | 62 | cfg_symbol term1("term1", TERMINAL); 63 | cfg_symbol term2("term2", TERMINAL); 64 | cfg_symbol term3("term3", TERMINAL); 65 | 66 | // FILL THE PRODUCTIONS VECTORS ********************************** 67 | // S -> term1 term2 term3 | term1 term2 term3 | term1 term2 id 68 | 69 | prod1_vec.push_back(term1); 70 | prod1_vec.push_back(term2); 71 | prod1_vec.push_back(term3); 72 | 73 | prod2_vec.push_back(term1); 74 | prod2_vec.push_back(term2); 75 | prod2_vec.push_back(term3); 76 | 77 | prod3_vec.push_back(term1); 78 | prod3_vec.push_back(term2); 79 | prod3_vec.push_back(id); 80 | 81 | // **************************************************************** 82 | 83 | // CONSTRUCT PRODUCTION 84 | cfg_production prod1(S, prod1_vec); 85 | cfg_production prod2(S, prod2_vec); 86 | cfg_production prod3(S, prod3_vec); 87 | 88 | 89 | std::vector<cfg_production> prods; 90 | prods.push_back(prod1); 91 | prods.push_back(prod2); 92 | prods.push_back(prod3); 93 | 94 | REQUIRE(longest_common_prefix(prods) == 2); 95 | } 96 | 97 | TEST_CASE ("longest common prefix test 2") { 98 | 99 | std::vector<cfg_symbol> prod1_vec; 100 | std::vector<cfg_symbol> prod2_vec; 101 | std::vector<cfg_symbol> prod3_vec; 102 | 103 | // LHS NON_TERMINALS SYMBOLS 104 | cfg_symbol S("S", NON_TERMINAL); 105 | 106 | // TERMINALS SYMBOLS 107 | cfg_symbol plus("+", TERMINAL); 108 | cfg_symbol id("id", TERMINAL); 109 | 110 | cfg_symbol term1("term1", TERMINAL); 111 | cfg_symbol term2("term2", TERMINAL); 112 | cfg_symbol term3("term3", TERMINAL); 113 | 114 | // FILL THE PRODUCTIONS VECTORS ********************************** 115 | // S -> term1 term2 term3 | term1 term2 term3 | term1 term3 id 116 | 117 | prod1_vec.push_back(term1); 118 | prod1_vec.push_back(term2); 119 | prod1_vec.push_back(term3); 120 | 121 | prod2_vec.push_back(term1); 122 | prod2_vec.push_back(term2); 123 | prod2_vec.push_back(term3); 124 | 125 | prod3_vec.push_back(term1); 126 | prod3_vec.push_back(term3); 127 | prod3_vec.push_back(id); 128 | 129 | // **************************************************************** 130 | 131 | // CONSTRUCT PRODUCTION 132 | cfg_production prod1(S, prod1_vec); 133 | cfg_production prod2(S, prod2_vec); 134 | cfg_production prod3(S, prod3_vec); 135 | 136 | 137 | std::vector<cfg_production> prods; 138 | prods.push_back(prod1); 139 | prods.push_back(prod2); 140 | prods.push_back(prod3); 141 | 142 | REQUIRE(longest_common_prefix(prods) == 1); 143 | } 144 | 145 | TEST_CASE ("longest common prefix test 3") { 146 | 147 | std::vector<cfg_symbol> prod1_vec; 148 | std::vector<cfg_symbol> prod2_vec; 149 | std::vector<cfg_symbol> prod3_vec; 150 | 151 | // LHS NON_TERMINALS SYMBOLS 152 | cfg_symbol S("S", NON_TERMINAL); 153 | 154 | // TERMINALS SYMBOLS 155 | cfg_symbol plus("+", TERMINAL); 156 | cfg_symbol id("id", TERMINAL); 157 | 158 | cfg_symbol term1("term1", TERMINAL); 159 | cfg_symbol term2("term2", TERMINAL); 160 | cfg_symbol term3("term3", TERMINAL); 161 | 162 | // FILL THE PRODUCTIONS VECTORS ********************************** 163 | // S -> term1 term2 term3 | term2 term3 | term1 term3 id 164 | 165 | prod1_vec.push_back(term1); 166 | prod1_vec.push_back(term2); 167 | prod1_vec.push_back(term3); 168 | 169 | prod2_vec.push_back(term2); 170 | prod2_vec.push_back(term3); 171 | 172 | prod3_vec.push_back(term1); 173 | prod3_vec.push_back(term3); 174 | prod3_vec.push_back(id); 175 | 176 | // 
**************************************************************** 177 | 178 | // CONSTRUCT PRODUCTION 179 | cfg_production prod1(S, prod1_vec); 180 | cfg_production prod2(S, prod2_vec); 181 | cfg_production prod3(S, prod3_vec); 182 | 183 | 184 | std::vector<cfg_production> prods; 185 | prods.push_back(prod1); 186 | prods.push_back(prod2); 187 | prods.push_back(prod3); 188 | 189 | REQUIRE(longest_common_prefix(prods) == 0); 190 | } -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/ps_cfg_multi_line.bnf: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT 3 | | STATEMENT_LIST STATEMENT 4 | # STATEMENT = DECLARATION 5 | | IF 6 | | WHILE 7 | | ASSIGNMENT 8 | # DECLARATION = PRIMITIVE_TYPE 9 | 'id' 10 | ';' 11 | # PRIMITIVE_TYPE = 'int' 12 | | 'float' 13 | | \L 14 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 15 | # WHILE = 'while' 16 | '(' 17 | EXPRESSION ')' '{' STATEMENT '}' 18 | # ASSIGNMENT = 'id' '=' EXPRESSION ';' 19 | # EXPRESSION = SIMPLE_EXPRESSION 20 | | SIMPLE_EXPRESSION 'relop' SIMPLE_EXPRESSION 21 | # SIMPLE_EXPRESSION = TERM 22 | | SIGN TERM 23 | | SIMPLE_EXPRESSION 'addop' TERM 24 | # TERM = FACTOR 25 | | TERM 'mulop' FACTOR 26 | # FACTOR = 'id' 27 | | 'num' 28 | | '(' EXPRESSION ')' 29 | # SIGN = '+' 30 | | \L 31 | # SIGN = '-' 32 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/ps_cfg_single_line.bnf: -------------------------------------------------------------------------------- 1 | # METHOD_BODY = STATEMENT_LIST 2 | # STATEMENT_LIST = STATEMENT | STATEMENT_LIST STATEMENT 3 | # STATEMENT = DECLARATION | IF | WHILE | ASSIGNMENT 4 | # DECLARATION = PRIMITIVE_TYPE 'id' ';' 5 | # PRIMITIVE_TYPE = 'int' | 'float' 6 | # IF = 'if' '(' EXPRESSION ')' '{' STATEMENT '}' 'else' '{' STATEMENT '}' 7 | # WHILE = 'while' '(' EXPRESSION ')' '{' STATEMENT '}' 8 | # ASSIGNMENT = 'id' '=' EXPRESSION ';' 9 | # EXPRESSION = SIMPLE_EXPRESSION | SIMPLE_EXPRESSION 'relop' SIMPLE_EXPRESSION 10 | # SIMPLE_EXPRESSION = TERM | SIGN TERM | SIMPLE_EXPRESSION 'addop' TERM 11 | # TERM = FACTOR | TERM 'mulop' FACTOR 12 | # FACTOR = 'id' | 'num' | '(' EXPRESSION ')' 13 | # SIGN = '+' | '-' 14 | -------------------------------------------------------------------------------- /tests/syntax_analyzer/unit/ready_ll1_cfg.bnf: -------------------------------------------------------------------------------- 1 | # E = T E' 2 | # E' = '+' T E' | '\L' 3 | # T = F T' 4 | # T' = '*' F T' | '\L' 5 | # F = '(' E ')' | 'id' 6 | --------------------------------------------------------------------------------