├── .clang-format ├── .clang-tidy ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ └── jucc-issue-template.md ├── pull_request_template.md └── workflows │ └── build_tests.yml ├── .gitignore ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── README.md ├── benchmark ├── README.md ├── input │ ├── benchmark_grammar.g │ └── in1.cc ├── main.cpp └── main │ └── jucc_benchmark.cpp ├── build_support ├── benchmark_CMakeLists.txt.in ├── clang_format_exclusions.txt ├── gtest_CMakeLists.txt.in ├── nlohmann_json_CMakeLists.txt.in ├── run_clang_format.py ├── run_clang_tidy.py └── run_clang_tidy_extra.py ├── codecov.yml ├── docs ├── README.md ├── cpp_guidelines.md ├── cpp_guidelines_code_style.md ├── tech_clangtools.md ├── tech_docker.md └── tech_git.md ├── grammar.g ├── script └── installation │ └── packages.sh ├── server ├── package.json ├── public │ ├── LICENSE │ ├── README.md │ ├── Treant.css │ ├── Treant.js │ ├── favicon.ico │ ├── jquery.easing.js │ ├── jquery.min.js │ ├── jquery.mousewheel.js │ ├── package.json │ ├── perfect-scrollbar.css │ ├── perfect-scrollbar.js │ ├── raphael.js │ └── tree.css ├── server.js └── views │ └── index.ejs ├── src ├── grammar │ └── grammar.cpp ├── include │ ├── grammar │ │ └── grammar.h │ ├── lexer │ │ └── lexer.h │ ├── main │ │ └── jucc.h │ ├── parser │ │ ├── parser.h │ │ └── parsing_table.h │ ├── symbol_table │ │ └── symbol_table.h │ └── utils │ │ ├── first_follow.h │ │ ├── left_factoring.h │ │ ├── left_recursion.h │ │ ├── trie │ │ └── memory_efficient_trie.h │ │ └── utils.h ├── lexer │ └── lexer.cpp ├── main │ ├── jucc.cpp │ └── main.cpp ├── parser │ ├── parser.cpp │ └── parsing_table.cpp ├── symbol_table │ └── symbol_table.cpp └── utils │ ├── first_follow.cpp │ ├── left_factoring.cpp │ ├── left_recursion.cpp │ ├── trie │ └── memory_efficient_trie.cpp │ └── utils.cpp ├── test ├── README.md ├── grammar │ ├── grammar_test.cpp │ ├── grammar_test_0.g │ ├── grammar_test_1.g │ ├── grammar_test_10.g │ ├── grammar_test_11.g │ ├── grammar_test_12.g │ 
├── grammar_test_13.g │ ├── grammar_test_14.g │ ├── grammar_test_15.g │ ├── grammar_test_16.g │ ├── grammar_test_17.g │ ├── grammar_test_18.g │ ├── grammar_test_2.g │ ├── grammar_test_3.g │ ├── grammar_test_4.g │ ├── grammar_test_5.g │ ├── grammar_test_6.g │ ├── grammar_test_7.g │ ├── grammar_test_8.g │ └── grammar_test_9.g ├── lexer │ ├── arithmetic.txt │ ├── comments.txt │ ├── input.txt │ ├── input2.txt │ ├── input3.txt │ ├── input_err1.txt │ ├── input_err2.txt │ ├── lexer_test.cpp │ └── scope_error.txt ├── main.cpp ├── main │ └── jucc_test.cpp ├── parser │ ├── grammar.g │ ├── parser_test.cpp │ └── parsing_table_test.cpp ├── symbol_table │ └── symbol_table_test.cpp └── utils │ ├── trie │ └── trie_test.cpp │ └── utils_test.cpp └── third_party └── README.md /.clang-format: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | # 18 | # 19 | BasedOnStyle: Google 20 | DerivePointerAlignment: false 21 | PointerAlignment: Right 22 | ColumnLimit: 120 23 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | # 18 | --- 19 | Checks: ' 20 | bugprone-*, 21 | clang-analyzer-*, 22 | google-*, 23 | modernize-*, 24 | performance-*, 25 | portability-*, 26 | readability-*, 27 | -modernize-avoid-c-arrays, 28 | -modernize-use-trailing-return-type, 29 | -readability-magic-numbers, 30 | ' 31 | CheckOptions: 32 | - { key: readability-identifier-naming.ClassCase, value: CamelCase } 33 | - { key: readability-identifier-naming.EnumCase, value: CamelCase } 34 | - { key: readability-identifier-naming.FunctionCase, value: CamelCase } 35 | - { key: readability-identifier-naming.GlobalConstantCase, value: UPPER_CASE } 36 | - { key: readability-identifier-naming.MemberCase, value: lower_case } 37 | - { key: readability-identifier-naming.MemberSuffix, value: _ } 38 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case } 39 | - { key: readability-identifier-naming.StructCase, value: CamelCase } 40 | - { key: readability-identifier-naming.UnionCase, value: CamelCase } 41 | - { key: readability-identifier-naming.VariableCase, value: lower_case } 42 | 43 | # Turn all the warnings from the checks above into errors. 44 | WarningsAsErrors: '*' 45 | HeaderFilterRegex: '(benchmark|src|test|util)/include' 46 | AnalyzeTemporaryDtors: true 47 | 48 | #### Disabled checks and why: ##### 49 | # 50 | # -modernize-avoid-c-arrays, 51 | # Not feasible in the storage layer 52 | # -modernize-use-trailing-return-type 53 | # gtest issues 54 | # -readability-magic-numbers, 55 | # Blows up in tests. 56 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Don't send any build context to Docker. 
2 | 3 | # additional 4 | .gitignore 5 | Dockerfile 6 | LICENSE 7 | README.md 8 | 9 | # build dir 10 | build 11 | cmake-build-debug/ 12 | 13 | # configs 14 | .idea 15 | .vscode 16 | 17 | # pycache 18 | __pycache__ 19 | *.pyc 20 | 21 | # Prerequisites 22 | *.d 23 | 24 | # Compiled Object files 25 | *.slo 26 | *.lo 27 | *.o 28 | *.obj 29 | 30 | # Precompiled Headers 31 | *.gch 32 | *.pch 33 | 34 | # Compiled Dynamic libraries 35 | *.so 36 | *.dylib 37 | *.dll 38 | 39 | # Fortran module files 40 | *.mod 41 | *.smod 42 | 43 | # Compiled Static libraries 44 | *.lai 45 | *.la 46 | *.a 47 | *.lib 48 | 49 | # Executables 50 | *.exe 51 | *.out 52 | *.app 53 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/jucc-issue-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: JuCC Issue Template 3 | about: Please follow this template when creating an issue for JuCC! 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Welcome to the issue tracker for **JuCC**! We're excited that you're interested in improving our system. Below, please **choose either a Feature Request or Bug Report** and replace the sample text below to describe the issue! Additionally, please choose the appropriate labels on the Github panel. If you wish to and are able to solve the issue, feel free to assign yourself; otherwise we will manage this later! 11 | 12 | # Feature Request 13 | ## Summary 14 | Please provide a short summary of the feature you would like implemented. 15 | 16 | ## Solution 17 | If possible, include a description of the desired solution you have in mind. Ideally, a series of steps outlining what is required and a plan to implement would be the most helpful for our developers! 18 | 19 | Feel free to suggest more or raise concerns with the existing items. 
When we're convinced of the list, we should make an overall project with issues for each item in the list to spread out the tasks and track progress. 20 | 21 | ### Alternatives 22 | If you are aware of any alternatives to the solution you presented, please describe them here! 23 | 24 | ___ 25 | 26 | # Bug Report 27 | **Note**: Before filing a bug report, please make sure to check whether the bug has already been filed. If it has, please do not re-file the report; our developers are already hard at work fixing it! 28 | 29 | ## Summary 30 | Please provide a short summary of the bug observed. 31 | 32 | ## Environment 33 | To address the bug, especially if it is environment-specific, we need to know what kind of configuration you are running on. Please include the following: 34 | 35 | **OS**: Ubuntu (LTS) 20.04 or macOS 10.14+ (please specify version). 36 | 37 | **Compiler**: GCC 7.0+ or Clang 8.0+. 38 | 39 | **CMake Profile**: `Debug`, `Release`, `FastDebug`, or `Relwithdebinfo`. If the bug exists across all CMake profiles on a platform-compiler combo, then please say so. 40 | 41 | **CI**: If the bug has been observed in CI, please link to the CI build so that the bug can be referenced. Please make sure that if the issue has appeared on a PR branch that the PR itself is not causing the issue. 42 | 43 | ## Steps to Reproduce 44 | Whenever possible, retrace how you came across the problem and describe it here. 45 | 46 | If you have an understanding of why the bug occurred, that would be awesome to include as well! In this case, be as descriptive as you can and provide references to lines of code whenever possible! 47 | 48 | ### Expected Behavior 49 | Fill in the behavior expected from our system, as well as the reference you used. 50 | 51 | ### Actual Behavior 52 | Fill in the behavior you actually observed for our system. 
53 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Welcome to the PR tracker for **JuCC**! We're excited that you're interested in improving our system. 2 | 3 | **PRs that do not follow our guidelines** will be immediately closed. In general, you should avoid creating a PR until you are reasonably confident the tests should pass, having tested locally first. 4 | 5 | Please choose the appropriate labels on the GitHub panel and feel free to assign yourself. Additionally, if your PR solves an open issue, please link the issue on the GitHub panel. Feel free to assign reviewers to your PR; otherwise, we will decide who is best to assign for a review. 6 | 7 | # Heading 8 | Please choose an appropriate heading for your PR, relevant to the changes you have made. 9 | 10 | ## Description 11 | Please create a description of the issue your PR solves, and how you went about implementing your solution. 12 | 13 | ### Remaining Tasks 14 | Again, you should only create a PR once you are reasonably confident you are near completion. However, if there are some tasks still remaining before the PR is ready to merge, please create a checklist to track active progress. 15 | :pushpin: TODOs: 16 | - [x] ~~Stash limit in Parser for pushdown (INVALID)~~ 17 | - [x] Add optional property support 18 | - [ ] Fix memory leaks 19 | 20 | ## Further Work 21 | If your PR unlocked the potential for further improvements, please note them here and create additional issues! Do the same if you discovered bugs in the process of development. 22 | 23 | --- 24 | Here's an empty template to format yourself! 
25 | # Heading 26 | 27 | ## Description 28 | 29 | ## Remaining tasks 30 | 31 | - [ ] Foo 32 | - [ ] Bar 33 | - [ ] Baz 34 | 35 | ## Further work 36 | -------------------------------------------------------------------------------- /.github/workflows/build_tests.yml: -------------------------------------------------------------------------------- 1 | name: Build Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | linux-build: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: setup environment 19 | run: | 20 | export DEBIAN_FRONTEND="noninteractive" 21 | mkdir build 22 | sudo apt-get -y update 23 | 24 | - name: installing dependecies 25 | run: | 26 | echo y | sudo ./script/installation/packages.sh all 27 | 28 | - name: build 29 | working-directory: ./build 30 | run: | 31 | cmake -GNinja .. 32 | ninja -j2 33 | 34 | - name: check linting & formatting 35 | working-directory: ./build 36 | run: | 37 | ninja check-format 38 | ninja check-lint 39 | ninja check-clang-tidy 40 | 41 | - name: run unit-tests 42 | working-directory: ./build 43 | run: | 44 | ninja test 45 | 46 | - name: codecov report 47 | working-directory: ./build 48 | run: | 49 | bash <(curl -s https://codecov.io/bash) 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # build dir 2 | build 3 | cmake-build-debug/ 4 | node_modules/ 5 | .DS_Store 6 | package-lock.json 7 | 8 | # configs 9 | .idea 10 | .vscode 11 | 12 | # pycache 13 | __pycache__ 14 | *.pyc 15 | 16 | # Prerequisites 17 | *.d 18 | 19 | # Compiled Object files 20 | *.slo 21 | *.lo 22 | *.o 23 | *.obj 24 | 25 | # Precompiled Headers 26 | *.gch 27 | *.pch 28 | 29 | # Compiled Dynamic libraries 30 | *.so 31 | *.dylib 32 | *.dll 33 | 34 | # Fortran module files 35 | *.mod 36 | *.smod 37 | 38 | # Compiled Static 
libraries 39 | *.lai 40 | *.la 41 | *.a 42 | *.lib 43 | 44 | # Executables 45 | *.exe 46 | *.out 47 | *.app 48 | 49 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | CMD bash 3 | 4 | # Install Ubuntu packages. 5 | # Please add packages in alphabetical order. 6 | ARG DEBIAN_FRONTEND=noninteractive 7 | 8 | RUN apt-get -y update 9 | RUN apt-get -y install sudo 10 | COPY . /JuCC 11 | WORKDIR /JuCC 12 | 13 | RUN echo y | ./script/installation/packages.sh all 14 | RUN mkdir build 15 | WORKDIR /JuCC/build 16 | RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 17 | 18 | WORKDIR /JuCC 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JuCC 2 | 3 | JuCC logo 4 | 5 | ----------------- 6 | 7 | ![build](https://github.com/TheSYNcoder/JuCC/actions/workflows/build_tests.yml/badge.svg?branch=main) 8 | [![codecov](https://codecov.io/gh/TheSYNcoder/JuCC/branch/main/graph/badge.svg?token=1EIRMRKODX)](https://codecov.io/gh/TheSYNcoder/JuCC) 9 | 10 | This is the official Jadavpur University Compiler Compiler repository. 11 | 12 | ## Key Features 13 | * Supports a subset of the C language for now. 14 | * Custom grammar files to easily extend the language. 15 | * LL(1) parsing with panic mode error recovery. 16 | * Generates .json parse tree outputs for easy visualization with [Treant.js](https://fperucic.github.io/treant-js/). 17 | * 100% Open Source (Apache-2.0 License) 18 | 19 | ## Quickstart 20 | The JuCC project is built and tested on **Ubuntu 20.04**. 21 | 22 | ``` 23 | $ git clone https://github.com/TheSYNcoder/JuCC 24 | $ cd JuCC 25 | $ sudo ./script/installation/packages.sh 26 | $ cd server 27 | $ npm i 28 | $ cd .. 
29 | $ mkdir build 30 | $ cd build 31 | $ cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 32 | $ ninja jucc 33 | $ ./bin/jucc -g -f -o 34 | ``` 35 | 36 | To run the provided unit tests: 37 | 38 | ``` 39 | $ mkdir build 40 | $ cd build 41 | $ cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 42 | $ ninja 43 | $ ./bin/jucc_test 44 | ``` 45 | 46 | To run the benchmarks: 47 | Note: `-DCMAKE_BUILD_TYPE=Release` is required. 48 | 49 | ``` 50 | $ mkdir build 51 | $ cd build 52 | $ cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. 53 | $ ninja 54 | $ ./bin/jucc_benchmark 55 | ``` 56 | 57 | Before pushing or making a pull request, run the following checks (all of them must pass; this is compulsory): 58 | 59 | ``` 60 | $ ninja 61 | $ ninja check-format 62 | $ ninja check-clang-tidy 63 | $ ninja check-lint 64 | $ ninja test 65 | ``` 66 | 67 | To add a new unit test, make a folder with the same relative path as in the src folder, and define your test. Please refer to [docs](https://github.com/TheSYNcoder/JuCC/tree/main/docs/) for more details about writing tests using the [googletest](https://github.com/google/googletest) framework. 68 | 69 | 70 | Additional Notes: 71 | - If you know what you're doing, install the prerequisite packages from `./script/installation/packages.sh` manually. 72 | 73 | 74 | ## For Developers 75 | 76 | Please see the [docs](https://github.com/TheSYNcoder/JuCC/tree/main/docs/). 77 | 78 | ## Contributing 79 | 80 | Contributions from everyone are welcome! 81 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Benchmarks Directory 2 | 3 | These are the microbenchmarks for the compiler. We use the [benchmark](https://github.com/google/benchmark) framework. 
4 | -------------------------------------------------------------------------------- /benchmark/input/benchmark_grammar.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % , 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | main cin cout 12 | % 13 | 14 | ## Non Terminals 15 | %non_terminals 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | % 25 | 26 | ## Start Symbol 27 | %start 28 | 29 | % 30 | 31 | ## Grammar for the language 32 | %rules 33 | ## Expressions 34 | : identifier 35 | : 36 | : ( ) 37 | : integer_constant 38 | : float_constant 39 | : + 40 | : - 41 | : 42 | : 43 | : 44 | : * 45 | : / 46 | : % 47 | : 48 | : + 49 | : - 50 | : 51 | : cin >> 52 | : cout << 53 | : << 54 | : >> 55 | : 56 | : < 57 | : > 58 | : <= 59 | : >= 60 | : 61 | : == 62 | : != 63 | : 64 | : 65 | 66 | ## Declarations 67 | : ; 68 | : 69 | : , 70 | : EPSILON 71 | : 72 | : = 73 | : void 74 | : int 75 | : float 76 | : 77 | : identifier 78 | : ( ) 79 | : 80 | 81 | ## Statements 82 | : 83 | : 84 | : 85 | : { } 86 | : 87 | : 88 | : EPSILON 89 | : 90 | : 91 | : ; 92 | : ; 93 | : if ( ) 94 | : if ( ) else 95 | 96 | ## Main 97 | : main ( ) 98 | % 99 | -------------------------------------------------------------------------------- /benchmark/input/in1.cc: -------------------------------------------------------------------------------- 1 | // input file for benchmarking 2 | 3 | int main() { 4 | int x, y; 5 | cin >> x >> y; 6 | if (x != 0) { 7 | if (y > 0) { 8 | cout << y; 9 | } else { 10 | cout << -y; 11 | } 12 | } 13 | float z = 1 + 2 + 3 + 1000/ 50 * 23.2 * (x * y * 10); 14 | cout << x << y << z; 15 | 16 | // other stuff for benchmarking 17 | float z0 = 1 + 2 + 3 + 1000/ 50 * 23.2 * (x * y * 10); 18 
| float z1 = 1 + 2 - 3 + 1000/ 50 * 23.2 * (x * y * 10); 19 | float z2 = 1 + 2 / 3 + 1000/ 50 * 23.2 * (x * y * 10); 20 | float z3 = 1 + 2 * 3 + 1000/ 50 * 23.2 * (x * y * 10); 21 | float z4 = 1 + 2 % 3 + 1000/ 50 * 23.2 * (x * y * 10); 22 | float z5 = 1 + 2 > 3 + 1000/ 50 * 23.2 * (x * y * 10); 23 | float z6 = 1 + 2 == 3 + 1000/ 50 * 23.2 * (x * y * 10); 24 | float z7 = 1 + 2 != 3 + 1000/ 50 * 23.2 * (x * y * 10); 25 | float z8 = 1 + 2 >= 3 + 1000/ 50 * 23.2 * (x * y * 10); 26 | float z9 = 1 + 2 <= 3 + 1000/ 50 * 23.2 * (x * y * 10); 27 | cout << z0 << z1 << z2 << z3 << z4; 28 | cout << z5 << z6 << z7 << z8 << z9; 29 | } 30 | -------------------------------------------------------------------------------- /benchmark/main.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | 3 | BENCHMARK_MAIN(); 4 | -------------------------------------------------------------------------------- /benchmark/main/jucc_benchmark.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "main/jucc.h" 3 | 4 | // NOLINTNEXTLINE 5 | static void BenchmarkRadd(benchmark::State &state) { 6 | for (const auto &_ : state) { 7 | jucc::Radd(0, 1000000); 8 | } 9 | } 10 | 11 | BENCHMARK(BenchmarkRadd); 12 | 13 | // NOLINTNEXTLINE 14 | static void BenchmarkJuCC(benchmark::State &state) { 15 | std::string file_grammar = "../benchmark/input/benchmark_grammar.g"; 16 | std::string file_input = "../benchmark/input/in1.cc"; 17 | for (const auto &_ : state) { 18 | jucc::grammar::Parser grammar_parser = jucc::grammar::Parser(file_grammar.c_str()); 19 | grammar_parser.Parse(); 20 | jucc::grammar::Productions raw_productions = grammar_parser.GetProductions(); 21 | jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions); 22 | auto nullables = jucc::utils::CalcNullables(productions); 23 | auto firsts = 
jucc::utils::CalcFirsts(productions, nullables); 24 | auto follows = jucc::utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol()); 25 | auto terminals = grammar_parser.GetTerminals(); 26 | auto non_terminals = jucc::utils::GetAllNonTerminals(productions); 27 | jucc::parser::ParsingTable parsing_table = jucc::parser::ParsingTable(terminals, non_terminals); 28 | parsing_table.SetFirsts(firsts); 29 | parsing_table.SetFollows(follows); 30 | parsing_table.SetProductions(productions); 31 | parsing_table.BuildTable(); 32 | 33 | std::ifstream ifs(file_input); 34 | std::vector<std::string> input_tokens; 35 | jucc::lexer::Lexer lexer = jucc::lexer::Lexer(); 36 | int token; 37 | while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) { 38 | std::string ret_string = jucc::lexer::Lexer::GetTokenType(token); 39 | if (ret_string == "ignore") { 40 | continue; 41 | } 42 | input_tokens.emplace_back(ret_string); 43 | } 44 | 45 | jucc::parser::Parser parser = jucc::parser::Parser(); 46 | parser.SetInputString(input_tokens); 47 | parser.SetStartSymbol(grammar_parser.GetStartSymbol()); 48 | parser.SetParsingTable(parsing_table); 49 | while (!parser.IsComplete()) { 50 | parser.ParseNextStep(); 51 | } 52 | parser.BuildParseTree(); 53 | } 54 | } 55 | 56 | BENCHMARK(BenchmarkJuCC); 57 | 58 | /* 59 | * Same as BenchmarkJuCC but without disk operations 60 | */ 61 | // NOLINTNEXTLINE 62 | static void BenchmarkJuCCCompute(benchmark::State &state) { 63 | std::string file_grammar = "../benchmark/input/benchmark_grammar.g"; 64 | std::string file_input = "../benchmark/input/in1.cc"; 65 | 66 | jucc::grammar::Parser grammar_parser = jucc::grammar::Parser(file_grammar.c_str()); 67 | grammar_parser.Parse(); 68 | 69 | std::ifstream ifs(file_input); 70 | std::vector<std::string> input_tokens; 71 | jucc::lexer::Lexer lexer = jucc::lexer::Lexer(); 72 | int token; 73 | while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) { 74 | std::string ret_string = 
jucc::lexer::Lexer::GetTokenType(token); 75 | if (ret_string == "ignore") { 76 | continue; 77 | } 78 | input_tokens.emplace_back(ret_string); 79 | } 80 | 81 | for (const auto &_ : state) { 82 | jucc::grammar::Productions raw_productions = grammar_parser.GetProductions(); 83 | jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions); 84 | auto nullables = jucc::utils::CalcNullables(productions); 85 | auto firsts = jucc::utils::CalcFirsts(productions, nullables); 86 | auto follows = jucc::utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol()); 87 | auto terminals = grammar_parser.GetTerminals(); 88 | auto non_terminals = jucc::utils::GetAllNonTerminals(productions); 89 | jucc::parser::ParsingTable parsing_table = jucc::parser::ParsingTable(terminals, non_terminals); 90 | parsing_table.SetFirsts(firsts); 91 | parsing_table.SetFollows(follows); 92 | parsing_table.SetProductions(productions); 93 | parsing_table.BuildTable(); 94 | 95 | jucc::parser::Parser parser = jucc::parser::Parser(); 96 | parser.SetInputString(input_tokens); 97 | parser.SetStartSymbol(grammar_parser.GetStartSymbol()); 98 | parser.SetParsingTable(parsing_table); 99 | while (!parser.IsComplete()) { 100 | parser.ParseNextStep(); 101 | } 102 | parser.BuildParseTree(); 103 | } 104 | } 105 | 106 | BENCHMARK(BenchmarkJuCCCompute); 107 | -------------------------------------------------------------------------------- /build_support/benchmark_CMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(benchmark-download NONE) 4 | 5 | include(ExternalProject) 6 | ExternalProject_Add(benchmark 7 | GIT_REPOSITORY https://github.com/google/benchmark.git 8 | GIT_TAG master 9 | SOURCE_DIR "${CMAKE_BINARY_DIR}/benchmark-src" 10 | BINARY_DIR "${CMAKE_BINARY_DIR}/benchmark-build" 11 | CONFIGURE_COMMAND "" 12 | BUILD_COMMAND "" 13 | INSTALL_COMMAND "" 14 
| TEST_COMMAND "" 15 | ) 16 | -------------------------------------------------------------------------------- /build_support/clang_format_exclusions.txt: -------------------------------------------------------------------------------- 1 | *third_party* 2 | -------------------------------------------------------------------------------- /build_support/gtest_CMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(googletest-download NONE) 4 | 5 | include(ExternalProject) 6 | ExternalProject_Add(googletest 7 | GIT_REPOSITORY https://github.com/google/googletest.git 8 | GIT_TAG master 9 | SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" 10 | BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" 11 | CONFIGURE_COMMAND "" 12 | BUILD_COMMAND "" 13 | INSTALL_COMMAND "" 14 | TEST_COMMAND "" 15 | ) 16 | -------------------------------------------------------------------------------- /build_support/nlohmann_json_CMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | 3 | project(nlohmann-json-download NONE) 4 | 5 | include(ExternalProject) 6 | ExternalProject_Add(nlohmann_json 7 | GIT_REPOSITORY https://github.com/nlohmann/json.git 8 | GIT_TAG develop 9 | SOURCE_DIR "${CMAKE_BINARY_DIR}/nlohmann-json-src" 10 | BINARY_DIR "${CMAKE_BINARY_DIR}/nlohmann-json-build" 11 | CONFIGURE_COMMAND "" 12 | BUILD_COMMAND "" 13 | INSTALL_COMMAND "" 14 | TEST_COMMAND "" 15 | ) 16 | -------------------------------------------------------------------------------- /build_support/run_clang_format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | # Modified from the Apache Arrow project for the Terrier project. 20 | # 21 | # Modified from the CMU-DB NoisePage project for the BiscuitDB project. 22 | # 23 | # Taken from JUDB Group BiscuitDB project. 24 | 25 | 26 | import argparse 27 | import codecs 28 | import difflib 29 | import fnmatch 30 | import os 31 | import subprocess 32 | import sys 33 | 34 | 35 | def check(arguments, source_dir): 36 | formatted_filenames = [] 37 | error = False 38 | for directory, subdirs, filenames in os.walk(source_dir): 39 | fullpaths = (os.path.join(directory, filename) 40 | for filename in filenames) 41 | source_files = [x for x in fullpaths 42 | if x.endswith(".h") or x.endswith(".cpp")] 43 | formatted_filenames.extend( 44 | # Filter out files that match the globs in the globs file 45 | [filename for filename in source_files 46 | if not any((fnmatch.fnmatch(filename, exclude_glob) 47 | for exclude_glob in exclude_globs))]) 48 | 49 | if arguments.fix: 50 | if not arguments.quiet: 51 | # Print out each file on its own line, but run 52 | # clang format once for all of the files 53 | print("\n".join(map(lambda x: "Formatting {}".format(x), 54 | formatted_filenames))) 55 | subprocess.check_call([arguments.clang_format_binary, 56 | "-i"] + 
formatted_filenames) 57 | else: 58 | for filename in formatted_filenames: 59 | if not arguments.quiet: 60 | print("Checking {}".format(filename)) 61 | # 62 | # Due to some incompatibilities between Python 2 and 63 | # Python 3, there are some specific actions we take here 64 | # to make sure the difflib.unified_diff call works. 65 | # 66 | # In Python 2, the call to subprocess.check_output return 67 | # a 'str' type. In Python 3, however, the call returns a 68 | # 'bytes' type unless the 'encoding' argument is 69 | # specified. Unfortunately, the 'encoding' argument is not 70 | # in the Python 2 API. We could do an if/else here based 71 | # on the version of Python we are running, but it's more 72 | # straightforward to read the file in binary and do utf-8 73 | # conversion. In Python 2, it's just converting string 74 | # types to unicode types, whereas in Python 3 it's 75 | # converting bytes types to utf-8 encoded str types. This 76 | # approach ensures that the arguments to 77 | # difflib.unified_diff are acceptable string types in both 78 | # Python 2 and Python 3. 79 | with open(filename, "rb") as reader: 80 | # Run clang-format and capture its output 81 | formatted = subprocess.check_output( 82 | [arguments.clang_format_binary, 83 | filename]) 84 | formatted = codecs.decode(formatted, "utf-8") 85 | # Read the original file 86 | original = codecs.decode(reader.read(), "utf-8") 87 | # Run the equivalent of diff -u 88 | diff = list(difflib.unified_diff( 89 | original.splitlines(True), 90 | formatted.splitlines(True), 91 | fromfile=filename, 92 | tofile="{} (after clang format)".format( 93 | filename))) 94 | if diff: 95 | print("{} had clang-format style issues".format(filename)) 96 | # Print out the diff to stderr 97 | error = True 98 | sys.stderr.writelines(diff) 99 | 100 | return error 101 | 102 | 103 | if __name__ == "__main__": 104 | parser = argparse.ArgumentParser( 105 | description="Runs clang format on all of the source " 106 | "files. 
If --fix is specified, and compares the output " 107 | "with the existing file, outputting a unifiied diff if " 108 | "there are any necessary changes") 109 | parser.add_argument("clang_format_binary", 110 | help="Path to the clang-format binary") 111 | parser.add_argument("exclude_globs", 112 | help="Filename containing globs for files " 113 | "that should be excluded from the checks") 114 | parser.add_argument("--source_dirs", 115 | help="Comma-separated root directories of the code") 116 | parser.add_argument("--fix", default=False, 117 | action="store_true", 118 | help="If specified, will re-format the source " 119 | "code instead of comparing the re-formatted " 120 | "output, defaults to %(default)s") 121 | parser.add_argument("--quiet", default=False, 122 | action="store_true", 123 | help="If specified, only print errors") 124 | 125 | args = parser.parse_args() 126 | 127 | had_err = False 128 | exclude_globs = [line.strip() for line in open(args.exclude_globs)] 129 | for source_dir in args.source_dirs.split(','): 130 | if len(source_dir) > 0: 131 | had_err = had_err or check(args, source_dir) 132 | 133 | sys.exit(1 if had_err else 0) 134 | -------------------------------------------------------------------------------- /build_support/run_clang_tidy_extra.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Taken from JUDB Group BiscuitDB project. 4 | 5 | """ 6 | A helper class, to suppress execution of clang-tidy. 7 | 8 | In clang-tidy-6.0, if the clang-tidy configuration file suppresses ALL checks, 9 | (e.g. via a .clang-tidy file), clang-tidy will print usage information and 10 | exit with a return code of 0. Harmless but verbose. In later versions of 11 | clang-tidy the return code becomes 1, making this a bigger problem. 12 | 13 | This helper addresses the problem by suppressing execution according to 14 | the configuration in this file. 
15 | """ 16 | 17 | import re 18 | 19 | class CheckConfig(object): 20 | """ Check paths against the built-in config """ 21 | 22 | def __init__(self): 23 | self._init_config() 24 | # debug prints 25 | self.debug = False 26 | return 27 | 28 | def _init_config(self): 29 | """ Any path matching one of the ignore_pats regular expressions, 30 | denotes that we do NOT want to run clang-tidy on that item. 31 | """ 32 | self.ignore_pats = [".*/third_party/.*", ] 33 | return 34 | 35 | def should_skip(self, path): 36 | """ Should execution of clang-tidy be skipped? 37 | path - to check, against the configuration. 38 | Typically the full path. 39 | returns - False if we want to run clang-tidy 40 | True if we want to skip execution on this item 41 | """ 42 | for pat in self.ignore_pats: 43 | if re.match(pat, path): 44 | if self.debug: 45 | print("match pat: {}, {} => don't run".format(pat, path)) 46 | return True 47 | return False 48 | 49 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: yes 3 | 4 | coverage: 5 | precision: 2 6 | round: down 7 | range: "85...95" # coverage lower than 85 is red, higher than 95 green, between color code no change. 8 | status: 9 | project: 10 | default: 11 | target: auto 12 | threshold: 5.0 # allow for 5% reduction of coverage without failing 13 | 14 | # do not run coverage on patch nor changes. Check overall quality.
15 | patch: off 16 | 17 | parsers: 18 | gcov: 19 | branch_detection: 20 | conditional: yes 21 | loop: yes 22 | method: no 23 | macro: no 24 | 25 | comment: 26 | layout: "reach, diff, flags, files, footer" 27 | behavior: default 28 | require_changes: no 29 | 30 | ignore: 31 | # ignore the third_party directory 32 | - "third_party" 33 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # JuCC Developer Docs 2 | 3 | **Table of Contents** 4 | 5 | - [Getting Started](#getting-started) 6 | - [Development](#development) 7 | 8 | ## Getting Started 9 | 10 | Hi! Welcome to JuCC. 11 | 12 | ### System setup 13 | 14 | 1. **GitHub** We use GitHub for all our development. 15 | - **Account** [Sign up](https://github.com/join) for a GitHub account. 16 | 2. **OS** Make sure you are running [Ubuntu 20.04](https://releases.ubuntu.com/20.04/). If not, the recommended approach is to dual boot or to use a VM. 17 | 3. **IDE** We recommend [Visual Studio Code](https://code.visualstudio.com/download). 18 | 4. **Packages** Install using 19 | - Go to the folder: `cd ~/jucc/script/installation` 20 | - Install all the necessary packages: `sudo bash ./packages.sh` 21 | 22 | ### Further reading 23 | 24 | You should learn a little about the following: 25 | 26 | 1. [git](https://github.com/TheSYNcoder/JuCC/tree/main/docs/tech_git.md) 27 | 2. [C++ and how we use it](https://github.com/TheSYNcoder/JuCC/tree/main/docs/cpp_guidelines.md) 28 | 29 | ## Configuration 30 | 31 | ### CMake flags to know 32 | 33 | - We try our best to list all available options in [CMakeLists.txt](https://github.com/TheSYNcoder/JuCC/blob/main/CMakeLists.txt). Search for `# HEADER CMake options and global variables.` 34 | 35 | ### CMake targets to know 36 | 37 | - You should know these targets. 38 | - `jucc`: Building will build the `jucc` binary and all its dependencies. 
39 | - `jucc_benchmark`: Building will build and link the `jucc` object file to the `benchmark` library. Running will run the benchmarks. 40 | - `test`: Building will run all unit tests using `gtest`. This will not show specifics of failed tests; run `build/bin/jucc_test` for detailed info. 41 | - `format`: Building will run the formatter `clang-format` on the codebase with our rules. Use this every time right before you commit and right before you make a pull request! 42 | - `check-format`: Building will check if the codebase is correctly formatted according to `clang-format` with our rules. 43 | - `check-clang-tidy`: Building will check if the codebase passes the `clang-tidy` static analyzer tests with our rules. 44 | - `check-lint`: Building will check if the codebase passes the `build-support/cpplint.py` checks. 45 | 46 | If you run into issues, you may need your default `python` to point to a `python3` install. For example, add this to your `~/.zshrc`: `alias python=python3` 47 | 48 | ## Development 49 | 50 | ### Workflow 51 | 52 | 1. Check out the latest version of the JuCC repository. 53 | - `git checkout main` 54 | - `git pull upstream main` 55 | 2. Create a new branch. 56 | - `git checkout -b my_new_branch` 57 | 3. Work on your code. Add features, add documentation, add tests, remove bugs, and so on. 58 | 4. Push your code. 59 | - Make sure you run tests locally! See below. 60 | - `git push -u origin my_new_branch` 61 | 5. Go to GitHub and open a [new pull request](https://github.com/TheSYNcoder/JuCC/compare). 62 | 6. When a pull request is opened, this triggers our Continuous Integration environment on circle-ci. 63 | - CI will clone the repo, apply your changes, and make sure that formatting, linting, tests, etc pass. 64 | - Code has to pass all the checks for it to be merged! 65 | 66 | ### Running tests locally 67 | 68 | Run `make test` or `ninja test` inside your build folder to run all tests. 69 | 70 | For detailed test info: 71 | 1.
Go to the folder: `cd ~/JuCC/build` 72 | 2. Generate optimized config with `cmake -GNinja .. -DCMAKE_BUILD_TYPE=Release` 73 | 3. Build project with `ninja` 74 | 4. Run `./bin/jucc_test` 75 | 76 | ### Benchmarks 77 | 78 | 1. Go to the folder: `cd ~/JuCC/build` 79 | 2. Generate optimized config with `cmake -GNinja .. -DCMAKE_BUILD_TYPE=Release` 80 | 3. Build project with `ninja` 81 | 4. Run `./bin/jucc_benchmark [...options]` 82 | -------------------------------------------------------------------------------- /docs/cpp_guidelines.md: -------------------------------------------------------------------------------- 1 | # C++ Guidelines 2 | 3 | **Table of Contents** 4 | 5 | - [Language](#language) 6 | - [C++ Crash Course](#c++-crash-course) 7 | - [C++ Project Structure](#c++-project-structure) 8 | - [C++ Code Style](#c++-code-style) 9 | - [Compiler](#compiler) 10 | - [Debugging](#debugging) 11 | - [Testing](#testing) 12 | - [Documentation](#documentation) 13 | 14 | ## Language 15 | 16 | JuCC is developed in `C++17`. 17 | 18 | ### C++ Crash Course 19 | 20 | C++ provides a lot of leeway in compiler development compared to other high-level languages. For instance, it supports both manual and automated memory management, varied styles of programming, stronger type checking, different kinds of polymorphism etc. 21 | 22 | Here's a list of useful references: 23 | 24 | * [cppreference](http://en.cppreference.com/w/cpp) is an online reference of the powerful `Standard Template Library` (STL). 25 | * [C++ FAQ](https://isocpp.org/faq) covers a lot of topics. 26 | 27 | Here's a list of modern features that you might want to make use of: 28 | 29 | * `auto` type inference 30 | * Range-based `for` loops 31 | * Smart pointers, in particular `unique_ptr`. 32 | * STL data structures, such as `unordered_set`, `unordered_map`, etc. 33 | * Threads, deleted member functions, lambdas, etc. 34 | * `static_assert` and/or `type_traits` such as `std::enable_if`, `std::is_same`.
35 | 36 | ### C++ Project Structure 37 | 38 | #### Directory Structure 39 | 40 | Organize source code files into relevant folders based on their functionality. Separate binary files from source files, and production code from testing code. 41 | 42 | **Code directories** 43 | * `src`: This is where the bulk of the code for JuCC lives. Anything that you expect to be compiled into the release should be here. 44 | * `benchmark`: This is where Google Benchmarks and their utility code reside. `src` should not have dependencies going into `benchmark`. 45 | * `test`: This is where Google Tests and their utility code reside. `src` should not have dependencies going into `test`. `benchmark` may have dependencies going into `test`. 46 | * `third_party`: This is where we add code which was not written by us but which we need to modify. 47 | 48 | **Infrastructure directories** 49 | * `script`: Scripts that support development and testing live here (e.g. dependency installation). 50 | * `build_support`: Files that support Continuous Integration CMake targets (lint, format, etc.). 51 | 52 | ##### src 53 | There can be at most 2 levels of directories under `src`: the first level will be general system components (e.g. storage, execution, network, sql, common), and the second level will be either for a class of similar files, or for a self-contained sub-component. 54 | 55 | Translated into coding guidelines, you should rarely need to create a new first-level subdirectory, and should probably ask on Slack if you believe you do. To create a new secondary directory, make sure you meet the following criteria: 56 | * There are more than 2 (exclusive) files you need to put into this folder 57 | * Each file is stand-alone, i.e. either the contents don't make sense living in a single file, or that putting them in a single file makes the file large and difficult to navigate.
(This is open to interpretation, but if, for example, you have 3 files containing 10-line class definitions, maybe they should not be spread out that much). 58 | 59 | And one of the two: 60 | * The subdirectory is a self-contained sub-component. This probably means that the folder only has one outward facing API. A good rule of thumb is when outside code files only need to include one header from this folder, where said API is defined. 61 | * The subdirectory contains a logical grouping of files, and there are enough of them that leaving them ungrouped makes the upper level hard to navigate. (e.g. all the plans, all the common data structures, etc.) 62 | 63 | A good rule of thumb is if you have subdirectory `As`, you should be able to say with a straight face that everything under `As` is an A. (e.g. Everything under `containers` is a container) 64 | 65 | Every class and/or function under these directories should be in namespaces. All code will be under namespace `JuCC`, and namespace the same as their first-level directory name (e.g `common`, `storage`). Secondary sub-directories do not have associated namespaces. 66 | 67 | ##### test 68 | The directory structure of the `test` folder should generally reflect the directory structure of the `src` folder, ignoring the `include`. Each test should be under the same path as the file they test, and named "XXX_test". 69 | 70 | Generally, there can be no code sharing between tests since they are different build targets. There are cases, however, where it makes sense for tests to share some common utility function. In that case, you can write a utility file under `test/util`. 71 | 72 | The `test/util` folder should have no sub-directories. In most cases, one test util file for every directory under `src` should suffice. (e.g. `test/util/storage_test_util.h`) Sometimes it will make sense to have a `util` file for stand-alone modules (e.g. `test/include/util/random_test_util.h`). 
73 | 74 | ### C++ Code Style 75 | 76 | See [here](https://github.com/TheSYNcoder/JuCC/tree/main/docs/cpp_guidelines_code_style.md). 77 | 78 | ## Compiler 79 | 80 | We support GCC and LLVM Clang. **We do NOT support AppleClang** aka whatever compiler comes on macOS by default. 81 | 82 | How is the compiler actually invoked? 83 | 84 | 1. CMake is a *build system generator* that is commonly used for C++ development. 85 | - CMake does not compile your program. 86 | - Running `cmake ` generates a system that will compile your program. 87 | 2. CMake uses either [make](https://en.wikipedia.org/wiki/Make_(software)) (the default) or [ninja](https://ninja-build.org/) (requested by passing `-GNinja` as an argument to `cmake`). 88 | - We strongly encourage using `ninja`, which is faster and can intelligently build in parallel by default. 89 | 90 | For example, to manually compile JuCC, this is what you would do: 91 | 92 | 1. Clone the JuCC repo: `git clone https://github.com/TheSYNcoder/JuCC.git` 93 | 2. Create a build folder to build everything in: `mkdir build` 94 | 3. Go to the build folder: `cd build` 95 | 4. Generate a build system with CMake, passing in whatever arguments are desired: `cmake -GNinja -DCMAKE_BUILD_TYPE=Release` 96 | 5. Invoke the build system: `ninja jucc`, more generally `ninja ` for any valid target. 97 | 98 | We have configured the build system so that it will produce a `compile_commands.json` file. This contains the exact compiler invocations that will be used. You can check this file if you're curious. This file is also used by tools like `clang-tidy` to check for correctness. If you are not sure what a compiler flag does, look it up on Google or on the `man` page for `gcc` or `clang`. 99 | 100 | ## Debugging 101 | 102 | You should use a debugger to find any bugs where possible. 
103 | 104 | If you need to do complex debugging, you may want to check out the following links: 105 | 106 | - [gdb](https://www.gnu.org/software/gdb/): General GDB documentation. 107 | - [lldb](https://lldb.llvm.org/): General LLDB documentation. A competitor to GDB. 108 | - [rr](https://rr-project.org/): A reversible debugger that lets you go backwards in time. Very powerful, but requires some level of hardware support. 109 | 110 | ## Testing 111 | 112 | Unit tests are critical for ensuring the correct functionality of your modules and reduce time spent on debugging. It can help prevent regressions. We use [googletest](https://github.com/google/googletest), a nice unit-testing framework for C++ projects. 113 | 114 | You should write unit test cases for each class/algorithm that you have added or modified. See the testing section for detail. Try to come up with test cases that make sure that the module exhibits the desired behavior. Some developers even suggest writing the unit tests before implementing the code. Make sure that you include corner cases, and try to find off-by-one errors. 115 | 116 | ## Documentation 117 | 118 | See [here](https://github.com/TheSYNcoder/JuCC/tree/main/docs/cpp_guidelines_code_style.md). 119 | -------------------------------------------------------------------------------- /docs/cpp_guidelines_code_style.md: -------------------------------------------------------------------------------- 1 | # C++ Guidelines: Code Style 2 | 3 | ## Comments, Formatting, and Libraries 4 | 5 | Please **comment** your code. Comment all the class definitions, non-trivial member functions and variables, and all the steps in your algorithms. 6 | We generally follow the [Google C++ style guide](https://google.github.io/styleguide/cppguide.html). As they mention in that guide, these rules exist to keep the code base manageable while still allowing coders to use C++ language features productively. 
7 | Make sure that you follow the [naming rules](https://google.github.io/styleguide/cppguide.html#General_Naming_Rules). For instance, use `class UpperCaseCamelCase` for type names, `int lower_case_with_underscores` for variable/method/function names. 8 | 9 | Please refrain from using any libraries other than the `STL` (and `googletest` for unit testing) without contacting us. 10 | 11 | ## Code Organization and Best Practice 12 | We strive to write modern C++17 code, but C++ is a language with a lot of legacy features from the 80s. Certain guidelines must be followed to make the use of language features tasteful and elegant. 13 | 14 | ### `.h` and `.cpp` files 15 | 16 | Surprisingly, there is no universal standard on what to call C++ code files. In this project, we will use `.h` for headers, inside the various `/include` directories, and `.cpp` for the implementation. 17 | 18 | When possible, implementation should be separated between `.h` and `.cpp` files, as this will make the compilation process much faster and hassle-free. Documentation should be written in the `.h` files. There are a couple of exceptions to this rule: 19 | - One-liners or otherwise boilerplate code that is unlikely to change. What constitutes a one-liner is somewhat ambiguous and can be subjective. 20 | - Templates. The C++ compiler generates instantiations of actual code based on the template given, and may need this information in the compilation units themselves, thus requiring all definitions to be present in the included header. There are two solutions to this: either write all of the template code in the header or explicitly instantiate templates. Because doing the latter is painful, we generally will write those definitions in-place. 21 | 22 | ### Forward Declarations 23 | 24 | When referring to some object only by reference, object or some template arguments, it is not necessary that the code knows its structure.
As a result, we do not necessarily need to provide its complete declaration with members and methods (i.e. #include), but can get away with a hint that such a class exists. An example is given below: 25 | 26 | ```c++ 27 | class Foo; 28 | ... 29 | void DoSomething(Foo *foo); // compiles 30 | void DoSomethingElse(Foo *foo) { 31 | foo->bar(); // error, member access into opaque type 32 | } 33 | ... 34 | ``` 35 | Doing this saves re-compilation time. As a rule of thumb, forward declare when possible in the header, but always include the actual headers in the `.cpp` file. 36 | 37 | ### Concurrent Data Structures 38 | 39 | There are many implementations of concurrent data structures online, of differing quality. Before you bring in any external implementation, ask yourself: 40 | - Do I need a concurrent data structure? 41 | - Is this data structure implementation the right choice for my use case and workload? 42 | - Will someone in the future want to swap this out for a different implementation? 43 | 44 | Concurrent data structures, especially lock-free ones, are not magic. They perform well only in the environment they are designed for. Our advice is to always start simple, and ensure correctness with latches and other simple mechanisms. Performance gain needs to be measured and verified on representative benchmarks, taken against multiple alternatives. 45 | 46 | Finally, always prefer a wrapper whose underlying implementation can be swapped out with minimal effort to direct invocation of third-party code, when it comes to data structures. 47 | -------------------------------------------------------------------------------- /docs/tech_clangtools.md: -------------------------------------------------------------------------------- 1 | # Clang Tools 2 | 3 | ## clang-tidy 4 | 5 | Important clang-tidy points are emphasized here. Read this if you're planning on changing how we use clang-tidy.
6 | 7 | The [official documentation](https://clang.llvm.org/extra/clang-tidy/index.html) and [source code](https://clang.llvm.org/extra/doxygen/dir_83d3dc8f7afce718e8cda93164271fb8.html) are your best bet. 8 | 9 | **Overview** 10 | 11 | 1. clang-tidy is a static analyzer and linter. It checks for common code smells. 12 | 2. When you run clang-tidy, it searches parent directories for a .clang-tidy file. 13 | - `clang-tidy -dump-config` will print out the current configuration. 14 | 3. clang-tidy reads the output of compile_commands.json, which is generated when you run cmake. 15 | 1. compile_commands.json only contains .cpp files. 16 | 2. Therefore, clang-tidy can only process C++ (.cpp) files. It cannot process header (.h) files by themselves. 17 | 3. clang-tidy will however process .h files which are included in a .cpp file. 18 | 4. You might not want warnings from all header files, e.g. only src/, not third_party/. 19 | 5. clang-tidy only supports whitelisting headers via a single regex string. 20 | - HeaderFilterRegex: 'REGEX' in .clang-tidy 21 | - -header-filter=REGEX on the command line, **note that this overrides your .clang-tidy setting**. 22 | - The (undocumented) format of the regex is POSIX ERE. See the implementation of [HeaderFilterRegex](https://clang.llvm.org/extra/doxygen/ClangTidyDiagnosticConsumer_8cpp_source.html#l00533) and [llvm::Regex](http://llvm.org/doxygen/Regex_8h_source.html#l00040). 23 | 4. clang-tidy separates the notion of Warning, Error, Compilation Error. 24 | - clang-tidy will only display warnings for enabled [checks](https://clang.llvm.org/extra/clang-tidy/checks/list.html). 25 | - clang-tidy will convert all warnings that match the WarningsAsErrors regex to errors. 26 | - **WarningsAsErrors does not enable any checks on its own**. 27 | - The list of checks are parsed as regex. **If you make a typo, it silently ignores that check**. 28 | - Run `clang-tidy -list-checks` to confirm which checks are enabled. 29 | 5. 
You can get clang-tidy to leave certain lines alone. 30 | - To ignore the same line, put `// NOLINT` at the end 31 | - To ignore the next line, put `// NOLINTNEXTLINE` on the immediate preceding line 32 | - Wherever possible, avoid using line-filter. Nobody wants to maintain line numbers when code gets added/deleted. 33 | - A `clang-diagnostic-error` may mean a compilation problem. No amount of NOLINT or disabling checks will shut that up. If you're pulling in third-party header dependencies, make sure they're a dependency for our check-clang-tidy make target too. 34 | 35 | **Gotchas** 36 | 37 | 1. Running clang-tidy on a list of files will run clang-tidy on each file sequentially. 38 | 2. If you include a header file which needs fixing in multiple .cpp files, clang-tidy will repeat the fix multiple times. An example which was included in three files: 39 | ``` 40 | Original code : if (last_errno_) 41 | "Fixed" code : if (last_errno_ != 0 != 0 != 0) 42 | ``` 43 | 44 | For both of the above problems, LLVM recommends using their [run-clang-tidy.py](https://github.com/llvm-mirror/clang-tools-extra/blob/master/clang-tidy/tool/run-clang-tidy.py) script. It will gather all the code fixes and apply them at once to prevent this issue. It also supports parallelism. 45 | 46 | However, if you use LLVM's default run-clang-tidy.py, caveat emptor: 47 | 48 | - It doesn't work on [Python 3](https://github.com/llvm-mirror/clang-tools-extra/blob/master/clang-tidy/tool/run-clang-tidy.py#L166). 49 | - It will override .clang-tidy's [HeaderFilterRegex](https://github.com/llvm-mirror/clang-tools-extra/blob/master/clang-tidy/tool/run-clang-tidy.py#L86). 50 | - From a Pythonic/PyLint point of view, the code quality isn't great. 51 | - That said, it is actively being developed and the above criticisms may no longer be valid. 52 | 53 | We currently use a modified version of run-clang-tidy.py. 54 | 55 | ## clang-format 56 | 57 | This section discusses clang-format and how we use it. 
We use the Google C++ style guide. 58 | 59 | Check the [official documentation](https://clang.llvm.org/docs/ClangFormat.html). 60 | 61 | **Overview** 62 | 63 | 1. clang-format is a code formatter and format checker. 64 | 2. When you run clang-format, it searches parent directories for a .clang-format file. 65 | 3. USAGE: `clang-format [options] [ ...]` 66 | 67 | **Gotchas** 68 | 69 | 1. We use the [run_clang_format.py](https://github.com/TheSYNcoder/JuCC/blob/main/build_support/run_clang_format.py) to automate the workflow. 70 | 71 | ## Summary 72 | 73 | Before creating a pull request make sure these tests pass. 74 | 75 | - `ninja check-format` 76 | - `ninja check-clang-tidy` 77 | - `ninja check-lint` 78 | -------------------------------------------------------------------------------- /docs/tech_docker.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | ## Installation 4 | 5 | ### Getting Docker 6 | #### Mac (Homebrew) 7 | 8 | 1. `brew cask install docker` 9 | 2. Launch /Applications/Docker.app. 10 | 11 | #### Other (older Ubuntu versions) 12 | 13 | See [Install Docker CE](https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository). The recommended approach is to install using the repository. See section "Install Docker CE", subsection "Install using the repository" and follow the instructions for: 14 | 15 | * SET UP THE REPOSITORY followed by 16 | * INSTALL DOCKER CE 17 | 18 | See [Docker CE](https://www.docker.com/community-edition) if additional information is required. 19 | 20 | ### Setup 21 | 22 | 1. Launch Docker. 23 | 2. From the folder containing the Dockerfile, build the Docker image. 24 | - `docker build -t jucc .` 25 | - docker will load your local repo into a `/jucc` directory in the image 26 | 27 | ## Usage 28 | 29 | 1. Run the Docker image: `docker run -itd --name build jucc` 30 | 2. 
Run CMake: 31 | - `docker exec build cmake ..` 32 | - `docker exec build make` 33 | 34 | You can interact with the Docker image with 35 | - single commands: `docker exec [-w WORKING_DIRECTORY] build BASH_COMMAND` 36 | - interactive: `docker exec -it build bash` 37 | - by default, the docker image starts in the `/jucc/build` directory 38 | 39 | 40 | **Note: The below step DELETES all the work you have on Docker.** 41 | 42 | To stop the Docker image, run both: 43 | 1. `docker container stop build` 44 | 2. `docker rm build` 45 | 46 | 47 | ## Quirks 48 | 49 | Docker on Windows and Docker on Mac do not behave nicely with LSAN. LSAN needs to be allowed to spawn a ptrace thread. You'll need to `docker run --cap-add SYS_PTRACE ...` to get it working. 50 | -------------------------------------------------------------------------------- /docs/tech_git.md: -------------------------------------------------------------------------------- 1 | # Git 2 | 3 | There are lots of guides and documents on the internet, but there are too many and many are confusing. Here is a mini guide to use git with a minimal number of commands and parameters. You won't find any details or explanation of Git's internal mechanisms here. 4 | 5 | ### Remote Transfer or how to communicate with the world 6 | * Get a fresh repository: git clone `` 7 | * Update current repository to latest: git fetch -v 8 | * Update current repository with commit from a fork: git fetch -v `` `` 9 | * Send your new commit to the remote: git push `` `` 10 | 11 | ### Commit or how to communicate with your local repository 12 | * stage your change with dynamic selection: git add/rm -p `` 13 | * commit your change: git commit 14 | * uncommit previous commit: git reset --soft HEAD~1 15 | * unstage your change: git reset HEAD -- 16 | * discard your change **forever** with dynamic selection: git checkout -p -- `` 17 | 18 | ### Stash or how to save your precious work 19 | Stash is very useful. 
For example, you will use it before/after (push/pop) merge/rebase action 20 | * Push pending update on the stack: git stash 21 | * Get back your update: git stash pop 22 | * view content of your stash: git stash show -p `stash@\{0\}` 23 | 24 | ### Rebase or how to screw the history 25 | **Never** rebase commits that were pushed remotely. Rebase can be used to improve your current patch set, or to fast-forward-merge after a fetch. For better software engineering we **never** directly merge the upstream main when doing local development. When accepting a PR, we expect you to fetch the upstream main to your local repository, and rebase all the changes in your local branch on top of the upstream main. 26 | * The rebase command: git rebase -i `` 27 | * Cancel it : git rebase --abort 28 | * Resolve conflict: git mergetool `` 29 | * Continue rebase: git rebase --continue 30 | 31 | ### Branch or how to separate your work by feature 32 | Please note that main is actually the default branch 33 | * List branches: git branch -v 34 | * Switch to another branch: git checkout `` 35 | * Creates: git branch `` 36 | * Delete branches: git branch -d `` 37 | * Set the base reference of the branch (for rebase): git branch --set-upstream-to=`` `` 38 | 39 | # Git use case example 40 | 41 | ### Branch management 42 | Let's say you want to rebase your current branch topic-v1 to topic-v2 with new additions. Note: topic-v1 could also be main too. 43 | * Go to current branch: git checkout topic-v1 44 | * Create a new one: git branch topic-v2 45 | * Go into the new branch: git checkout topic-v2 46 | * Set the reference: git branch --set-upstream-to=origin/main topic-v2 47 | * Rebase: git rebase -i 48 | * ... 49 | 50 | ### Split commit 51 | * Copy your repository if you're not confident with this kind of operation: cp -a `` `` 52 | * Do a rebase: git rebase -i 53 | * Use edit on the commit that you want to split 54 | ... rebase on-going... 
55 | * Uncommit: git reset --soft HEAD~1 56 | * Unstage: git reset HEAD -- 57 | 58 | At this stage of operation, you get all your changes in the local files, but nothing is ready to be committed. 59 | 60 | Repeat the 2 next commands for each new commits that you want to create 61 | * Stage your change with dynamic selection: git add/rm -p `` 62 | * Commit your change: git commit 63 | 64 | Once you have finished to split your commit: 65 | * Finish the rebase: git rebase --continue 66 | -------------------------------------------------------------------------------- /grammar.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % , 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | main cin cout 12 | % 13 | 14 | ## Non Terminals 15 | %non_terminals 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | % 25 | 26 | ## Start Symbol 27 | %start 28 | 29 | % 30 | 31 | ## Grammar for the language 32 | %rules 33 | ## Expressions 34 | : identifier 35 | : 36 | : ( ) 37 | : integer_constant 38 | : float_constant 39 | : + 40 | : - 41 | : 42 | : 43 | : 44 | : * 45 | : / 46 | : % 47 | : 48 | : + 49 | : - 50 | : 51 | : cin >> 52 | : cout << 53 | : << 54 | : >> 55 | : 56 | : < 57 | : > 58 | : <= 59 | : >= 60 | : 61 | : == 62 | : != 63 | : 64 | : 65 | 66 | ## Declarations 67 | : ; 68 | : 69 | : , 70 | : EPSILON 71 | : 72 | : = 73 | : void 74 | : int 75 | : float 76 | : 77 | : identifier 78 | : ( ) 79 | : 80 | 81 | ## Statements 82 | : 83 | : 84 | : 85 | : { } 86 | : 87 | : 88 | : EPSILON 89 | : 90 | : 91 | : ; 92 | : ; 93 | : if ( ) 94 | : if ( ) else 95 | 96 | ## Main 97 | : main ( ) 98 | % 99 | -------------------------------------------------------------------------------- 
#!/bin/bash

## =================================================================
## JUCC PACKAGE INSTALLATION
##
## This script will install all the packages that are needed to
## build and run the compiler.
##
## Usage: packages.sh [build|test|all]    (defaults to "build")
##
## Supported environments:
## * Ubuntu 20.04
## * macOS Big Sur
## =================================================================

LINUX_BUILD_PACKAGES=(
  "build-essential"
  "clang-11"
  "clang-format-11"
  "clang-tidy-11"
  "cmake"
  "git"
  "llvm-11"
  "pkg-config"
  "python3-pip"
  "python-is-python3"
  "ninja-build"
  "wget"
  "time"
)

DARWIN_BUILD_PACKAGES=(
  "cmake"
  "git"
  "pkg-config"
  "ninja"  # Homebrew formula name; "ninja-build" is the apt package name.
  "wget"
)

# Also installed through Homebrew on macOS (see install_darwin).
LINUX_TEST_PACKAGES=(
  "curl"
  "lsof"
)

# Packages to be installed through pip3.
PYTHON_BUILD_PACKAGES=(
  "cpplint"
)

PYTHON_TEST_PACKAGES=(
)


## =================================================================


# Entry point: validates the installation type, asks for confirmation and
# dispatches to the platform-specific installer.
#   $1 - installation type: build | test | all (default: build)
main() {
  set -o errexit

  INSTALL_TYPE="${1:-build}"
  case "$INSTALL_TYPE" in
    build|test|all) ;;
    *)
      echo "Invalid installation type '$INSTALL_TYPE'"
      echo "Allowed Values: build test all"
      exit 1
      ;;
  esac

  echo "PACKAGES WILL BE INSTALLED. THIS MAY BREAK YOUR EXISTING TOOLCHAIN."
  echo "YOU ACCEPT ALL RESPONSIBILITY BY PROCEEDING."
  echo
  echo "INSTALLATION TYPE: $INSTALL_TYPE"
  # Only an explicit Y/y proceeds; anything else (including an empty answer)
  # skips the installation, hence the [y/N] hint.
  read -r -p "Proceed? [y/N] : " yn
  case "$yn" in
    Y|y) install ;;
    *) ;;
  esac

  echo "Script complete."
}

# Prints an "unsupported platform" report and exits with status 1.
#   $1 - OS / distribution name
#   $2 - version string (may be empty)
give_up() {
  set +x
  OS=$1
  VERSION=$2
  if [ -n "$VERSION" ]; then
    VERSION=" $VERSION"
  fi

  echo
  echo "Unsupported distribution '${OS}${VERSION}'"
  echo "Please contact our support team for additional help."
  echo "Be sure to include the contents of this message."
  echo "Platform: $(uname -a)"
  echo
  echo "https://github.com/TheSYNcoder/JuCC/issues"
  echo
  exit 1
}

# Detects the platform and runs the matching installer; gives up on anything
# other than Ubuntu 20.x or Darwin.
install() {
  set -x
  UNAME=$(uname | tr "[:lower:]" "[:upper:]")
  VERSION=""

  case "$UNAME" in
    LINUX)
      DISTRO=$(grep '^ID=' /etc/os-release | cut -d '=' -f 2 | tr "[:lower:]" "[:upper:]" | tr -d '"')
      VERSION=$(grep '^VERSION_ID=' /etc/os-release | cut -d '"' -f 2)

      # We only support Ubuntu right now.
      # Arguments are quoted so an empty DISTRO cannot shift VERSION into $1.
      if [ "$DISTRO" != "UBUNTU" ]; then
        give_up "$DISTRO" "$VERSION"
      fi

      # Check Ubuntu version
      case "$VERSION" in
        20.*) install_linux ;;
        *) give_up "$DISTRO" "$VERSION" ;;
      esac
      ;;
    DARWIN) install_darwin ;;
    *) give_up "$UNAME" "$VERSION" ;;
  esac
}

# Succeeds when the selected INSTALL_TYPE covers the given package group.
#   $1 - package group: build | test
install_type_includes() {
  [ "$INSTALL_TYPE" = "$1" ] || [ "$INSTALL_TYPE" = "all" ]
}

# Bootstraps pip for python3 from the official get-pip.py script.
install_pip() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py
  rm get-pip.py
}

# Installs every pip package given as an argument unless `pip show` already
# reports it as installed. Accepts zero arguments (empty package list).
install_python_packages() {
  local pkg
  for pkg in "$@"; do
    if [ "$pkg" == "cpplint" ]; then
      # cpplint is installed system-wide via sudo.
      # NOTE(review): presumably so the entry point lands on the system PATH -- confirm.
      sudo python3 -m pip show "$pkg" || sudo python3 -m pip install "$pkg"
    else
      python3 -m pip show "$pkg" || python3 -m pip install "$pkg"
    fi
  done
}

# Installs the apt and pip packages for the selected installation type.
install_linux() {
  # Update apt-get.
  apt-get -y update

  # Install packages; each array element is passed as its own argument.
  if install_type_includes build; then
    apt-get -y install "${LINUX_BUILD_PACKAGES[@]}"
  fi
  if install_type_includes test; then
    apt-get -y install "${LINUX_TEST_PACKAGES[@]}"
  fi

  if install_type_includes build; then
    install_python_packages "${PYTHON_BUILD_PACKAGES[@]}"
  fi
  if install_type_includes test; then
    install_python_packages "${PYTHON_TEST_PACKAGES[@]}"
  fi
}

# Installs Homebrew (if missing), python/pip (if missing) and the brew and pip
# packages for the selected installation type.
install_darwin() {
  echo "Starting install on darwin, this may take some time ..."
  # Check for Homebrew, install if we don't have it
  if ! command -v brew >/dev/null 2>&1; then
    echo "Installing homebrew..."
    # The Ruby installer is deprecated; Homebrew ships a Bash installer.
    /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
  fi

  if ! command -v python3 >/dev/null 2>&1; then
    echo "Installing python..."
    brew install python
  fi

  if ! command -v pip >/dev/null 2>&1; then
    echo "Installing pip..."
    install_pip
  fi

  # Update homebrew recipes
  brew update

  # Install GNU core utilities (those that come with OS X are outdated).
  # The former `brew tap homebrew/dupes` step is gone: that tap was retired
  # and tapping it fails, which would abort the script under errexit.
  brew install coreutils

  if install_type_includes build; then
    brew install llvm
    # -f keeps a second run from failing on links created by the first one.
    ln -sf "$(brew --prefix llvm)/bin/clang-format" "/usr/local/bin/clang-format"
    ln -sf "$(brew --prefix llvm)/bin/clang-tidy" "/usr/local/bin/clang-tidy"
    ln -sf "$(brew --prefix llvm)/bin/clang-apply-replacements" "/usr/local/bin/clang-apply-replacements"
    brew install "${DARWIN_BUILD_PACKAGES[@]}"
  fi
  if install_type_includes test; then
    brew install "${LINUX_TEST_PACKAGES[@]}"
  fi

  if install_type_includes build; then
    install_python_packages "${PYTHON_BUILD_PACKAGES[@]}"
  fi
  if install_type_includes test; then
    install_python_packages "${PYTHON_TEST_PACKAGES[@]}"
  fi
}

main "$@"
"node server.js", 11 | "test": "echo \"Error: no test specified\" && exit 1" 12 | }, 13 | "keywords": [], 14 | "author": "", 15 | "license": "ISC", 16 | "dependencies": { 17 | "ejs": "^3.1.6", 18 | "express": "^4.17.1" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /server/public/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) Copyright © 2016 Fran Peručić 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /server/public/README.md: -------------------------------------------------------------------------------- 1 |
 2 |   _______                   _          _     
 3 |  |__   __|                 | |        (_)    
 4 |     | |_ __ ___  __ _ _ __ | |_ ______ _ ___ 
 5 |     | | '__/ _ \/ _` | '_ \| __|______| / __|
 6 |     | | | |  __/ (_| | | | | |_       | \__ \
 7 |     |_|_|  \___|\__,_|_| |_|\__|      | |___/
 8 |                                      _/ |    
 9 |                                     |__/     
10 | 
11 | 12 | Treant-js is an SVG based JS library for drawing tree diagrams. 13 | It relies on Raphael for handling SVG and animations. 14 | 15 | For Docs, Examples, and everything else see: 16 | http://fperucic.github.io/treant-js 17 | -------------------------------------------------------------------------------- /server/public/Treant.css: -------------------------------------------------------------------------------- 1 | /* required LIB STYLES */ 2 | /* .Treant se automatski dodaje na svaki chart conatiner */ 3 | .Treant { position: relative; overflow: hidden; padding: 0 !important; } 4 | .Treant > .node, 5 | .Treant > .pseudo { position: absolute; display: block; visibility: hidden; } 6 | .Treant.Treant-loaded .node, 7 | .Treant.Treant-loaded .pseudo { visibility: visible; } 8 | .Treant > .pseudo { width: 0; height: 0; border: none; padding: 0; } 9 | .Treant .collapse-switch { width: 3px; height: 3px; display: block; border: 1px solid black; position: absolute; top: 1px; right: 1px; cursor: pointer; } 10 | .Treant .collapsed .collapse-switch { background-color: #868DEE; } 11 | .Treant > .node img { border: none; float: left; } 12 | -------------------------------------------------------------------------------- /server/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheSYNcoder/JuCC/4765f9dd93349de93c09d53594721af0fea0acc0/server/public/favicon.ico -------------------------------------------------------------------------------- /server/public/jquery.easing.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery Easing v1.3 - http://gsgd.co.uk/sandbox/jquery/easing/ 3 | * 4 | * Uses the built in easing capabilities added In jQuery 1.1 5 | * to offer multiple easing options 6 | * 7 | * TERMS OF USE - jQuery Easing 8 | * 9 | * Open source under the BSD License. 
10 | * 11 | * Copyright © 2008 George McGinley Smith 12 | * All rights reserved. 13 | * 14 | * Redistribution and use in source and binary forms, with or without modification, 15 | * are permitted provided that the following conditions are met: 16 | * 17 | * Redistributions of source code must retain the above copyright notice, this list of 18 | * conditions and the following disclaimer. 19 | * Redistributions in binary form must reproduce the above copyright notice, this list 20 | * of conditions and the following disclaimer in the documentation and/or other materials 21 | * provided with the distribution. 22 | * 23 | * Neither the name of the author nor the names of contributors may be used to endorse 24 | * or promote products derived from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 27 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 28 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 30 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 31 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 33 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 34 | * OF THE POSSIBILITY OF SUCH DAMAGE. 
35 | * 36 | */ 37 | 38 | // t: current time, b: begInnIng value, c: change In value, d: duration 39 | jQuery.easing['jswing'] = jQuery.easing['swing']; 40 | 41 | jQuery.extend( jQuery.easing, 42 | { 43 | def: 'easeOutQuad', 44 | swing: function (x, t, b, c, d) { 45 | //alert(jQuery.easing.default); 46 | return jQuery.easing[jQuery.easing.def](x, t, b, c, d); 47 | }, 48 | easeInQuad: function (x, t, b, c, d) { 49 | return c*(t/=d)*t + b; 50 | }, 51 | easeOutQuad: function (x, t, b, c, d) { 52 | return -c *(t/=d)*(t-2) + b; 53 | }, 54 | easeInOutQuad: function (x, t, b, c, d) { 55 | if ((t/=d/2) < 1) return c/2*t*t + b; 56 | return -c/2 * ((--t)*(t-2) - 1) + b; 57 | }, 58 | easeInCubic: function (x, t, b, c, d) { 59 | return c*(t/=d)*t*t + b; 60 | }, 61 | easeOutCubic: function (x, t, b, c, d) { 62 | return c*((t=t/d-1)*t*t + 1) + b; 63 | }, 64 | easeInOutCubic: function (x, t, b, c, d) { 65 | if ((t/=d/2) < 1) return c/2*t*t*t + b; 66 | return c/2*((t-=2)*t*t + 2) + b; 67 | }, 68 | easeInQuart: function (x, t, b, c, d) { 69 | return c*(t/=d)*t*t*t + b; 70 | }, 71 | easeOutQuart: function (x, t, b, c, d) { 72 | return -c * ((t=t/d-1)*t*t*t - 1) + b; 73 | }, 74 | easeInOutQuart: function (x, t, b, c, d) { 75 | if ((t/=d/2) < 1) return c/2*t*t*t*t + b; 76 | return -c/2 * ((t-=2)*t*t*t - 2) + b; 77 | }, 78 | easeInQuint: function (x, t, b, c, d) { 79 | return c*(t/=d)*t*t*t*t + b; 80 | }, 81 | easeOutQuint: function (x, t, b, c, d) { 82 | return c*((t=t/d-1)*t*t*t*t + 1) + b; 83 | }, 84 | easeInOutQuint: function (x, t, b, c, d) { 85 | if ((t/=d/2) < 1) return c/2*t*t*t*t*t + b; 86 | return c/2*((t-=2)*t*t*t*t + 2) + b; 87 | }, 88 | easeInSine: function (x, t, b, c, d) { 89 | return -c * Math.cos(t/d * (Math.PI/2)) + c + b; 90 | }, 91 | easeOutSine: function (x, t, b, c, d) { 92 | return c * Math.sin(t/d * (Math.PI/2)) + b; 93 | }, 94 | easeInOutSine: function (x, t, b, c, d) { 95 | return -c/2 * (Math.cos(Math.PI*t/d) - 1) + b; 96 | }, 97 | easeInExpo: function (x, t, 
b, c, d) { 98 | return (t==0) ? b : c * Math.pow(2, 10 * (t/d - 1)) + b; 99 | }, 100 | easeOutExpo: function (x, t, b, c, d) { 101 | return (t==d) ? b+c : c * (-Math.pow(2, -10 * t/d) + 1) + b; 102 | }, 103 | easeInOutExpo: function (x, t, b, c, d) { 104 | if (t==0) return b; 105 | if (t==d) return b+c; 106 | if ((t/=d/2) < 1) return c/2 * Math.pow(2, 10 * (t - 1)) + b; 107 | return c/2 * (-Math.pow(2, -10 * --t) + 2) + b; 108 | }, 109 | easeInCirc: function (x, t, b, c, d) { 110 | return -c * (Math.sqrt(1 - (t/=d)*t) - 1) + b; 111 | }, 112 | easeOutCirc: function (x, t, b, c, d) { 113 | return c * Math.sqrt(1 - (t=t/d-1)*t) + b; 114 | }, 115 | easeInOutCirc: function (x, t, b, c, d) { 116 | if ((t/=d/2) < 1) return -c/2 * (Math.sqrt(1 - t*t) - 1) + b; 117 | return c/2 * (Math.sqrt(1 - (t-=2)*t) + 1) + b; 118 | }, 119 | easeInElastic: function (x, t, b, c, d) { 120 | var s=1.70158;var p=0;var a=c; 121 | if (t==0) return b; if ((t/=d)==1) return b+c; if (!p) p=d*.3; 122 | if (a < Math.abs(c)) { a=c; var s=p/4; } 123 | else var s = p/(2*Math.PI) * Math.asin (c/a); 124 | return -(a*Math.pow(2,10*(t-=1)) * Math.sin( (t*d-s)*(2*Math.PI)/p )) + b; 125 | }, 126 | easeOutElastic: function (x, t, b, c, d) { 127 | var s=1.70158;var p=0;var a=c; 128 | if (t==0) return b; if ((t/=d)==1) return b+c; if (!p) p=d*.3; 129 | if (a < Math.abs(c)) { a=c; var s=p/4; } 130 | else var s = p/(2*Math.PI) * Math.asin (c/a); 131 | return a*Math.pow(2,-10*t) * Math.sin( (t*d-s)*(2*Math.PI)/p ) + c + b; 132 | }, 133 | easeInOutElastic: function (x, t, b, c, d) { 134 | var s=1.70158;var p=0;var a=c; 135 | if (t==0) return b; if ((t/=d/2)==2) return b+c; if (!p) p=d*(.3*1.5); 136 | if (a < Math.abs(c)) { a=c; var s=p/4; } 137 | else var s = p/(2*Math.PI) * Math.asin (c/a); 138 | if (t < 1) return -.5*(a*Math.pow(2,10*(t-=1)) * Math.sin( (t*d-s)*(2*Math.PI)/p )) + b; 139 | return a*Math.pow(2,-10*(t-=1)) * Math.sin( (t*d-s)*(2*Math.PI)/p )*.5 + c + b; 140 | }, 141 | easeInBack: function (x, t, 
b, c, d, s) { 142 | if (s == undefined) s = 1.70158; 143 | return c*(t/=d)*t*((s+1)*t - s) + b; 144 | }, 145 | easeOutBack: function (x, t, b, c, d, s) { 146 | if (s == undefined) s = 1.70158; 147 | return c*((t=t/d-1)*t*((s+1)*t + s) + 1) + b; 148 | }, 149 | easeInOutBack: function (x, t, b, c, d, s) { 150 | if (s == undefined) s = 1.70158; 151 | if ((t/=d/2) < 1) return c/2*(t*t*(((s*=(1.525))+1)*t - s)) + b; 152 | return c/2*((t-=2)*t*(((s*=(1.525))+1)*t + s) + 2) + b; 153 | }, 154 | easeInBounce: function (x, t, b, c, d) { 155 | return c - jQuery.easing.easeOutBounce (x, d-t, 0, c, d) + b; 156 | }, 157 | easeOutBounce: function (x, t, b, c, d) { 158 | if ((t/=d) < (1/2.75)) { 159 | return c*(7.5625*t*t) + b; 160 | } else if (t < (2/2.75)) { 161 | return c*(7.5625*(t-=(1.5/2.75))*t + .75) + b; 162 | } else if (t < (2.5/2.75)) { 163 | return c*(7.5625*(t-=(2.25/2.75))*t + .9375) + b; 164 | } else { 165 | return c*(7.5625*(t-=(2.625/2.75))*t + .984375) + b; 166 | } 167 | }, 168 | easeInOutBounce: function (x, t, b, c, d) { 169 | if (t < d/2) return jQuery.easing.easeInBounce (x, t*2, 0, c, d) * .5 + b; 170 | return jQuery.easing.easeOutBounce (x, t*2-d, 0, c, d) * .5 + c*.5 + b; 171 | } 172 | }); 173 | 174 | /* 175 | * 176 | * TERMS OF USE - EASING EQUATIONS 177 | * 178 | * Open source under the BSD License. 179 | * 180 | * Copyright © 2001 Robert Penner 181 | * All rights reserved. 182 | * 183 | * Redistribution and use in source and binary forms, with or without modification, 184 | * are permitted provided that the following conditions are met: 185 | * 186 | * Redistributions of source code must retain the above copyright notice, this list of 187 | * conditions and the following disclaimer. 188 | * Redistributions in binary form must reproduce the above copyright notice, this list 189 | * of conditions and the following disclaimer in the documentation and/or other materials 190 | * provided with the distribution. 
191 | * 192 | * Neither the name of the author nor the names of contributors may be used to endorse 193 | * or promote products derived from this software without specific prior written permission. 194 | * 195 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 196 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 197 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 198 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 199 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 200 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 201 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 202 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 203 | * OF THE POSSIBILITY OF SUCH DAMAGE. 204 | * 205 | */ 206 | -------------------------------------------------------------------------------- /server/public/jquery.mousewheel.js: -------------------------------------------------------------------------------- 1 | /*! Copyright (c) 2011 Brandon Aaron (http://brandonaaron.net) 2 | * Licensed under the MIT License (LICENSE.txt). 3 | * 4 | * Thanks to: http://adomas.org/javascript-mouse-wheel/ for some pointers. 5 | * Thanks to: Mathias Bank(http://www.mathias-bank.de) for a scope bug fix. 
6 | * Thanks to: Seamus Leahy for adding deltaX and deltaY 7 | * 8 | * Version: 3.0.6 9 | * 10 | * Requires: 1.2.2+ 11 | */ 12 | 13 | (function($) { 14 | 15 | var types = ['DOMMouseScroll', 'mousewheel']; 16 | 17 | if ($.event.fixHooks) { 18 | for ( var i=types.length; i; ) { 19 | $.event.fixHooks[ types[--i] ] = $.event.mouseHooks; 20 | } 21 | } 22 | 23 | $.event.special.mousewheel = { 24 | setup: function() { 25 | if ( this.addEventListener ) { 26 | for ( var i=types.length; i; ) { 27 | this.addEventListener( types[--i], handler, false ); 28 | } 29 | } else { 30 | this.onmousewheel = handler; 31 | } 32 | }, 33 | 34 | teardown: function() { 35 | if ( this.removeEventListener ) { 36 | for ( var i=types.length; i; ) { 37 | this.removeEventListener( types[--i], handler, false ); 38 | } 39 | } else { 40 | this.onmousewheel = null; 41 | } 42 | } 43 | }; 44 | 45 | $.fn.extend({ 46 | mousewheel: function(fn) { 47 | return fn ? this.bind("mousewheel", fn) : this.trigger("mousewheel"); 48 | }, 49 | 50 | unmousewheel: function(fn) { 51 | return this.unbind("mousewheel", fn); 52 | } 53 | }); 54 | 55 | 56 | function handler(event) { 57 | var orgEvent = event || window.event, args = [].slice.call( arguments, 1 ), delta = 0, returnValue = true, deltaX = 0, deltaY = 0; 58 | event = $.event.fix(orgEvent); 59 | event.type = "mousewheel"; 60 | 61 | // Old school scrollwheel delta 62 | if ( orgEvent.wheelDelta ) { delta = orgEvent.wheelDelta/120; } 63 | if ( orgEvent.detail ) { delta = -orgEvent.detail/3; } 64 | 65 | // New school multidimensional scroll (touchpads) deltas 66 | deltaY = delta; 67 | 68 | // Gecko 69 | if ( orgEvent.axis !== undefined && orgEvent.axis === orgEvent.HORIZONTAL_AXIS ) { 70 | deltaY = 0; 71 | deltaX = -1*delta; 72 | } 73 | 74 | // Webkit 75 | if ( orgEvent.wheelDeltaY !== undefined ) { deltaY = orgEvent.wheelDeltaY/120; } 76 | if ( orgEvent.wheelDeltaX !== undefined ) { deltaX = -1*orgEvent.wheelDeltaX/120; } 77 | 78 | // Add event and delta to the front 
of the arguments 79 | args.unshift(event, delta, deltaX, deltaY); 80 | 81 | return ($.event.dispatch || $.event.handle).apply(this, args); 82 | } 83 | 84 | })(jQuery); 85 | -------------------------------------------------------------------------------- /server/public/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "treantjs", 3 | "version": "1.0.0", 4 | "description": "JavaScipt library for visualization of tree diagrams", 5 | "main": "Treant.js", 6 | "directories": { 7 | "example": "examples" 8 | }, 9 | "scripts": { 10 | "test": "echo \"Error: no test specified\" && exit 1" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/fperucic/treant-js.git" 15 | }, 16 | "keywords": [ 17 | "treant", 18 | "diagram", 19 | "tree", 20 | "js", 21 | "svg", 22 | "draw" 23 | ], 24 | "author": "Fran Peručić", 25 | "license": "MIT", 26 | "bugs": { 27 | "url": "https://github.com/fperucic/treant-js/issues" 28 | }, 29 | "homepage": "https://github.com/fperucic/treant-js#readme" 30 | } 31 | -------------------------------------------------------------------------------- /server/public/perfect-scrollbar.css: -------------------------------------------------------------------------------- 1 | .ps-container .ps-scrollbar-x { 2 | position: absolute; /* please don't change 'position' */ 3 | bottom: 3px; /* there must be 'bottom' for ps-scrollbar-x */ 4 | height: 8px; 5 | background-color: #aaa; 6 | border-radius: 4px; 7 | -webkit-border-radius: 4px; 8 | -moz-border-radius: 4px; 9 | opacity: 0; 10 | filter: alpha(opacity = 0); 11 | -webkit-transition: opacity.2s linear; 12 | -moz-transition: opacity .2s linear; 13 | transition: opacity .2s linear; 14 | } 15 | 16 | .ps-container:hover .ps-scrollbar-x { 17 | opacity: 0.6; 18 | filter: alpha(opacity = 60); 19 | } 20 | 21 | .ps-container .ps-scrollbar-x:hover { 22 | opacity: 0.9; 23 | filter: alpha(opacity = 90); 24 | cursor:default; 25 | } 26 | 27 
| .ps-container .ps-scrollbar-x.in-scrolling { 28 | opacity: 0.9; 29 | filter: alpha(opacity = 90); 30 | } 31 | 32 | .ps-container .ps-scrollbar-y { 33 | position: absolute; /* please don't change 'position' */ 34 | right: 3px; /* there must be 'right' for ps-scrollbar-y */ 35 | width: 8px; 36 | background-color: #aaa; 37 | border-radius: 4px; 38 | -webkit-border-radius: 4px; 39 | -moz-border-radius: 4px; 40 | opacity: 0; 41 | filter: alpha(opacity = 0); 42 | -webkit-transition: opacity.2s linear; 43 | -moz-transition: opacity .2s linear; 44 | transition: opacity .2s linear; 45 | } 46 | 47 | .ps-container:hover .ps-scrollbar-y { 48 | opacity: 0.6; 49 | filter: alpha(opacity = 60); 50 | } 51 | 52 | .ps-container .ps-scrollbar-y:hover { 53 | opacity: 0.9; 54 | filter: alpha(opacity = 90); 55 | cursor: default; 56 | } 57 | 58 | .ps-container .ps-scrollbar-y.in-scrolling { 59 | opacity: 0.9; 60 | filter: alpha(opacity = 90); 61 | } 62 | -------------------------------------------------------------------------------- /server/public/tree.css: -------------------------------------------------------------------------------- 1 | body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,form,fieldset,input,textarea,p,blockquote,th,td { margin:0; padding:0; } 2 | table { border-collapse:collapse; border-spacing:0; } 3 | fieldset,img { border:0; } 4 | address,caption,cite,code,dfn,em,strong,th,var { font-style:normal; font-weight:normal; } 5 | caption,th { text-align:left; } 6 | h1,h2,h3,h4,h5,h6 { font-size:100%; font-weight:normal; } 7 | q:before,q:after { content:''; } 8 | abbr,acronym { border:0; } 9 | 10 | body { background: #fff; font-family: "Helvetica Neue",Helvetica,Arial,sans-serif; } 11 | /* optional Container STYLES */ 12 | .chart { height: 100vh; width: 80vw; margin: 5px; margin: 15px auto; border: 3px solid #DDD; border-radius: 3px; } 13 | 14 | ul{ 15 | list-style-type: none; 16 | } 17 | .evolution-tree { 18 | padding: 2px; 19 | width: 40px; height: 40px; 20 | 
// Serves a JSON parse tree as a Treant.js diagram rendered by the "index" view.
//
// Usage: node server.js <json-file>
//   <json-file> is read from ../build/, relative to the current working directory.
//   The PORT environment variable overrides the default port (8080).

const fs = require('fs');
const path = require('path');
const express = require('express');

const DEFAULT_PORT = 8080;
// The environment must be consulted first: `8080 || process.env.PORT` is
// always 8080 because a non-zero literal is truthy.
const port = process.env.PORT || DEFAULT_PORT;

const treeFile = process.argv[2];
if (!treeFile) {
    // Fail with a usage hint instead of trying to open "../build/undefined".
    console.error('Usage: node server.js <json-file>  (file is read from ../build/)');
    process.exit(1);
}
const out_json = JSON.parse(fs.readFileSync(`../build/${treeFile}`, 'utf-8'));

// Configuration object handed to the view.
const tree = {
    chart: {
        container: "#chart",
        levelSeparation: 25,
        siblingSeparation: 70,
        // NOTE(review): "subTee" looks like a typo but is believed to be
        // Treant's own option spelling -- confirm before renaming.
        subTeeSeparation: 70,
        nodeAlign: "BOTTOM",
        scrollbar: "fancy",
        padding: 35,
        node: { HTMLclass: "evolution-tree" },
        connectors: {
            type: "curve",
            style: {
                "stroke-width": 2,
                "stroke-linecap": "round",
                "stroke": "#ccc"
            }
        }
    },
    nodeStructure: out_json
};

const app = express();
app.set("view engine", "ejs");
app.use(express.static(path.join(__dirname, "public")));

app.get("/", (req, res) => {
    res.render("index", { tree: tree });
});

// Log only once the socket is actually bound, and report the real port.
app.listen(port, () => {
    console.log('Server started on port', port);
    console.log(`Go to http://localhost:${port} to view the Abstract Syntax Tree`);
});

Abstract Syntax Tree Visualization - JUCC

16 |
17 |
Creators
18 |
19 |
    20 |
  • Shuvayan Ghosh Dastidar
  • 21 |
  • Abhishek Pal
  • 22 |
  • Bisakh Mondal
  • 23 |
  • Aritra Samanta
  • 24 |
25 |
26 |
27 |
28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/grammar/grammar.cpp: -------------------------------------------------------------------------------- 1 | #include "grammar/grammar.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace jucc::grammar { 9 | 10 | bool HasParent(const grammar::Productions &productions, const std::string &parent) { 11 | return std::any_of(productions.begin(), productions.end(), 12 | [&](const grammar::Production &prod) { return prod.GetParent() == parent; }); 13 | } 14 | 15 | grammar::Rules GetRulesForParent(const grammar::Productions &productions, const std::string &parent) { 16 | for (const auto &production : productions) { 17 | if (production.GetParent() == parent) { 18 | return production.GetRules(); 19 | } 20 | } 21 | return grammar::Rules(); 22 | } 23 | 24 | Parser::Parser(const char *filepath) { file_ = std::ifstream(filepath); } 25 | 26 | Parser::~Parser() { 27 | if (file_.is_open()) { 28 | file_.close(); 29 | } 30 | } 31 | 32 | std::vector Parser::FastTokenize(const std::string &s) { 33 | std::vector res; 34 | std::stringstream ss(s); 35 | std::string token; 36 | while (ss >> token) { 37 | res.push_back(token); 38 | } 39 | return res; 40 | } 41 | 42 | /** 43 | * This is based on a basic state machine that implicitly uses a grammar to parse. 44 | * The parse states represent a block of a .g grammar file. 45 | * Eg. ParseState TERMINALS imples we have seen a %terminals token and yet to see 46 | * a % block closing token. 47 | * RuleState keeps track of additonal states required to parse a rule inside of a 48 | * %rule block. 
49 | */ 50 | bool Parser::Parse() { 51 | // Parser State 52 | enum ParseState { BASIC, TERMINALS, NON_TERMINALS, START, RULES }; 53 | 54 | // Parser State while parsing productions 55 | // only valid when Parser State is RULES 56 | enum RuleState { LEFT, COLON, ENTITY }; 57 | 58 | const std::string terminals_start = "%terminals"; 59 | const std::string non_terminals_start = "%non_terminals"; 60 | const std::string start_start = "%start"; 61 | const std::string rules_start = "%rules"; 62 | const std::string state_reset_token = "%"; 63 | 64 | // check if file is open else return false 65 | if (file_.is_open()) { 66 | ParseState curr_parse_state = BASIC; 67 | std::string line; 68 | 69 | // ugh... 70 | std::unordered_map>> grammar; 71 | 72 | while (getline(file_, line)) { 73 | // states for rule parsing 74 | RuleState curr_rule_state = LEFT; 75 | std::string production_parent; 76 | std::vector rule_entities; 77 | 78 | // ignore empty lines and comments 79 | if (line.length() == 0 || line[0] == '#') { 80 | continue; 81 | } 82 | 83 | // tokenize line 84 | std::vector tokens = FastTokenize(line); 85 | 86 | // update parser state 87 | if (tokens[0] == terminals_start) { 88 | if (curr_parse_state != BASIC || tokens.size() > 1) { 89 | error_ = "grammar parsing error: invalid token %terminals"; 90 | return false; 91 | } 92 | curr_parse_state = TERMINALS; 93 | tokens.erase(tokens.begin()); 94 | } else if (tokens[0] == non_terminals_start) { 95 | if (curr_parse_state != BASIC || tokens.size() > 1) { 96 | error_ = "grammar parsing error: invalid token %non_terminals"; 97 | return false; 98 | } 99 | curr_parse_state = NON_TERMINALS; 100 | tokens.erase(tokens.begin()); 101 | } else if (tokens[0] == start_start) { 102 | if (curr_parse_state != BASIC || tokens.size() > 1) { 103 | error_ = "grammar parsing error: invalid token %start"; 104 | return false; 105 | } 106 | curr_parse_state = START; 107 | tokens.erase(tokens.begin()); 108 | } else if (tokens[0] == rules_start) { 109 | if 
(curr_parse_state != BASIC || tokens.size() > 1) { 110 | error_ = "grammar parsing error: invalid token %rules"; 111 | return false; 112 | } 113 | curr_parse_state = RULES; 114 | tokens.erase(tokens.begin()); 115 | } else if (tokens[0] == state_reset_token) { 116 | if (curr_parse_state == BASIC || tokens.size() > 1) { 117 | error_ = "grammar parsing error: invalid token %"; 118 | return false; 119 | } 120 | curr_parse_state = BASIC; 121 | tokens.erase(tokens.begin()); 122 | } 123 | 124 | // iterate over tokens 125 | // add tokens to different lists as a function of Parser State 126 | // for Parser State RULES parse the production following implicit grammar: 127 | // "LEFT : ENTITY_LIST" 128 | // "ENTITY_LIST: ENTITY ENTITY_LIST | ENTITY" 129 | for (const auto &token : tokens) { 130 | switch (curr_parse_state) { 131 | case BASIC: 132 | error_ = "grammar parsing error: invalid token outside block: " + token; 133 | return false; 134 | break; 135 | 136 | case TERMINALS: 137 | if (token == std::string(EPSILON)) { 138 | error_ = "grammar parsing error: EPSILON is reserved"; 139 | return false; 140 | } 141 | terminals_.push_back(token); 142 | break; 143 | 144 | case NON_TERMINALS: 145 | if (token == std::string(EPSILON)) { 146 | error_ = "grammar parsing error: EPSILON is reserved"; 147 | return false; 148 | } 149 | non_terminals_.push_back(token); 150 | break; 151 | 152 | case START: 153 | if (!start_symbol_.empty() || token == std::string(EPSILON)) { 154 | error_ = "grammar parsing error: ambiguous start symbol"; 155 | return false; 156 | } 157 | start_symbol_ = token; 158 | break; 159 | 160 | case RULES: 161 | switch (curr_rule_state) { 162 | case LEFT: 163 | if (token == std::string(EPSILON)) { 164 | error_ = "grammar parsing error: production cannot start with EPSILON"; 165 | return false; 166 | } 167 | production_parent = token; 168 | curr_rule_state = COLON; 169 | break; 170 | 171 | case COLON: 172 | if (token != ":") { 173 | error_ = "grammar parsing error: rules 
syntax error ':' expected: " + token; 174 | return false; 175 | } 176 | curr_rule_state = ENTITY; 177 | break; 178 | 179 | case ENTITY: 180 | rule_entities.push_back(token); 181 | break; 182 | 183 | default: 184 | break; 185 | } 186 | default: 187 | break; 188 | } 189 | } 190 | 191 | if (curr_parse_state == RULES) { 192 | if (curr_rule_state == ENTITY) { 193 | grammar[production_parent].push_back(rule_entities); 194 | } 195 | if (curr_rule_state == COLON) { 196 | error_ = "grammar parsing error: rules syntax error ':' expected"; 197 | return false; 198 | } 199 | } 200 | } 201 | 202 | // sanity checks 203 | // check for undefined tokens and duplicates 204 | if (curr_parse_state != BASIC) { 205 | error_ = "grammar parsing error: block is incomplete '%' expected"; 206 | return false; 207 | } 208 | 209 | std::unordered_set terminals; 210 | std::unordered_set non_terminals; 211 | 212 | for (const auto &terminal : terminals_) { 213 | terminals.insert(terminal); 214 | } 215 | 216 | if (terminals.size() != terminals_.size()) { 217 | error_ = "grammar parsing error: inconsistent or duplicate terminals"; 218 | return false; 219 | } 220 | 221 | for (const auto &nterminal : non_terminals_) { 222 | if (terminals.find(nterminal) != terminals.end()) { 223 | error_ = "grammar parsing error: terminals and non_terminals not disjoint"; 224 | return false; 225 | } 226 | non_terminals.insert(nterminal); 227 | } 228 | 229 | if (non_terminals.size() != non_terminals_.size()) { 230 | error_ = "grammar parsing error: inconsistent or duplicate non_terminals"; 231 | return false; 232 | } 233 | 234 | // convert std::unordered_map to Production object with checks 235 | for (const auto &production : grammar) { 236 | Production prod; 237 | if (non_terminals.find(production.first) == non_terminals.end()) { 238 | error_ = "grammar parsing error: non_terminal not found: " + production.first; 239 | return false; 240 | } 241 | prod.SetParent(production.first); 242 | 243 | Rules rules; 244 | for 
(const auto &rule : production.second) { 245 | Rule prod_rule; 246 | for (const auto &entity : rule) { 247 | if (non_terminals.find(entity) == non_terminals.end() && terminals.find(entity) == terminals.end() && 248 | entity != std::string(EPSILON)) { 249 | error_ = "grammar parsing error: rule token is not defined: " + entity; 250 | return false; 251 | } 252 | } 253 | prod_rule.SetEntities(rule); 254 | rules.push_back(prod_rule); 255 | } 256 | 257 | prod.SetRules(rules); 258 | 259 | // add Production object created to list of valid productions 260 | grammar_.push_back(prod); 261 | } 262 | 263 | return true; 264 | } 265 | 266 | error_ = "grammar parsing error: file not found"; 267 | return false; 268 | } 269 | 270 | std::string Rule::ToString() const { 271 | std::stringstream ss; 272 | for (const auto &entity : entities_) { 273 | ss << entity; 274 | } 275 | return ss.str(); 276 | } 277 | 278 | bool Rule::HasPrefix(const Rule &prefix) const { 279 | // Takes care of even EPSILON too. 280 | if (prefix.GetEntities().size() > entities_.size()) { 281 | return false; 282 | } 283 | 284 | for (int i = 0; i < static_cast(prefix.GetEntities().size()); i++) { 285 | if (entities_[i] != prefix.GetEntities()[i]) { 286 | return false; 287 | } 288 | } 289 | 290 | return true; 291 | } 292 | 293 | } // namespace jucc::grammar 294 | -------------------------------------------------------------------------------- /src/include/grammar/grammar.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_GRAMMAR_GRAMMAR_H 2 | #define JUCC_GRAMMAR_GRAMMAR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace jucc { 10 | namespace grammar { 11 | const char EPSILON[] = "EPSILON"; 12 | 13 | class Rule { 14 | /** 15 | * An entity is a single terminal or non terminal in the right hand side 16 | * of a production. 17 | * Example: 18 | * For production: E : F + E => { "F", "+", "E" } is a rule. 
19 | */ 20 | std::vector entities_; 21 | 22 | public: 23 | Rule() = default; 24 | explicit Rule(std::vector entities) : entities_(std::move(entities)) {} 25 | [[nodiscard]] const std::vector &GetEntities() const { return entities_; } 26 | void SetEntities(const std::vector &entities) { Rule::entities_ = entities; } 27 | [[nodiscard]] std::string ToString() const; 28 | 29 | /** 30 | * Takes an Rule and checks if the entries of the Rule is a perfect 31 | * prefix of the this->entities_ or not. 32 | * @return a boolean after checking if param is actually a prefix or not. 33 | */ 34 | [[nodiscard]] bool HasPrefix(const Rule & /*prefix*/) const; 35 | }; 36 | 37 | using Rules = std::vector; 38 | 39 | class Production { 40 | /** 41 | * class Parser returns a list of Productions. 42 | * Example: 43 | * For productions: E : F + E 44 | * E : EPSILON 45 | * parent = "E" 46 | * rules = { Rule1, Rule2 } 47 | */ 48 | std::string parent_; 49 | Rules rules_; 50 | 51 | public: 52 | Production() = default; 53 | Production(std::string parent, Rules rules) : parent_(std::move(parent)), rules_(std::move(rules)) {} 54 | 55 | [[nodiscard]] const std::string &GetParent() const { return parent_; } 56 | [[nodiscard]] const Rules &GetRules() const { return rules_; } 57 | void SetParent(const std::string &parent) { Production::parent_ = parent; } 58 | void SetRules(const Rules &rules) { Production::rules_ = rules; } 59 | }; 60 | 61 | using Productions = std::vector; 62 | 63 | /** 64 | * Search if a production exists for a given parent 65 | * utility function 66 | * @return if parent symbol is present in productions 67 | */ 68 | bool HasParent(const grammar::Productions & /*productions*/, const std::string & /*parent*/); 69 | 70 | /** 71 | * Given a parent finds all rules for it in the set of productions 72 | * @returns a vector of rules for the given parent 73 | */ 74 | Rules GetRulesForParent(const grammar::Productions & /*productions*/, const std::string & /*parent*/); 75 | 76 | class 
Parser { 77 | std::ifstream file_; 78 | std::vector terminals_; // Terminals defined in grammar file 79 | std::vector non_terminals_; // Non terminals defined in grammar file 80 | std::string start_symbol_; // Start symbol for the grammar 81 | Productions grammar_; // Production rules 82 | std::string error_; // parser error message 83 | 84 | /** 85 | * Splits input string via spaces. 86 | */ 87 | static std::vector FastTokenize(const std::string &s); 88 | 89 | public: 90 | /** 91 | * Constructor 92 | * @param filepath : opens std::ifstream file_ 93 | */ 94 | explicit Parser(const char *filepath); 95 | 96 | /** 97 | * Destructor 98 | * closes std::ifstream file_ 99 | */ 100 | ~Parser(); 101 | 102 | /** 103 | * Parses the input file and populates private variables. 104 | * Returns true if successful. 105 | * On error returns false and sets error_. 106 | */ 107 | bool Parse(); 108 | 109 | /** 110 | * Getters for each private variable. 111 | */ 112 | std::vector GetTerminals() { return terminals_; } 113 | std::vector GetNonTerminals() { return non_terminals_; } 114 | std::string GetStartSymbol() { return start_symbol_; } 115 | Productions GetProductions() { return grammar_; } 116 | std::string GetError() { return error_; } 117 | }; 118 | 119 | } // namespace grammar 120 | } // namespace jucc 121 | 122 | #endif // JUCC_GRAMMAR_GRAMMAR_H 123 | -------------------------------------------------------------------------------- /src/include/lexer/lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_LEXER_LEXER_H 2 | #define JUCC_LEXER_LEXER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "symbol_table/symbol_table.h" 9 | 10 | namespace jucc { 11 | namespace lexer { 12 | 13 | enum Token { 14 | TOK_EOF = -1, 15 | TOK_IDENTIFIER = -2, 16 | TOK_DECIMAL = -3, 17 | TOK_FRACTIONAL = -4, 18 | 19 | // Conditionals 20 | TOK_IF = -6, 21 | TOK_ELSE = -7, 22 | 23 | // Data types 24 | TOK_INT = -8, 25 | TOK_FLOAT = -9, 26 
| TOK_VOID = -10, 27 | // error 28 | TOK_ERROR = -100, 29 | 30 | // punctuation tokens 31 | 32 | TOK_SEMICOLON = -11, // ; 33 | TOK_CURLY_OPEN = -12, // { 34 | TOK_CURLY_CLOSE = -13, // } 35 | TOK_PAREN_OPEN = -14, // ( 36 | TOK_PAREN_CLOSE = -15, // ) 37 | TOK_DOT = -16, // . 38 | TOK_COMMA = -17, // , 39 | TOK_LEFT_SHIFT = -18, // << 40 | TOK_RIGHT_SHIFT = -19, // >> 41 | TOK_LESS_THAN = -20, // < 42 | TOK_GREATER_THAN = -21, // > 43 | TOK_EQUAL_TO = -22, // == 44 | TOK_ASSIGNMENT = -23, // = 45 | TOK_COMMENT = -24, // // 46 | TOK_LITERAL = -25, // "c++" 47 | TOK_CHARACTER = -26, // 'c' 48 | 49 | // arithmetic tokens 50 | 51 | TOK_PLUS = -29, // + 52 | TOK_MINUS = -30, // - 53 | TOK_MULTIPLY = -31, // * 54 | TOK_DIVIDE = -32, // / 55 | TOK_MODULUS = -33, // % 56 | 57 | // RELATIONAL 58 | 59 | TOK_GREATER_THAN_OR_EQUALS = -34, // >= 60 | TOK_LESS_THAN_OR_EQUALS = -35, // <= 61 | TOK_NOT = -36, // ! 62 | TOK_NOT_EQUAL_TO = -37, // != 63 | 64 | TOK_MAIN = -38, // main 65 | 66 | // cout, cin 67 | 68 | TOK_COUT = -27, // cout 69 | TOK_CIN = -28, // cin 70 | }; 71 | 72 | class Lexer { 73 | /** 74 | * used to store a identifier token 75 | */ 76 | std::string identifier_string_; 77 | /** 78 | * used to store a corresponding error token 79 | * suppose a numerical token 16.3ere which is 80 | * neither a numerical token or a identifier 81 | */ 82 | 83 | std::string error_string_; 84 | /** 85 | * used to store a literal string 86 | * literal strings are of type "a string" 87 | */ 88 | 89 | std::string literal_string_; 90 | /** 91 | * used to store the value of the integer token 92 | * during tokenization. 93 | */ 94 | 95 | int intval_; 96 | /** 97 | * used to store the value of the float token 98 | * during tokenization. 99 | */ 100 | 101 | double floatval_; 102 | /** 103 | * The current nesting level as parsed by the lexer in 104 | * the input file. 105 | */ 106 | 107 | int current_nesting_level_{0}; 108 | /** 109 | * vector to store duplicate symbol errors. 
110 | */ 111 | std::vector duplicate_symbol_errors_; 112 | /** 113 | * vector to store undeclared symbol errors. 114 | */ 115 | std::vector undeclared_symbol_errors_; 116 | 117 | /** 118 | * Stores the current datatype of the identifier. 119 | * for example int a = 5; 120 | * When tokenizing a current_datatype_ = "int", for all others it is 121 | * an empty string. 122 | */ 123 | std::string current_datatype_; 124 | 125 | /** 126 | * A symbol table object for building up the symbol table for the input file. 127 | * Check src/include/symbol_table/symbol_table.h and src/symbol_table/symbol_table.cpp 128 | * for more details. 129 | */ 130 | symbol_table::SymbolTable symbol_table_; 131 | 132 | /** 133 | * checks if a variable is directly present 134 | * after a declared datatype 135 | * 136 | * Use case 1 137 | * int a = 5; for token 'a' direct_before_datatype = true 138 | * else false 139 | * 140 | * Use case 2 141 | * 142 | * int z = 5 + y , t = 5 , r = 5 + s; 143 | * for token z, direct_before_datatype = true 144 | * for token y, direct_before_datatype = false 145 | * for token t, direct_before_datatype = true 146 | * for token r, direct_before_datatype = true 147 | * for token s, direct_before_datatype = false 148 | */ 149 | bool direct_before_datatype_{false}; 150 | 151 | public: 152 | Lexer() = default; 153 | 154 | /** 155 | * Takes a ifstream object as input and gets the next character 156 | * from the input file and returns the appropriate token. 157 | */ 158 | int GetToken(std::ifstream &is); 159 | 160 | /** 161 | * Getter for the current_datatype. 162 | */ 163 | std::string GetCurrentDatatype(); 164 | 165 | /** 166 | * Returns the terminal of the enum token returned by the lexer 167 | */ 168 | static std::string GetTokenType(int token); 169 | 170 | /** 171 | * Getter for the current nesting level. 172 | */ 173 | [[nodiscard]] int GetCurrentNestingLevel() const; 174 | 175 | /** 176 | * Getter for undeclared symbol errors. 
177 | */ 178 | std::vector GetUndeclaredSymbolErrors(); 179 | 180 | /** 181 | * Getter for duplicate symbol errors. 182 | */ 183 | std::vector GetDuplicateSymbolErrors(); 184 | 185 | /** 186 | * Getter for direct_before_datatype flag 187 | */ 188 | [[nodiscard]] const bool &GetDirectBeforeDatatypeFlag() const { return direct_before_datatype_; } 189 | }; // class Lexer 190 | 191 | } // namespace lexer 192 | } // namespace jucc 193 | 194 | #endif 195 | -------------------------------------------------------------------------------- /src/include/main/jucc.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_JUCC_H 2 | #define JUCC_JUCC_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "grammar/grammar.h" 10 | #include "lexer/lexer.h" 11 | #include "parser/parser.h" 12 | #include "parser/parsing_table.h" 13 | #include "utils/utils.h" 14 | 15 | namespace jucc { 16 | /** 17 | * Dummy function to test setup clang and build config 18 | */ 19 | std::string Hello(); 20 | 21 | class InputParser { 22 | std::vector tokens_; 23 | 24 | public: 25 | /** 26 | * public constructor for initializing command line arguments 27 | * and converting them to string tokens for efficient search 28 | * for flags in the command line input 29 | */ 30 | InputParser(int argc, char *argv[]); 31 | 32 | /** 33 | * Returns true if the command line options has the required flag 'flag' 34 | * Searches the tokens 35 | * @ Returns true or false whether the given input flag is present in the 36 | * command line options. 37 | */ 38 | bool HasFlag(const std::string &flag); 39 | 40 | /** 41 | * Returns the next argument for a flag in command line options. 
42 | * $ jucc -f 43 | * GetArgument("-f") returns filename 44 | * @ Returns string 45 | */ 46 | std::string GetArgument(const std::string &flag); 47 | }; 48 | 49 | /** 50 | * Dummy function that computes x + y by repeated increment; 51 | * for benchmarking only 52 | */ 53 | int Radd(int x, int y); 54 | } // namespace jucc 55 | 56 | #endif // JUCC_JUCC_H 57 | -------------------------------------------------------------------------------- /src/include/parser/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_PARSER_PARSER_H 2 | #define JUCC_PARSER_PARSER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "nlohmann/json.hpp" 10 | #include "parser/parsing_table.h" 11 | #include "utils/first_follow.h" 12 | 13 | using json = nlohmann::ordered_json; 14 | 15 | namespace jucc { 16 | namespace parser { 17 | 18 | class Parser { 19 | /** 20 | * json pretty print indentation for generated parse tree 21 | */ 22 | static const int INDENTATION = 4; 23 | 24 | /** 25 | * parse tree for Treant.js integration 26 | */ 27 | json parse_tree_; 28 | 29 | /** 30 | * A stack to put the symbols and perform the actual parsing 31 | */ 32 | std::stack stack_; 33 | 34 | /** 35 | * The given input string to parse. 36 | */ 37 | std::vector input_string_; 38 | 39 | /** 40 | * The start symbol for the grammar 41 | */ 42 | std::string start_symbol_; 43 | 44 | /** 45 | * Holds the current step of parsing. 46 | */ 47 | int current_step_{0}; 48 | 49 | /** 50 | * Holds the build up parsing table object 51 | */ 52 | ParsingTable table_; 53 | 54 | /** 55 | * Holds the history of the productions parsed during parsing 56 | */ 57 | std::vector production_history_; 58 | 59 | /** 60 | * Holds a copy of the input string initially 61 | * and changes with each step of parsing. 62 | */ 63 | std::vector current_string_; 64 | 65 | /** 66 | * Errors incurred during the parsing of the given input file.
67 | */ 68 | std::vector parser_errors_; 69 | 70 | /** 71 | * Helper function to generate error messages for parsing. 72 | */ 73 | static std::string GenerateErrorMessage(const std::string ¤t_token); 74 | 75 | /** 76 | * Supportive function for Parser::FormattedJSON 77 | * @param value 78 | * @return { text: { name: "value" } } 79 | */ 80 | static json GetTextNode(const std::string & /* value */); 81 | 82 | /** 83 | * Utility recursive function for Parser::FormattedJSON 84 | * @param body, a json 85 | * @returns Treant.js formatted JSON 86 | */ 87 | static json RecRunner(const json & /*main*/, std::string /* key */); 88 | 89 | public: 90 | /** 91 | * Constructor for initializing stack and other members. 92 | */ 93 | Parser(); 94 | 95 | /** 96 | * Used for parsing the next token of the input string 97 | */ 98 | void ParseNextStep(); 99 | 100 | /** 101 | * Resets the entire parsing process 102 | */ 103 | void ResetParsing(); 104 | 105 | /** 106 | * Function that returns true when the parsing is completed 107 | */ 108 | bool IsComplete(); 109 | 110 | /** 111 | * Completes a step of parsing 112 | */ 113 | void DoNextStep(); 114 | 115 | /** 116 | * Build the parse tree from production history 117 | * Parse tree not built if parser in error state 118 | */ 119 | void BuildParseTree(); 120 | 121 | /** 122 | * Dumps the parse tree in given path in json format 123 | * @param filepath 124 | * @param formatted (Default is in Treant.js format else raw if value is set "false") 125 | * @returns true on success 126 | */ 127 | bool WriteParseTree(const std::string &filepath, bool formatted = true); 128 | 129 | /** 130 | * Takes a json with no array, ideally received from parser::GetParseTree() 131 | * Format is given here https://fperucic.github.io/treant-js/ 132 | * @returns a formatted JSON which acts as a input for Treant.js 133 | */ 134 | [[maybe_unused]] [[nodiscard]] static json FormattedJSON(const json & /* body */); 135 | 136 | /* getters and setters*/ 137 | void 
SetInputString(std::vector inps); 138 | void SetParsingTable(ParsingTable table); 139 | void SetStartSymbol(std::string start); 140 | [[nodiscard]] const std::vector &GetProductionHistory() { return production_history_; } 141 | [[nodiscard]] const std::vector &GetParserErrors() { return parser_errors_; } 142 | [[nodiscard]] const json &GetParseTree() { return parse_tree_; } 143 | }; 144 | } // namespace parser 145 | 146 | } // namespace jucc 147 | 148 | #endif 149 | -------------------------------------------------------------------------------- /src/include/parser/parsing_table.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_PARSER_PARSING_TABLE_H 2 | #define JUCC_PARSER_PARSING_TABLE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "grammar/grammar.h" 10 | #include "utils/first_follow.h" 11 | 12 | namespace jucc { 13 | 14 | namespace parser { 15 | 16 | const char SYNCH_TOKEN[] = "synch"; 17 | const char ERROR_TOKEN[] = "error"; 18 | 19 | class ParsingTable { 20 | public: 21 | using Table = std::unordered_map>; 22 | 23 | private: 24 | /** 25 | * stores the parsing table, which is calculated from the productions in the grammar 26 | * and its first and follow sets 27 | */ 28 | Table table_; 29 | 30 | /** 31 | * Stores firsts of the productions 32 | */ 33 | utils::SymbolsMap firsts_; 34 | 35 | /** 36 | * Stores the follows of the productions 37 | */ 38 | utils::SymbolsMap follows_; 39 | 40 | /** 41 | * Stores the productions of the grammar. 42 | */ 43 | grammar::Productions productions_; 44 | 45 | /** 46 | * Stores the terminals of the grammar 47 | */ 48 | std::vector terminals_; 49 | 50 | /** 51 | * Stores the non-terminals of the grammar. 52 | */ 53 | std::vector non_terminals_; 54 | 55 | /** 56 | * Stores the errors if any, in case of a non-LL(1) 57 | * grammar during the construction of the parsing table.
58 | */ 59 | std::vector errors_; 60 | 61 | /** 62 | * A helper function to generate error message. 63 | */ 64 | static std::string GenerateErrorMessage(const std::string &production, const std::string &symbol); 65 | 66 | public: 67 | /** 68 | * Default constructor 69 | */ 70 | ParsingTable() = default; 71 | 72 | /** 73 | * Used for setting synchronization tokens in the parsing table calculated from the 74 | * follow set. Adds "$" to terminals_. 75 | */ 76 | ParsingTable(std::vector terms, std::vector non_terms) 77 | : terminals_(std::move(terms)), non_terminals_(std::move(non_terms)) { 78 | terminals_.emplace_back(utils::STRING_ENDMARKER); 79 | } 80 | 81 | /** 82 | * Builds the parsing table from the firsts and follows 83 | */ 84 | void BuildTable(); 85 | 86 | /** 87 | * Gets the entry in the parsing table corresponding to a terminal and a non-terminal 88 | * Gets the production and the rule number 89 | */ 90 | std::pair GetEntry(const std::string &non_terminal_, const std::string &terminal_); 91 | 92 | /* getters and setters */ 93 | void SetFirsts(utils::SymbolsMap firsts) { firsts_ = std::move(firsts); } 94 | void SetProductions(grammar::Productions productions) { productions_ = std::move(productions); } 95 | void SetFollows(utils::SymbolsMap follows) { follows_ = std::move(follows); } 96 | [[nodiscard]] const utils::SymbolsMap &GetFirsts() { return firsts_; } 97 | [[nodiscard]] const utils::SymbolsMap &GetFollows() { return follows_; } 98 | [[nodiscard]] const grammar::Productions &GetProductions() { return productions_; } 99 | [[nodiscard]] const std::vector &GetNonTerminals() { return non_terminals_; } 100 | [[nodiscard]] const std::vector &GetTerminals() { return terminals_; } 101 | [[nodiscard]] const Table &GetTable() { return table_; } 102 | [[nodiscard]] const std::vector &GetErrors() { return errors_; } 103 | }; 104 | 105 | } // namespace parser 106 | } // namespace jucc 107 | 108 | #endif 109 | 
-------------------------------------------------------------------------------- /src/include/symbol_table/symbol_table.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_SYMBOL_TABLE_SYMBOL_TABLE_H 2 | #define JUCC_SYMBOL_TABLE_SYMBOL_TABLE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace jucc { 10 | 11 | namespace symbol_table { 12 | 13 | struct Node { 14 | /** 15 | * Used to store the name of the identifier 16 | * obtained during tokenization 17 | */ 18 | std::string identifier_; 19 | 20 | /** 21 | * Used to store the data type of the identifier 22 | * One of int or float 23 | */ 24 | std::string data_type_; 25 | 26 | /** 27 | * Used to store the nesting level of scoping 28 | * Deeper the nesting, higher the scope 29 | * 30 | * ... 31 | * int a0_; 32 | * if ( condition ) { 33 | * int a1_; 34 | * if ( second_condition ) { 35 | * int a2_; 36 | * if ( third_condition) { 37 | * int a3_; 38 | * ... 39 | * } 40 | * } 41 | * } 42 | * ... 43 | * a0_ has 0 level , a1_ has 1 level and so on ... 44 | */ 45 | int nesting_level_; 46 | 47 | /** 48 | * The pointer to the next node. 49 | */ 50 | Node *next_; 51 | 52 | /** 53 | * Constructor for node class 54 | */ 55 | Node(std::string it_, std::string dt_, int nt_) 56 | : identifier_(std::move(it_)), data_type_(std::move(dt_)), nesting_level_(nt_), next_(nullptr) {} 57 | }; 58 | 59 | class LinkedList { 60 | /** 61 | * The head_ of the linked list 62 | */ 63 | Node *head_{nullptr}; 64 | 65 | public: 66 | LinkedList() = default; 67 | 68 | /** 69 | * Allocates memory for a new Node and returns it after initializing. 
70 | */ 71 | static Node *CreateNewNode(std::string it_, std::string dt_, int nt_); 72 | 73 | /** 74 | * Adds a new node at the start of the linked list 75 | */ 76 | void AddNewNode(std::string it_, std::string dt_, int nt_); 77 | 78 | /** 79 | * Deletes the first node of the linked list 80 | */ 81 | void DeleteStartingNode(); 82 | 83 | /** 84 | * Returns true if the linked list is empty, false otherwise 85 | */ 86 | bool IsEmpty(); 87 | 88 | /** 89 | * returns head_ 90 | */ 91 | Node *GetHead(); 92 | }; 93 | 94 | class SymbolTable { 95 | /** 96 | * Store the identifier mappings with respect to their presence 97 | * in different nesting levels in the program 98 | */ 99 | std::unordered_map hash_table_; 100 | 101 | /** 102 | * A vector to store different duplicate symbols found in the input 103 | */ 104 | std::vector duplicate_symbols_; 105 | 106 | /** 107 | * A vector to store the undeclared symbols in the input file. 108 | */ 109 | std::vector undeclared_symbols_; 110 | 111 | public: 112 | /** 113 | * Checks if the current identifier is present in the same nesting level 114 | * in the hash_table. If present reports a duplicate symbol error, that is, 115 | * inserts into the duplicate_symbols vector. 116 | * @Params : Node * node , bool direct_before_datatype 117 | * Node is the struct encapsulating the information about an identifier 118 | * direct_before_datatype is a boolean flag describing whether the identifier is declared. 119 | */ 120 | void CheckAndAddEntry(Node *node_, bool direct_before_datatype_); 121 | 122 | /** 123 | * On scope end - level_ 124 | * Removes the nodes of all the variables in the hash_table which have nesting_level_ = level_ 125 | */ 126 | void RemoveNodesOnScopeEnd(int level_); 127 | 128 | /** 129 | * Inserts symbols into duplicate symbols array 130 | */ 131 | void InsertIntoDuplicateSymbols(const std::string &identifier_); 132 | 133 | /** 134 | * Returns the linked list or list of nodes associated with an 135 | * identifier.
136 | */ 137 | Node *GetLinkedListById(const std::string &id_); 138 | 139 | /** 140 | * Getter method for duplicated symbols. 141 | */ 142 | std::vector GetDuplicateSymbols(); 143 | 144 | /** 145 | * Getter method for undeclared variables. 146 | */ 147 | std::vector GetUndeclaredSymbols(); 148 | 149 | /** 150 | * Checks if the identifier is present in hash_table_ 151 | * Utility function for testing 152 | */ 153 | int CheckOccurrencesOfId(const std::string &id_); 154 | }; 155 | 156 | } // namespace symbol_table 157 | 158 | } // namespace jucc 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- /src/include/utils/first_follow.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_FIRST_FOLLOW_H 2 | #define JUCC_FIRST_FOLLOW_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "grammar/grammar.h" 9 | 10 | namespace jucc::utils { 11 | // Standard practice to mark input end 12 | const char STRING_ENDMARKER[] = "$"; 13 | 14 | // grammar::Productions getModifiedGrammar(const grammar::Productions &); 15 | 16 | /** 17 | * For each terminal and non terminal in the grammar compute if symbol is / derives to nullable 18 | * Terminals except EPSILON are defaulted to non nullable 19 | * @returns an unordered_map keyed by each symbol in the grammar of booleans 20 | * true if symbol is nullable, false otherwise. 21 | */ 22 | std::unordered_map CalcNullables(const grammar::Productions & /*augmented_grammar*/); 23 | 24 | using SymbolsMap = std::unordered_map>; 25 | 26 | /** 27 | * For each non terminal in given set of productions computes Firsts. 28 | * @returns an unordered_map keyed by non terminals in the grammar of a vector of terminals. 
29 | */ 30 | SymbolsMap CalcFirsts(const grammar::Productions & /*augmented_grammar*/, 31 | const std::unordered_map & /*nullables*/); 32 | 33 | /** 34 | * For each non terminal in given set of productions computes Follows 35 | * @returns an unordered_map keyed by non terminals in the grammar of a vector of terminals. 36 | */ 37 | SymbolsMap CalcFollows(const grammar::Productions & /*augmented_grammar*/, const SymbolsMap & /*firsts*/, 38 | const std::unordered_map & /*nullables*/, 39 | const std::string & /*start_symbol*/); 40 | 41 | } // namespace jucc::utils 42 | 43 | #endif // JUCC_FIRST_FOLLOW_H 44 | -------------------------------------------------------------------------------- /src/include/utils/left_factoring.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_LEFT_FACTORING_H 2 | #define JUCC_LEFT_FACTORING_H 3 | #include "grammar/grammar.h" 4 | namespace grammar = jucc::grammar; 5 | 6 | namespace jucc::utils { 7 | /** 8 | * Does remove left factor from a production. 9 | * @return a set of productions free from left factors. 10 | */ 11 | grammar::Productions RemoveLeftFactors(const grammar::Production & /*prod*/); 12 | 13 | /** 14 | * Finds longest prefix which is most common to the rules of the productions. 15 | * @return a grammar Rule which can be used as a core of grammar::Rule. 16 | */ 17 | grammar::Rule LongestCommonPrefix(const grammar::Production & /*prod*/); 18 | 19 | } // namespace jucc::utils 20 | 21 | #endif // JUCC_LEFT_FACTORING_H 22 | -------------------------------------------------------------------------------- /src/include/utils/left_recursion.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_LEFT_RECURSION_H 2 | #define JUCC_LEFT_RECURSION_H 3 | #include "grammar/grammar.h" 4 | 5 | namespace jucc::utils { 6 | // Standard practice. 
7 | const char DASH[] = "'"; 8 | const char DASHAT[] = "@"; 9 | 10 | /** 11 | * Removes direct left Recursion. 12 | * Example: for production 13 | * E -> E + T 14 | * E -> T 15 | * E -> EPSILON 16 | * @return a set of productions after removing left recursion. 17 | * E' -> +TE' | EPSILON 18 | * E -> TE' | EPSILON E' 19 | */ 20 | grammar::Productions RemoveDirectLeftRecursion(const grammar::Production & /*prod*/); 21 | 22 | /** 23 | * Checks if a given production is recursive. 24 | * @return boolean 25 | */ 26 | bool IsRecursive(const grammar::Production & /*prod*/); 27 | 28 | /** 29 | * TODO!! (If required) 30 | * Removes Indirect Left Recursion. 31 | * A -> B 32 | * B -> C 33 | * C -> A | B | EPSILON 34 | * @return a set of Productions. 35 | */ 36 | grammar::Productions RemoveIndirectLeftRecursions(const grammar::Productions & /*prod*/); 37 | 38 | } // namespace jucc::utils 39 | #endif // JUCC_LEFT_RECURSION_H 40 | -------------------------------------------------------------------------------- /src/include/utils/trie/memory_efficient_trie.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_UTILS_MEMORY_EFFICIENT_TRIE_H 2 | #define JUCC_UTILS_MEMORY_EFFICIENT_TRIE_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "grammar/grammar.h" 9 | 10 | namespace jucc { 11 | namespace utils { 12 | 13 | class Trie { 14 | /** 15 | * Class Trie to store a production efficiently for any required 16 | * operations, mainly for prefix matching. 17 | */ 18 | public: 19 | // Creates a trie for set of Rules where each entity is used as a key. 20 | std::unordered_map nodes_; 21 | 22 | std::vector keys_list_; // Stores the prefix list of entities up to the current entity. 23 | int count_; // Number of occurrences of the current entity after insertion of a set of Rules. 24 | 25 | Trie(); 26 | }; 27 | 28 | class TrieManager { 29 | /** 30 | * A higher order abstraction to manage a complete Trie.
31 | * Takes care of unwanted memory leaking through a custom garbage collector. 32 | */ 33 | Trie *master_; // Current head of the trie. 34 | 35 | std::vector gc_; // The garbage collector which stores any newly created trie node. 36 | 37 | public: 38 | /** 39 | * Constructor. 40 | */ 41 | TrieManager(); 42 | 43 | /** 44 | * Customised no-param constructor. 45 | */ 46 | Trie *NewTrieNode(); 47 | 48 | /** 49 | * Getter: Returns master_ node, i.e. the Head of the trie. 50 | */ 51 | [[nodiscard]] Trie *GetMaster() const; 52 | 53 | /** 54 | * Insert a particular grammar Rule into the master_. 55 | * @param grammar::Rule & 56 | */ 57 | void Insert(const grammar::Rule & /*rule*/); 58 | 59 | /** 60 | * Insert a whole production into the trie node master_. 61 | */ 62 | void InsertAll(const grammar::Production & /*prod*/); 63 | 64 | /** 65 | * Makes a preorder traversal efficiently of the master_ node of the Trie and 66 | * returns the most common prefix of the Production Rules formed by individual 67 | * Rule entities. 68 | */ 69 | // NOLINTNEXTLINE 70 | static void GreedyPreorder(Trie * /*head*/, int & /*len*/, grammar::Rule & /*max_str*/, bool /*is_prime_head*/); 71 | 72 | /** 73 | * Virtual Destructor performs garbage collection where created memory gets released. 
74 | */ 75 | virtual ~TrieManager(); 76 | }; 77 | 78 | } // namespace utils 79 | } // namespace jucc 80 | #endif // JUCC_MEMORYEFFICIENTTRIE_H 81 | -------------------------------------------------------------------------------- /src/include/utils/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef JUCC_UTILS_UTILS_H 2 | #define JUCC_UTILS_UTILS_H 3 | 4 | #include 5 | #include 6 | 7 | #include "grammar/grammar.h" 8 | #include "utils/first_follow.h" 9 | #include "utils/left_factoring.h" 10 | #include "utils/left_recursion.h" 11 | #include "utils/trie/memory_efficient_trie.h" 12 | 13 | namespace jucc { 14 | namespace utils { 15 | /** 16 | * Makes the grammar non ambiguous. 17 | * @return A set of production free from left recursions and left factors. 18 | */ 19 | grammar::Productions RemoveAllPossibleAmbiguity(const grammar::Productions & /*prods*/); 20 | 21 | /** 22 | * @returns a list of parents from the given set of productions. 23 | */ 24 | std::vector GetAllNonTerminals(const grammar::Productions & /*prods*/); 25 | 26 | /** 27 | * @returns a list of terminal symbols from the given set of productions. 28 | */ 29 | std::vector GetAllTerminals(const grammar::Productions & /*prods*/); 30 | 31 | } // namespace utils 32 | } // namespace jucc 33 | #endif 34 | -------------------------------------------------------------------------------- /src/main/jucc.cpp: -------------------------------------------------------------------------------- 1 | #include "main/jucc.h" 2 | 3 | #include 4 | 5 | namespace jucc { 6 | 7 | std::string Hello() { 8 | std::string ret; 9 | ret += "\033[1;33m"; 10 | ret += " .---. _..._ _..._ "; 11 | ret += "\033[0m\n"; 12 | ret += "\033[1;33m"; 13 | ret += " | | .-'_..._''. .-'_..._''. "; 14 | ret += "\033[0m\n"; 15 | ret += "\033[1;32m"; 16 | ret += " '---' .' .' '.\\ .' .' '.\\ "; 17 | ret += "\033[0m\n"; 18 | ret += "\033[1;36m"; 19 | ret += " .---. / .' / .' 
"; 20 | ret += "\033[0m\n"; 21 | ret += "\033[1;34m"; 22 | ret += " | | . ' . ' "; 23 | ret += "\033[0m\n"; 24 | ret += "\033[1;35m"; 25 | ret += " | | | | | | "; 26 | ret += "\033[0m\n"; 27 | ret += "\033[1;31m"; 28 | ret += " | | _ _ | | | | "; 29 | ret += "\033[0m\n"; 30 | ret += "\033[1;33m"; 31 | ret += " | | | ' / |. ' . ' "; 32 | ret += "\033[0m\n"; 33 | ret += "\033[1;33m"; 34 | ret += " | | .' | .' | \\ '. .\\ '. . "; 35 | ret += "\033[0m\n"; 36 | ret += "\033[1;32m"; 37 | ret += " | | / | / | '. `._____.-'/ '. `._____.-'/ "; 38 | ret += "\033[0m\n"; 39 | ret += "\033[1;36m"; 40 | ret += " __.' '| `'. | `-.______ / `-.______ / "; 41 | ret += "\033[0m\n"; 42 | ret += "\033[1;36m"; 43 | ret += "| ' ' .'| '/ ` ` "; 44 | ret += "\033[0m\n"; 45 | ret += "\033[1;35m"; 46 | ret += "|____.' `-' `--' "; 47 | ret += "\033[0m\n"; 48 | ret += "\n"; 49 | return ret; 50 | } 51 | 52 | /** 53 | * Benchmark testing function do not use. 54 | */ 55 | int Radd(int x, int y) { 56 | for (int i = 0; i < y; i++) { 57 | x++; 58 | } 59 | return x; 60 | } 61 | 62 | InputParser::InputParser(int argc, char *argv[]) { 63 | for (int i = 0; i < argc; i++) { 64 | tokens_.emplace_back(std::string(argv[i])); 65 | } 66 | } 67 | 68 | bool InputParser::HasFlag(const std::string &flag) { 69 | return std::find(tokens_.begin(), tokens_.end(), flag) != tokens_.end(); 70 | } 71 | 72 | std::string InputParser::GetArgument(const std::string &flag) { 73 | std::vector::const_iterator itr; 74 | itr = std::find(tokens_.begin(), tokens_.end(), flag); 75 | if (itr != tokens_.end() && ++itr != tokens_.end()) { 76 | return *itr; 77 | } 78 | return std::string(""); 79 | } 80 | 81 | } // namespace jucc 82 | -------------------------------------------------------------------------------- /src/main/main.cpp: -------------------------------------------------------------------------------- 1 | /**------------------------------------------------------------------------- 2 | * 3 | * main.cpp 4 | * Stub main() 
routine for the jucc executable. 5 | * 6 | * This does some essential startup tasks for any incarnation of jucc 7 | * 8 | * Contributors 9 | * Shuvayan Ghosh Dastidar 10 | * Abhishek Pal 11 | * Aritra Samanta 12 | * Bisakh Mondal 13 | * 14 | * Department of Computer Science and Engineering, Jadavpur University 15 | * 16 | * 17 | * IDENTIFICATION 18 | * src/main/main.cpp 19 | * 20 | *------------------------------------------------------------------------- 21 | */ 22 | 23 | #include "main/jucc.h" 24 | 25 | /** 26 | * jucc begins execution here. 27 | */ 28 | auto main(int argc, char *argv[]) -> int { 29 | /* print a hello world message */ 30 | std::cout << jucc::Hello(); 31 | 32 | /** 33 | * InputParser parses the cmd line arguments and returns 34 | * input file path and grammar file path 35 | */ 36 | jucc::InputParser input_parser = jucc::InputParser(argc, argv); 37 | std::string file_grammar = input_parser.GetArgument("-g"); 38 | std::string file_input = input_parser.GetArgument("-f"); 39 | std::string output_file = input_parser.GetArgument("-o"); 40 | if (output_file.empty() || file_input.empty() || file_grammar.empty()) { 41 | std::cout << "jucc: usage: jucc -g -f -o \n"; 42 | return -1; 43 | } 44 | 45 | /* Parse the grammar file and check for formatting errors */ 46 | jucc::grammar::Parser grammar_parser = jucc::grammar::Parser(file_grammar.c_str()); 47 | if (!grammar_parser.Parse()) { 48 | std::cout << "jucc: " << grammar_parser.GetError() << '\n'; 49 | return -1; 50 | } 51 | 52 | /* Check if the input file path is good */ 53 | std::ifstream ifs(file_input); 54 | if (!ifs.good()) { 55 | std::cout << "jucc: cannot read input file, bad file!\n"; 56 | return -1; 57 | } 58 | 59 | /** 60 | * Get the parsed grammar production and process it 61 | * Steps include: 62 | * 1. Left recursion removal 63 | * 2. 
Left factoring 64 | */ 65 | jucc::grammar::Productions raw_productions = grammar_parser.GetProductions(); 66 | jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions); 67 | 68 | /* Calculate first and follows to build the LL(1) parsing table */ 69 | auto nullables = jucc::utils::CalcNullables(productions); 70 | auto firsts = jucc::utils::CalcFirsts(productions, nullables); 71 | auto follows = jucc::utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol()); 72 | auto terminals = grammar_parser.GetTerminals(); 73 | auto non_terminals = jucc::utils::GetAllNonTerminals(productions); 74 | 75 | jucc::parser::ParsingTable parsing_table = jucc::parser::ParsingTable(terminals, non_terminals); 76 | parsing_table.SetFirsts(firsts); 77 | parsing_table.SetFollows(follows); 78 | parsing_table.SetProductions(productions); 79 | parsing_table.BuildTable(); 80 | 81 | /* Check for errors in grammar and exit if errors exist */ 82 | auto err = parsing_table.GetErrors(); 83 | if (!err.empty()) { 84 | std::cout << "jucc: "; 85 | for (auto &e : err) { 86 | std::cout << e << '\n'; 87 | } 88 | return -1; 89 | } 90 | 91 | /* Use Lexer to get input tokens */ 92 | std::vector input_tokens; 93 | jucc::lexer::Lexer lexer = jucc::lexer::Lexer(); 94 | int token; 95 | while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) { 96 | std::string ret_string = jucc::lexer::Lexer::GetTokenType(token); 97 | if (ret_string == "ignore") { 98 | continue; 99 | } 100 | if (ret_string == "error") { 101 | std::cout << "jucc: error illegal term found in input file\n"; 102 | return -1; 103 | } 104 | input_tokens.emplace_back(ret_string); 105 | } 106 | 107 | /* Check for symbol table errors and exit if errors exist */ 108 | std::vector errors; 109 | for (const auto &symbol : lexer.GetUndeclaredSymbolErrors()) { 110 | errors.push_back("undeclared symbol: " + symbol); 111 | } 112 | for (const auto &symbol : lexer.GetDuplicateSymbolErrors()) { 
113 | errors.push_back("duplicate symbol: " + symbol); 114 | } 115 | if (!errors.empty()) { 116 | std::cout << "jucc: "; 117 | for (auto &e : errors) { 118 | std::cout << e << '\n'; 119 | } 120 | return -1; 121 | } 122 | 123 | /* Parse the input file using the parsing table and report errors */ 124 | jucc::parser::Parser parser = jucc::parser::Parser(); 125 | parser.SetInputString(input_tokens); 126 | parser.SetStartSymbol(grammar_parser.GetStartSymbol()); 127 | parser.SetParsingTable(parsing_table); 128 | 129 | while (!parser.IsComplete()) { 130 | parser.ParseNextStep(); 131 | } 132 | 133 | err = parser.GetParserErrors(); 134 | if (!err.empty()) { 135 | std::cout << "jucc: "; 136 | for (auto &e : err) { 137 | std::cout << e << '\n'; 138 | } 139 | return -1; 140 | } 141 | 142 | /* If there are no parser errors then proceed to generate the parse tree */ 143 | parser.BuildParseTree(); 144 | if (!parser.WriteParseTree(output_file)) { 145 | std::cout << "jucc: could not write parse tree to " + output_file + '\n'; 146 | return -1; 147 | } 148 | std::string command = "cd ../server && npm start " + output_file; 149 | system(command.c_str()); 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /src/parser/parser.cpp: -------------------------------------------------------------------------------- 1 | #include "parser/parser.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace jucc::parser { 7 | 8 | Parser::Parser() : parse_tree_(json::object({})) { 9 | // initialize the stack 10 | stack_.push(std::string(utils::STRING_ENDMARKER)); 11 | input_string_.clear(); 12 | current_string_.clear(); 13 | } 14 | 15 | std::string Parser::GenerateErrorMessage(const std::string ¤t_token) { 16 | std::string ret_string; 17 | ret_string += "parser error: at symbol: " + current_token; 18 | return ret_string; 19 | } 20 | 21 | void Parser::SetInputString(std::vector inps) { 22 | if (!inps.empty()) { 23 | input_string_ = 
std::move(inps); 24 | current_string_ = input_string_; 25 | // augmented string for parsing 26 | current_string_.emplace_back(std::string(utils::STRING_ENDMARKER)); 27 | } 28 | } 29 | 30 | void Parser::SetStartSymbol(std::string start) { 31 | start_symbol_ = std::move(start); 32 | stack_.push(start_symbol_); 33 | } 34 | 35 | void Parser::SetParsingTable(ParsingTable table) { table_ = std::move(table); } 36 | 37 | bool Parser::IsComplete() { 38 | return (current_step_ == static_cast(current_string_.size())) || 39 | stack_.top() == std::string(utils::STRING_ENDMARKER); 40 | } 41 | 42 | void Parser::ResetParsing() { 43 | while (!stack_.empty()) { 44 | stack_.pop(); 45 | } 46 | stack_.push(std::string(utils::STRING_ENDMARKER)); 47 | stack_.push(start_symbol_); 48 | current_string_ = input_string_; 49 | current_step_ = 0; 50 | current_string_.emplace_back(std::string(utils::STRING_ENDMARKER)); 51 | } 52 | 53 | void Parser::DoNextStep() { 54 | if (!IsComplete()) { 55 | current_step_++; 56 | } 57 | } 58 | 59 | void Parser::ParseNextStep() { 60 | std::string top_symbol = stack_.top(); 61 | std::string current_token = current_string_[current_step_]; 62 | ParsingTable::Table table = table_.GetTable(); 63 | // skip tokens until it is in the first or is a synch token 64 | while (!IsComplete() && table[top_symbol][current_token] == std::string(ERROR_TOKEN)) { 65 | parser_errors_.push_back(GenerateErrorMessage(current_token)); 66 | DoNextStep(); 67 | if (current_step_ < static_cast(current_string_.size())) { 68 | current_token = current_string_[current_step_]; 69 | } 70 | } 71 | if (!IsComplete()) { 72 | // if SYNCH TOKEN - We skip the current symbol on stack top 73 | if (table[top_symbol][current_token] == std::string(SYNCH_TOKEN)) { 74 | parser_errors_.push_back(GenerateErrorMessage(current_token)); 75 | stack_.pop(); 76 | } else { 77 | auto terminals = table_.GetTerminals(); 78 | // check if current stack top matches the current token 79 | if (top_symbol == current_token) { 
80 | stack_.pop(); 81 | DoNextStep(); 82 | } else if (std::find(terminals.begin(), terminals.end(), top_symbol) != terminals.end() && 83 | std::find(terminals.begin(), terminals.end(), current_token) != terminals.end()) { 84 | parser_errors_.push_back(GenerateErrorMessage(current_token)); 85 | DoNextStep(); 86 | } else { 87 | // we expand the production 88 | auto prod_rule = table_.GetEntry(top_symbol, current_token); 89 | auto productions = table_.GetProductions(); 90 | auto req_rule = productions[prod_rule.first].GetRules()[prod_rule.second]; 91 | auto entities = req_rule.GetEntities(); 92 | std::reverse(entities.begin(), entities.end()); 93 | stack_.pop(); 94 | if (!entities.empty() && entities[0] == std::string(grammar::EPSILON)) { 95 | production_history_.push_back(prod_rule.first * 100 + prod_rule.second); 96 | return; 97 | } 98 | for (auto &entity : entities) { 99 | stack_.push(entity); 100 | } 101 | production_history_.push_back(prod_rule.first * 100 + prod_rule.second); 102 | } 103 | } 104 | } 105 | } 106 | 107 | void Parser::BuildParseTree() { 108 | // if errors cannot build tree 109 | if (!parser_errors_.empty()) { 110 | return; 111 | } 112 | 113 | // init parse tree state 114 | parse_tree_[start_symbol_] = json::object({}); 115 | std::stack parent_node_stack; 116 | parent_node_stack.push(&parse_tree_[start_symbol_]); 117 | 118 | auto productions = table_.GetProductions(); 119 | auto terminals = table_.GetTerminals(); 120 | auto non_terminals = table_.GetNonTerminals(); 121 | // iterate over production history and build tree 122 | for (const auto &prod : production_history_) { 123 | int production_index = prod / 100; 124 | int rule_index = prod % 100; 125 | auto parent = productions[production_index].GetParent(); 126 | auto rule = productions[production_index].GetRules()[rule_index]; 127 | auto entities = rule.GetEntities(); 128 | json *parent_node = parent_node_stack.top(); 129 | /** 130 | * rename entities to handle duplicates 131 | * Example: 132 | * 
change entities from {"A", "A", "B", "A", "C", "B"} to 133 | * {"A", "A_1", "B", "A_2", "C", "B_1"} inplace 134 | */ 135 | std::unordered_map symbol_count; 136 | // store an reverse map for name of entities before renaming 137 | std::unordered_map default_name; 138 | for (auto &entity : entities) { 139 | auto p_entity = entity; 140 | if (symbol_count[entity]++ != 0) { 141 | entity += "_" + std::to_string(symbol_count[entity] - 1); 142 | } 143 | default_name[entity] = p_entity; 144 | 145 | // add renamed entities to current parent node 146 | if (std::find(terminals.begin(), terminals.end(), p_entity) != terminals.end()) { 147 | (*parent_node)[entity] = json(); 148 | } else { 149 | (*parent_node)[entity] = json::object({}); 150 | } 151 | } 152 | 153 | // update parent_node_stack 154 | parent_node_stack.pop(); 155 | for (auto it = entities.rbegin(); it < entities.rend(); it++) { 156 | if (std::find(non_terminals.begin(), non_terminals.end(), default_name[*it]) != non_terminals.end()) { 157 | parent_node_stack.push(&((*parent_node)[*it])); 158 | } 159 | } 160 | } 161 | } 162 | 163 | bool Parser::WriteParseTree(const std::string &filepath, bool formatted) { 164 | std::ofstream ofs(filepath); 165 | if (ofs.is_open()) { 166 | ofs << (formatted ? 
FormattedJSON(parse_tree_) : parse_tree_).dump(INDENTATION) << '\n'; 167 | return true; 168 | } 169 | return false; 170 | } 171 | 172 | json Parser::GetTextNode(const std::string &value) { 173 | json j; 174 | j["text"]["name"] = value; 175 | return j; 176 | } 177 | 178 | json Parser::RecRunner(const json &main, std::string key = "") { 179 | if (main.empty()) { 180 | return GetTextNode(key); 181 | } 182 | auto body = main; 183 | if (key.empty()) { 184 | for (auto it = main.begin(); it != main.end();) { 185 | key = it.key(); 186 | body = it.value(); 187 | break; 188 | } 189 | } 190 | 191 | json j = GetTextNode(key); 192 | for (auto it = body.begin(); it != body.end(); it++) { 193 | j["children"].push_back(RecRunner(it.value(), it.key())); 194 | } 195 | return j; 196 | } 197 | 198 | json Parser::FormattedJSON(const json &body) { return Parser::RecRunner(body); } 199 | 200 | } // namespace jucc::parser 201 | -------------------------------------------------------------------------------- /src/parser/parsing_table.cpp: -------------------------------------------------------------------------------- 1 | #include "parser/parsing_table.h" 2 | 3 | #include 4 | #include 5 | 6 | namespace jucc::parser { 7 | 8 | std::string ParsingTable::GenerateErrorMessage(const std::string &production, const std::string &symbol) { 9 | std::string ret; 10 | ret += "parsing table error: duplicate entry in parsing table, "; 11 | ret += "production: "; 12 | ret = ret.append(production); 13 | ret += " symbol: "; 14 | ret = ret.append(symbol); 15 | return ret; 16 | } 17 | 18 | void ParsingTable::BuildTable() { 19 | // fill initially all errors 20 | for (auto &nt : non_terminals_) { 21 | for (auto &t : terminals_) { 22 | table_[nt][t] = std::string(ERROR_TOKEN); 23 | } 24 | } 25 | 26 | // We consider that the symbols on the Follow(A) to be in the synchronization set 27 | for (auto &nt : non_terminals_) { 28 | if (follows_.count(nt) != 0U) { 29 | for (const auto &symbol : follows_[nt]) { 30 | if 
(table_[nt][symbol] != std::string(ERROR_TOKEN)) { 31 | errors_.push_back(GenerateErrorMessage(nt, symbol)); 32 | } 33 | table_[nt][symbol] = std::string(SYNCH_TOKEN); 34 | } 35 | } 36 | } 37 | 38 | // firsts 39 | for (int prod_no = 0; prod_no < static_cast(productions_.size()); prod_no++) { 40 | auto rules = productions_[prod_no].GetRules(); 41 | for (int rule_no = 0; rule_no < static_cast(rules.size()); rule_no++) { 42 | std::string first_entity = rules[rule_no].GetEntities()[0]; 43 | // check if first_entity is terminal 44 | if (std::find(terminals_.begin(), terminals_.end(), first_entity) != terminals_.end()) { 45 | std::string entry = table_[productions_[prod_no].GetParent()][first_entity]; 46 | if ((entry != std::string(ERROR_TOKEN)) && (entry != std::string(SYNCH_TOKEN))) { 47 | errors_.push_back(GenerateErrorMessage(productions_[prod_no].GetParent(), first_entity)); 48 | } 49 | 50 | table_[productions_[prod_no].GetParent()][first_entity] = std::to_string(prod_no * 100 + rule_no); 51 | } else if (std::find(non_terminals_.begin(), non_terminals_.end(), first_entity) != non_terminals_.end()) { 52 | // first entity is a non-terminal 53 | if (firsts_.count(first_entity) != 0U) { 54 | for (auto &symbol : firsts_[first_entity]) { 55 | if (symbol != std::string(grammar::EPSILON)) { 56 | std::string entry = table_[productions_[prod_no].GetParent()][symbol]; 57 | if ((entry != std::string(ERROR_TOKEN)) && (entry != std::string(SYNCH_TOKEN))) { 58 | errors_.push_back(GenerateErrorMessage(productions_[prod_no].GetParent(), symbol)); 59 | } 60 | 61 | table_[productions_[prod_no].GetParent()][symbol] = std::to_string(prod_no * 100 + rule_no); 62 | } 63 | } 64 | } 65 | 66 | } else if (first_entity == std::string(grammar::EPSILON)) { 67 | // first entity is epsilon 68 | if (follows_.count(productions_[prod_no].GetParent()) != 0U) { 69 | for (auto &symbol : follows_[productions_[prod_no].GetParent()]) { 70 | std::string entry = 
table_[productions_[prod_no].GetParent()][symbol]; 71 | if ((entry != std::string(ERROR_TOKEN)) && (entry != std::string(SYNCH_TOKEN))) { 72 | errors_.push_back(GenerateErrorMessage(productions_[prod_no].GetParent(), symbol)); 73 | } 74 | 75 | table_[productions_[prod_no].GetParent()][symbol] = std::to_string(prod_no * 100 + rule_no); 76 | } 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | std::pair ParsingTable::GetEntry(const std::string &non_terminal_, const std::string &terminal_) { 84 | std::string entry = table_[non_terminal_][terminal_]; 85 | int value; 86 | std::stringstream ss(entry); 87 | ss >> value; 88 | return std::make_pair(value / 100, value % 100); 89 | } 90 | } // namespace jucc::parser 91 | -------------------------------------------------------------------------------- /src/symbol_table/symbol_table.cpp: -------------------------------------------------------------------------------- 1 | #include "symbol_table/symbol_table.h" 2 | 3 | namespace jucc::symbol_table { 4 | 5 | bool LinkedList::IsEmpty() { return this->head_ == nullptr; } 6 | 7 | Node *LinkedList::CreateNewNode(std::string it_, std::string dt_, int nt_) { 8 | Node *node = new Node(std::move(it_), std::move(dt_), nt_); 9 | return node; 10 | } 11 | 12 | void LinkedList::AddNewNode(std::string it_, std::string dt_, int nt_) { 13 | Node *node = LinkedList::CreateNewNode(std::move(it_), std::move(dt_), nt_); 14 | node->next_ = head_; 15 | head_ = node; 16 | } 17 | 18 | void LinkedList::DeleteStartingNode() { 19 | if (IsEmpty()) { 20 | return; 21 | } 22 | Node *temp_ref = head_; 23 | head_ = temp_ref->next_; 24 | delete temp_ref; 25 | } 26 | 27 | Node *LinkedList::GetHead() { return this->head_; } 28 | 29 | void SymbolTable::CheckAndAddEntry(Node *node_, bool direct_before_datatype_) { 30 | // if present check if in common scope 31 | if (hash_table_.count(node_->identifier_) != 0U) { 32 | // check if using a previously declared variable. 
33 | if (!direct_before_datatype_) { 34 | return; 35 | } 36 | // declaring a variable 37 | if (hash_table_[node_->identifier_].GetHead()->nesting_level_ == node_->nesting_level_) { 38 | // error duplicate symbol 39 | SymbolTable::InsertIntoDuplicateSymbols(node_->identifier_); 40 | } else { 41 | // else we add a new node at the scope 42 | hash_table_[node_->identifier_].AddNewNode(node_->identifier_, node_->data_type_, node_->nesting_level_); 43 | } 44 | return; 45 | } 46 | 47 | // if not present in common scope just insert it in hash table 48 | if (!node_->data_type_.empty()) { 49 | hash_table_[node_->identifier_].AddNewNode(node_->identifier_, node_->data_type_, node_->nesting_level_); 50 | } else { 51 | // insert into undeclared 52 | undeclared_symbols_.push_back(node_->identifier_); 53 | } 54 | } 55 | 56 | void SymbolTable::InsertIntoDuplicateSymbols(const std::string &identifier_) { 57 | // Further improvement on this will be storing line numbers and file details 58 | duplicate_symbols_.push_back(identifier_); 59 | } 60 | 61 | void SymbolTable::RemoveNodesOnScopeEnd(int level_) { 62 | std::vector delete_queue; 63 | for (auto &id_nodes : hash_table_) { 64 | if (!id_nodes.second.IsEmpty() && id_nodes.second.GetHead()->nesting_level_ == level_) { 65 | hash_table_[id_nodes.first].DeleteStartingNode(); 66 | if (hash_table_[id_nodes.first].IsEmpty()) { 67 | // delete the entry 68 | delete_queue.push_back(id_nodes.first); 69 | } 70 | } 71 | } 72 | for (const auto &id : delete_queue) { 73 | auto iterator = hash_table_.find(id); 74 | if (iterator != hash_table_.end()) { 75 | hash_table_.erase(iterator); 76 | } 77 | } 78 | } 79 | 80 | Node *SymbolTable::GetLinkedListById(const std::string &id_) { return hash_table_[id_].GetHead(); } 81 | 82 | std::vector SymbolTable::GetDuplicateSymbols() { return duplicate_symbols_; } 83 | 84 | std::vector SymbolTable::GetUndeclaredSymbols() { return undeclared_symbols_; } 85 | 86 | int SymbolTable::CheckOccurrencesOfId(const 
std::string &id_) { return hash_table_.count(id_); } 87 | } // namespace jucc::symbol_table 88 | -------------------------------------------------------------------------------- /src/utils/first_follow.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/first_follow.h" 2 | 3 | #include 4 | 5 | #include "utils/utils.h" 6 | 7 | namespace jucc::utils { 8 | std::unordered_map CalcNullables(const grammar::Productions &augmented_grammar) { 9 | std::unordered_map nullables; 10 | // set all terminals to non - nullable 11 | auto terminals = GetAllTerminals(augmented_grammar); 12 | for (const auto &term : terminals) { 13 | nullables[term] = false; 14 | } 15 | 16 | // EPSILON is nullable by definition 17 | nullables[std::string(grammar::EPSILON)] = true; 18 | 19 | // recursively compute for all non terminals 20 | std::function &)> calc_recursive; 21 | // lambda function that returns boolean value 22 | calc_recursive = [&](const std::string &key, std::vector &path) { 23 | if (find(path.begin(), path.end(), key) != path.end()) { 24 | return false; 25 | } 26 | 27 | // marking current symbol as visited 28 | path.push_back(key); 29 | 30 | // registering non-terminal in FOLLOW map if encountered for the first time 31 | if (nullables.find(key) != nullables.end()) { 32 | path.pop_back(); 33 | return nullables[key]; 34 | } 35 | 36 | for (const auto &rule : GetRulesForParent(augmented_grammar, key)) { 37 | bool premature_termination = false; 38 | for (const auto &symbol : rule.GetEntities()) { 39 | // exit loop if non-nullable symbol is encountered 40 | if (!calc_recursive(symbol, path)) { 41 | premature_termination = true; 42 | break; 43 | } 44 | } 45 | 46 | // for production A -> X Y Z, A is nullable iff X, Y, Z all are nullable 47 | if (!premature_termination) { 48 | nullables[key] = true; 49 | path.pop_back(); 50 | return true; 51 | } 52 | } 53 | 54 | // by default, a symbol is considered non-nullable 55 | nullables[key] = 
false; 56 | path.pop_back(); 57 | return false; 58 | }; 59 | 60 | for (const auto &production : augmented_grammar) { 61 | std::vector path; 62 | calc_recursive(production.GetParent(), path); 63 | } 64 | 65 | return nullables; 66 | } 67 | 68 | SymbolsMap CalcFirsts(const grammar::Productions &augmented_grammar, 69 | const std::unordered_map &nullables) { 70 | SymbolsMap firsts; 71 | // finished -> used to check if any new symbols are added to any FIRST(non-terminal) in a particular iteration 72 | // useful in case of cycles in productions 73 | bool finished = false; 74 | // store terminals to later remove from final results 75 | // terminals used as base case in recursion 76 | auto terminals = GetAllTerminals(augmented_grammar); 77 | for (const auto &term : terminals) { 78 | firsts[term] = {term}; 79 | } 80 | // add base case for EPSILON 81 | firsts[std::string(grammar::EPSILON)] = {std::string(grammar::EPSILON)}; 82 | 83 | /* at this point, map of FIRST only contains terminal -> {terminal} mappings */ 84 | std::function(const std::string &, std::vector &)> calc_recursive; 85 | // key -> current symbol being explored 86 | // path -> list of all the symbols visited uptil reaching key 87 | // lambda function that returns vector 88 | calc_recursive = [&](const std::string &key, std::vector &path) { 89 | // FIRST(terminal) = {terminal} 90 | if (!grammar::HasParent(augmented_grammar, key)) { 91 | return firsts[key]; 92 | } 93 | 94 | // if key "belongs to" path, then first of key has already been calculated / visited 95 | if (find(path.begin(), path.end(), key) != path.end()) { 96 | return firsts[key]; 97 | } 98 | 99 | // marking current symbol as visited 100 | path.push_back(key); 101 | 102 | /* now only un-visited non-terminals are left to be explored */ 103 | 104 | // registering non-terminal in FIRST map if encountered for the first time 105 | if (firsts.find(key) == firsts.end()) { 106 | firsts[key] = {}; 107 | finished = false; 108 | } 109 | 110 | /* if X is a 
non-terminal and X -> Y1 Y2 Y3 ... Yk is a production, then 111 | for 1 <= i <= k, FIRST(X) includes FIRST(Yi) iff FIRST(Y1 ... Yi-1) => EPSILON 112 | and FIRST(Y1 ... Yi-1) => EPSILON iff for all 1 <= j <= i - 1, Yj => EPSILON */ 113 | for (const auto &rules : GetRulesForParent(augmented_grammar, key)) { 114 | // iterates through all symbols in the body of a production 115 | bool premature_termination = false; 116 | for (const auto &symbol : rules.GetEntities()) { 117 | // deriving FIRST(current symbol) 118 | std::vector first = calc_recursive(symbol, path); 119 | // der -> each terminal or EPSILON in FIRST(current symbol) 120 | for (const auto &der : first) { 121 | if (der != std::string(grammar::EPSILON) && 122 | find(firsts[key].begin(), firsts[key].end(), der) == firsts[key].end()) { 123 | // include all terminals except EPSILON 124 | firsts[key].push_back(der); 125 | finished = false; 126 | } 127 | } 128 | 129 | // non-nullable symbol encountered, so no need of deriving succeeding symbols 130 | if (!nullables.at(symbol)) { 131 | premature_termination = true; 132 | break; 133 | } 134 | } 135 | 136 | // if all symbols of the body of a production yielded to EPSILON, then FIRST(key) includes EPSILON 137 | if (!premature_termination && 138 | find(firsts[key].begin(), firsts[key].end(), std::string(grammar::EPSILON)) == firsts[key].end()) { 139 | firsts[key].push_back(std::string(grammar::EPSILON)); 140 | } 141 | } 142 | 143 | path.pop_back(); 144 | return firsts[key]; 145 | }; 146 | 147 | /* 148 | Continue finding FIRST of all non-terminals in grammar as long as some changes are committed 149 | In case of cycles in productions, we may need to iterate multiple times 150 | 151 | For example, A -> C a, B -> C b, C -> B c | EPSILON 152 | After iteration 1: FIRST(A) = {a}, FIRST(B) = {b}, FIRST(C) = {EPSILON, b}, finished = false 153 | After iteration 2: FIRST(A) = {a, b}, FIRST(B) = {b}, FIRST(C) = {EPSILON, b}, finished = false 154 | After iteration 3: no changes -> 
stop, finished = true 155 | */ 156 | while (!finished) { 157 | finished = true; 158 | for (const auto &production : augmented_grammar) { 159 | std::vector path; 160 | calc_recursive(production.GetParent(), path); 161 | } 162 | } 163 | 164 | // remove terminals and EPSILON from final results 165 | for (const auto &term : terminals) { 166 | firsts.erase(term); 167 | } 168 | firsts.erase(std::string(grammar::EPSILON)); 169 | 170 | return firsts; 171 | } 172 | 173 | SymbolsMap CalcFollows(const grammar::Productions &augmented_grammar, const SymbolsMap &firsts, 174 | const std::unordered_map &nullables, const std::string &start_symbol) { 175 | SymbolsMap follows; 176 | bool finished = false; 177 | 178 | // calculating augmented firsts with terminals and EPSILON 179 | auto augmented_firsts = firsts; 180 | augmented_firsts[std::string(grammar::EPSILON)] = {std::string(grammar::EPSILON)}; 181 | auto terminals = GetAllTerminals(augmented_grammar); 182 | for (const auto &term : terminals) { 183 | augmented_firsts[term] = {term}; 184 | } 185 | 186 | // initialize follows 187 | for (const auto &production : augmented_grammar) { 188 | if (production.GetParent() == start_symbol) { 189 | // if production head / parent is the start_symbol, then FOLLOW(head) = {$} 190 | // $ -> input endmarker 191 | follows[production.GetParent()] = {STRING_ENDMARKER}; 192 | } else { 193 | follows[production.GetParent()] = {}; 194 | } 195 | } 196 | 197 | std::function calc_recursive; 198 | // lambda function that returns nothing 199 | calc_recursive = [&](const std::string &key) { 200 | for (const auto &rule : GetRulesForParent(augmented_grammar, key)) { 201 | for (auto symbol_itr = rule.GetEntities().begin(); symbol_itr != rule.GetEntities().end(); symbol_itr++) { 202 | std::string mid = *symbol_itr; 203 | // if A -> alpha B beta is a production, where B -> non-terminal & alpha, beta -> set of symbols 204 | // then FOLLOW(B) contains {FIRST(beta) - EPSILON} 205 | // if mid aka current symbol is 
terminal, then ignore 206 | if (grammar::HasParent(augmented_grammar, mid)) { 207 | auto next_itr = symbol_itr + 1; 208 | for (; next_itr != rule.GetEntities().end(); next_itr++) { 209 | // next_itr -> iterates through every symbol in beta 210 | for (const auto &der : augmented_firsts.at(*next_itr)) { 211 | // discarding EPSILON 212 | if (der != std::string(grammar::EPSILON) && 213 | find(follows[mid].begin(), follows[mid].end(), der) == follows[mid].end()) { 214 | follows[mid].push_back(der); 215 | finished = false; 216 | } 217 | } 218 | 219 | // non-nullable symbol encountered, so no need of deriving succeeding symbols 220 | if (!nullables.at(*next_itr)) { 221 | break; 222 | } 223 | } 224 | 225 | /* if execution has reached this point, then beta => EPSILON */ 226 | 227 | // if A -> alpha B / A -> alpha B beta & beta => EPSILON, then 228 | // FOLLOW(B) includes FOLLOW(A) 229 | if (next_itr == rule.GetEntities().end()) { 230 | for (const auto &der : follows[key]) { 231 | if (find(follows[mid].begin(), follows[mid].end(), der) == follows[mid].end()) { 232 | follows[mid].push_back(der); 233 | finished = false; 234 | } 235 | } 236 | } 237 | } 238 | } 239 | } 240 | }; 241 | 242 | // boolean variable finished used for same purpose as in CalcFirsts 243 | while (!finished) { 244 | finished = true; 245 | calc_recursive(start_symbol); 246 | for (const auto &production : augmented_grammar) { 247 | // FOLLOW(start_symbol) remains unchanged, and is always {$} 248 | if (production.GetParent() != start_symbol) { 249 | calc_recursive(production.GetParent()); 250 | } 251 | } 252 | } 253 | 254 | return follows; 255 | } 256 | 257 | } // namespace jucc::utils 258 | -------------------------------------------------------------------------------- /src/utils/left_factoring.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/left_factoring.h" 2 | 3 | #include 4 | 5 | #include "utils/left_recursion.h" 6 | #include 
"utils/trie/memory_efficient_trie.h" 7 | 8 | namespace jucc::utils { 9 | 10 | grammar::Productions RemoveLeftFactors(const grammar::Production &prod) { 11 | grammar::Productions prods; 12 | 13 | bool state_not_updated = false; 14 | auto current_prod = prod; 15 | while (!state_not_updated) { 16 | auto max_common_prefix = utils::LongestCommonPrefix(current_prod).GetEntities(); 17 | if (max_common_prefix.empty()) { 18 | state_not_updated = true; 19 | continue; 20 | } 21 | 22 | auto parent = current_prod.GetParent(); // similar to E 23 | auto parent_dash = current_prod.GetParent() + std::string(utils::DASH); // similar to E' 24 | 25 | grammar::Rules parent_rules; 26 | grammar::Rules parent_dash_rules; 27 | 28 | // insert the commonE' prod 29 | std::vector common_factor_with_dash(max_common_prefix.begin(), max_common_prefix.end()); 30 | // push E' after the common part 31 | common_factor_with_dash.emplace_back(parent_dash); 32 | parent_rules.emplace_back(grammar::Rule(common_factor_with_dash)); 33 | 34 | // produce two production after matching 35 | for (const auto &rule : current_prod.GetRules()) { 36 | if (!rule.HasPrefix(grammar::Rule(max_common_prefix))) { 37 | parent_rules.emplace_back(rule); 38 | } else { 39 | auto new_entities = std::vector( 40 | rule.GetEntities().begin() + static_cast(max_common_prefix.size()), rule.GetEntities().end()); 41 | if (new_entities.empty()) { 42 | new_entities.emplace_back(std::string(grammar::EPSILON)); 43 | } 44 | parent_dash_rules.emplace_back(new_entities); 45 | } 46 | } 47 | 48 | // do this recursive 49 | prods.push_back(grammar::Production(parent, parent_rules)); 50 | current_prod = grammar::Production(parent_dash, parent_dash_rules); 51 | } 52 | prods.push_back(current_prod); 53 | return prods; 54 | } 55 | 56 | grammar::Rule LongestCommonPrefix(const grammar::Production &prod) { 57 | TrieManager trie_manager; 58 | trie_manager.InsertAll(prod); 59 | 60 | grammar::Rule common_prefixes; 61 | int len = 1; 62 | 
TrieManager::GreedyPreorder(trie_manager.GetMaster(), len, common_prefixes, true); 63 | 64 | return common_prefixes; 65 | } 66 | 67 | } // namespace jucc::utils 68 | -------------------------------------------------------------------------------- /src/utils/left_recursion.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/left_recursion.h" 2 | 3 | #include 4 | namespace jucc::utils { 5 | 6 | grammar::Productions RemoveDirectLeftRecursion(const grammar::Production &prod) { 7 | if (!IsRecursive(prod)) { 8 | return grammar::Productions{prod}; 9 | } 10 | const std::string &parent = prod.GetParent(); 11 | std::string parent_dash = prod.GetParent() + std::string(DASH) + std::string(DASHAT); 12 | 13 | grammar::Productions prods(2); 14 | prods[0].SetParent(parent_dash); 15 | prods[1].SetParent(parent); 16 | 17 | auto parent_rules = prods[0].GetRules(); 18 | auto parent_dash_rules = prods[1].GetRules(); 19 | 20 | for (const auto &rule : prod.GetRules()) { 21 | auto entries = rule.GetEntities(); 22 | if (entries[0] == parent) { 23 | auto new_entries = std::vector(entries.begin() + 1, entries.end()); 24 | new_entries.push_back(parent_dash); 25 | parent_dash_rules.emplace_back(grammar::Rule(new_entries)); 26 | } else { 27 | if (!entries.empty() && entries.back() == std::string(grammar::EPSILON)) { 28 | entries.pop_back(); 29 | } 30 | entries.push_back(parent_dash); 31 | parent_rules.emplace_back(grammar::Rule(entries)); 32 | } 33 | } 34 | parent_dash_rules.push_back(grammar::Rule({std::string(grammar::EPSILON)})); 35 | prods[0].SetRules(parent_dash_rules); 36 | prods[1].SetRules(parent_rules); 37 | return prods; 38 | } 39 | 40 | bool IsRecursive(const grammar::Production &prod) { 41 | const auto &rules = prod.GetRules(); 42 | 43 | return static_cast(std::any_of(rules.begin(), rules.end(), 44 | [&](const auto &rule) { return prod.GetParent() == rule.GetEntities()[0]; })); 45 | } 46 | 47 | } // namespace jucc::utils 48 | 
-------------------------------------------------------------------------------- /src/utils/trie/memory_efficient_trie.cpp: -------------------------------------------------------------------------------- 1 | #include "utils/trie/memory_efficient_trie.h" 2 | 3 | namespace jucc::utils { 4 | 5 | Trie::Trie() { count_ = 0; } 6 | 7 | TrieManager::TrieManager() { master_ = NewTrieNode(); } 8 | 9 | void TrieManager::Insert(const grammar::Rule &rule) { 10 | auto *head = master_; 11 | std::vector till_keys; 12 | for (const auto &entity : rule.GetEntities()) { 13 | till_keys.push_back(entity); 14 | if (head->nodes_.find(entity) == head->nodes_.end()) { 15 | head->nodes_[entity] = NewTrieNode(); 16 | head->nodes_[entity]->keys_list_ = till_keys; 17 | } 18 | head = head->nodes_[entity]; 19 | head->count_++; 20 | } 21 | } 22 | 23 | void TrieManager::InsertAll(const grammar::Production &prod) { 24 | for (const auto &rule : prod.GetRules()) { 25 | Insert(rule); 26 | } 27 | } 28 | 29 | void TrieManager::GreedyPreorder(Trie *head, int &len, grammar::Rule &max_str, bool is_prime_head) { 30 | if (head == nullptr) { 31 | return; 32 | } 33 | bool state_changed = false; 34 | // Get the node with max count 35 | if (head->count_ >= len && head->count_ != 1) { 36 | len = head->count_; 37 | state_changed = true; 38 | max_str.SetEntities(head->keys_list_); 39 | } 40 | 41 | if (state_changed || is_prime_head) { 42 | for (const auto &node : head->nodes_) { 43 | GreedyPreorder(node.second, len, max_str, false); 44 | } 45 | } 46 | } 47 | 48 | Trie *TrieManager::NewTrieNode() { 49 | Trie *t = new Trie; 50 | this->gc_.push_back(t); 51 | return t; 52 | } 53 | 54 | TrieManager::~TrieManager() { 55 | // flushing 56 | for (auto &node : gc_) { 57 | delete node; 58 | } 59 | } 60 | 61 | Trie *TrieManager::GetMaster() const { return master_; } 62 | 63 | } // namespace jucc::utils 64 | -------------------------------------------------------------------------------- /src/utils/utils.cpp: 
-------------------------------------------------------------------------------- 1 | #include "utils/utils.h" 2 | 3 | #include 4 | 5 | namespace jucc::utils { 6 | grammar::Productions RemoveAllPossibleAmbiguity(const grammar::Productions &prods) { 7 | grammar::Productions clean; 8 | for (const auto &prod : prods) { 9 | for (const grammar::Production &lr_free : RemoveDirectLeftRecursion(prod)) { 10 | // non ambiguous grammars 11 | grammar::Productions nag = RemoveLeftFactors(lr_free); 12 | clean.insert(clean.end(), nag.begin(), nag.end()); 13 | } 14 | } 15 | 16 | return clean; 17 | } 18 | 19 | std::vector GetAllNonTerminals(const grammar::Productions &prods) { 20 | std::vector non_terminals; 21 | for (const auto &prod : prods) { 22 | non_terminals.push_back(prod.GetParent()); 23 | } 24 | 25 | return non_terminals; 26 | } 27 | 28 | std::vector GetAllTerminals(const grammar::Productions &prods) { 29 | std::vector terminals; 30 | for (const auto &production : prods) { 31 | for (const auto &rule : production.GetRules()) { 32 | for (const auto &symbol : rule.GetEntities()) { 33 | // if symbol is not present as a parent in grammar, then it is a terminal 34 | // EPSILION is ignored 35 | if (!grammar::HasParent(prods, symbol) && symbol != std::string(grammar::EPSILON)) { 36 | terminals.push_back(symbol); 37 | } 38 | } 39 | } 40 | } 41 | 42 | terminals.erase(std::unique(terminals.begin(), terminals.end()), terminals.end()); 43 | return terminals; 44 | } 45 | 46 | } // namespace jucc::utils 47 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Test Directory 2 | 3 | These are the tests for the compiler. We use the [googletest](https://github.com/google/googletest) framework. 
4 | -------------------------------------------------------------------------------- /test/grammar/grammar_test.cpp: -------------------------------------------------------------------------------- 1 | #include "grammar/grammar.h" 2 | 3 | #include "gtest/gtest.h" 4 | 5 | using jucc::grammar::Parser; 6 | 7 | TEST(grammar, Parser0) { 8 | Parser parser = Parser("../test/grammar/grammar_test_0.g"); 9 | ASSERT_EQ(true, parser.Parse()); 10 | ASSERT_EQ("", parser.GetStartSymbol()); 11 | std::vector terminals = {"else", 12 | "float", 13 | "if", 14 | "int", 15 | "void", 16 | "(", 17 | ")", 18 | "{", 19 | "}", 20 | "*", 21 | "+", 22 | "-", 23 | "/", 24 | "%", 25 | "<<", 26 | ">>", 27 | "<", 28 | ">", 29 | "<=", 30 | ">=", 31 | "=", 32 | "==", 33 | "!=", 34 | ";", 35 | "identifier", 36 | "integer_constant", 37 | "float_constant"}; 38 | ASSERT_EQ(terminals, parser.GetTerminals()); 39 | std::vector non_terminals = {"", ""}; 40 | ASSERT_EQ(non_terminals, parser.GetNonTerminals()); 41 | 42 | ASSERT_EQ(2, parser.GetProductions().size()); 43 | ASSERT_EQ("", parser.GetProductions()[0].GetParent()); 44 | ASSERT_EQ("", parser.GetProductions()[1].GetParent()); 45 | 46 | ASSERT_EQ(3, parser.GetProductions()[1].GetRules().size()); 47 | std::vector rule0 = {"identifier", ""}; 48 | std::vector rule1 = {""}; 49 | std::vector rule2 = {"EPSILON"}; 50 | ASSERT_EQ(rule0, parser.GetProductions()[1].GetRules()[0].GetEntities()); 51 | ASSERT_EQ(rule1, parser.GetProductions()[1].GetRules()[1].GetEntities()); 52 | ASSERT_EQ(rule2, parser.GetProductions()[1].GetRules()[2].GetEntities()); 53 | 54 | ASSERT_EQ(1, parser.GetProductions()[0].GetRules().size()); 55 | std::vector rule3 = {"integer_constant"}; 56 | ASSERT_EQ(rule3, parser.GetProductions()[0].GetRules()[0].GetEntities()); 57 | } 58 | 59 | TEST(grammar, Parser1) { 60 | Parser parser = Parser("../test/grammar/grammar_test_1.g"); 61 | ASSERT_EQ(false, parser.Parse()); 62 | ASSERT_EQ("grammar parsing error: invalid token %terminals", 
parser.GetError()); 63 | } 64 | 65 | TEST(grammar, Parser2) { 66 | Parser parser = Parser("../test/grammar/grammar_test_2.g"); 67 | ASSERT_EQ(false, parser.Parse()); 68 | ASSERT_EQ("grammar parsing error: invalid token %non_terminals", parser.GetError()); 69 | } 70 | 71 | TEST(grammar, Parser3) { 72 | Parser parser = Parser("../test/grammar/grammar_test_3.g"); 73 | ASSERT_EQ(false, parser.Parse()); 74 | ASSERT_EQ("grammar parsing error: invalid token %rules", parser.GetError()); 75 | } 76 | 77 | TEST(grammar, Parser4) { 78 | Parser parser = Parser("../test/grammar/grammar_test_4.g"); 79 | ASSERT_EQ(false, parser.Parse()); 80 | ASSERT_EQ("grammar parsing error: invalid token %start", parser.GetError()); 81 | } 82 | 83 | TEST(grammar, Parser5) { 84 | Parser parser = Parser("../test/grammar/grammar_test_5.g"); 85 | ASSERT_EQ(false, parser.Parse()); 86 | ASSERT_EQ("grammar parsing error: invalid token %", parser.GetError()); 87 | } 88 | 89 | TEST(grammar, Parser6) { 90 | Parser parser = Parser("../test/grammar/grammar_test_6.g"); 91 | ASSERT_EQ(false, parser.Parse()); 92 | ASSERT_EQ("grammar parsing error: invalid token outside block: bruh", parser.GetError()); 93 | } 94 | 95 | TEST(grammar, Parser7) { 96 | Parser parser = Parser("../test/grammar/grammar_test_7.g"); 97 | ASSERT_EQ(false, parser.Parse()); 98 | ASSERT_EQ("grammar parsing error: EPSILON is reserved", parser.GetError()); 99 | } 100 | 101 | TEST(grammar, Parser8) { 102 | Parser parser = Parser("../test/grammar/grammar_test_8.g"); 103 | ASSERT_EQ(false, parser.Parse()); 104 | ASSERT_EQ("grammar parsing error: EPSILON is reserved", parser.GetError()); 105 | } 106 | 107 | TEST(grammar, Parser9) { 108 | Parser parser = Parser("../test/grammar/grammar_test_9.g"); 109 | ASSERT_EQ(false, parser.Parse()); 110 | ASSERT_EQ("grammar parsing error: ambiguous start symbol", parser.GetError()); 111 | } 112 | 113 | TEST(grammar, Parser10) { 114 | Parser parser = Parser("../test/grammar/grammar_test_10.g"); 115 | 
ASSERT_EQ(false, parser.Parse()); 116 | ASSERT_EQ("grammar parsing error: production cannot start with EPSILON", parser.GetError()); 117 | } 118 | 119 | TEST(grammar, Parser11) { 120 | Parser parser = Parser("../test/grammar/grammar_test_11.g"); 121 | ASSERT_EQ(false, parser.Parse()); 122 | ASSERT_EQ("grammar parsing error: rules syntax error ':' expected: bruh", parser.GetError()); 123 | } 124 | 125 | TEST(grammar, Parser12) { 126 | Parser parser = Parser("../test/grammar/grammar_test_12.g"); 127 | ASSERT_EQ(false, parser.Parse()); 128 | ASSERT_EQ("grammar parsing error: rules syntax error ':' expected", parser.GetError()); 129 | } 130 | 131 | TEST(grammar, Parser13) { 132 | Parser parser = Parser("../test/grammar/grammar_test_13.g"); 133 | ASSERT_EQ(false, parser.Parse()); 134 | ASSERT_EQ("grammar parsing error: block is incomplete '%' expected", parser.GetError()); 135 | } 136 | 137 | TEST(grammar, Parser14) { 138 | Parser parser = Parser("../test/grammar/grammar_test_14.g"); 139 | ASSERT_EQ(false, parser.Parse()); 140 | ASSERT_EQ("grammar parsing error: inconsistent or duplicate terminals", parser.GetError()); 141 | } 142 | 143 | TEST(grammar, Parser15) { 144 | Parser parser = Parser("../test/grammar/grammar_test_15.g"); 145 | ASSERT_EQ(false, parser.Parse()); 146 | ASSERT_EQ("grammar parsing error: inconsistent or duplicate non_terminals", parser.GetError()); 147 | } 148 | 149 | TEST(grammar, Parser16) { 150 | Parser parser = Parser("../test/grammar/grammar_test_16.g"); 151 | ASSERT_EQ(false, parser.Parse()); 152 | ASSERT_EQ("grammar parsing error: terminals and non_terminals not disjoint", parser.GetError()); 153 | } 154 | 155 | TEST(grammar, Parser17) { 156 | Parser parser = Parser("../test/grammar/grammar_test_17.g"); 157 | ASSERT_EQ(false, parser.Parse()); 158 | ASSERT_EQ("grammar parsing error: non_terminal not found: ", parser.GetError()); 159 | } 160 | 161 | TEST(grammar, Parser18) { 162 | Parser parser = Parser("../test/grammar/grammar_test_18.g"); 163 
| ASSERT_EQ(false, parser.Parse()); 164 | ASSERT_EQ("grammar parsing error: rule token is not defined: bruh", parser.GetError()); 165 | } 166 | 167 | TEST(grammar, Parser19) { 168 | Parser parser = Parser("invalid_file_path"); 169 | ASSERT_EQ(false, parser.Parse()); 170 | ASSERT_EQ("grammar parsing error: file not found", parser.GetError()); 171 | } 172 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_0.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | % 30 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_1.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | %terminals 16 | 17 | % 18 | 19 | ## Start Symbol 20 | %start 21 | 22 | % 23 | 24 | ## Grammar for the language 25 | %rules 26 | : identifier 27 | : 28 | : EPSILON 29 | : integer_constant 30 | : float_constant 31 | % 32 | -------------------------------------------------------------------------------- 
/test/grammar/grammar_test_10.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | EPSILON : integer_constant 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_11.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | bruh : integer_constant 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_12.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant 
float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_13.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | : float_constant 30 | 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_14.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant ; 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_15.g: 
-------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_16.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_17.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 
| %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : identifier 27 | : 28 | : EPSILON 29 | : integer_constant 30 | : float_constant 31 | % 32 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_18.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | : float_constant bruh 30 | % 31 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_2.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | %non_terminals 14 | 15 | ## Non Terminals 16 | %non_terminals 17 | 18 | % 19 | 20 | ## Start Symbol 21 | %start 22 | 23 | % 24 | 25 | ## Grammar for the language 26 | %rules 27 | : identifier 28 | : 29 | : EPSILON 30 | : integer_constant 31 | : float_constant 32 | % 33 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_3.g: 
-------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | %rules 21 | 22 | % 23 | 24 | ## Grammar for the language 25 | %rules 26 | : identifier 27 | : 28 | : EPSILON 29 | : integer_constant 30 | : float_constant 31 | % 32 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_4.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | %start 19 | ## Start Symbol 20 | %start 21 | 22 | % 23 | 24 | ## Grammar for the language 25 | %rules 26 | : identifier 27 | : 28 | : EPSILON 29 | : integer_constant 30 | : float_constant 31 | % 32 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_5.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | % 
13 | 14 | ## Non Terminals 15 | %non_terminals 16 | 17 | % 18 | 19 | ## Start Symbol 20 | %start 21 | 22 | % 23 | 24 | ## Grammar for the language 25 | %rules 26 | : identifier 27 | : 28 | : EPSILON 29 | : integer_constant 30 | : float_constant 31 | % 32 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_6.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | bruh moment 24 | 25 | ## Grammar for the language 26 | %rules 27 | : identifier 28 | : 29 | : EPSILON 30 | : integer_constant 31 | : float_constant 32 | % 33 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_7.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | ## Non Terminals 13 | %non_terminals 14 | EPSILON 15 | % 16 | 17 | ## Start Symbol 18 | %start 19 | 20 | % 21 | 22 | ## Grammar for the language 23 | %rules 24 | : identifier 25 | : 26 | : EPSILON 27 | : integer_constant 28 | : float_constant 29 | % 30 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_8.g: 
-------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant EPSILON 11 | % 12 | ## Non Terminals 13 | %non_terminals 14 | 15 | % 16 | 17 | ## Start Symbol 18 | %start 19 | 20 | % 21 | 22 | ## Grammar for the language 23 | %rules 24 | : identifier 25 | : 26 | : EPSILON 27 | : integer_constant 28 | : float_constant 29 | % 30 | -------------------------------------------------------------------------------- /test/grammar/grammar_test_9.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | % 12 | 13 | ## Non Terminals 14 | %non_terminals 15 | 16 | % 17 | 18 | ## Start Symbol 19 | %start 20 | 21 | % 22 | 23 | ## Grammar for the language 24 | %rules 25 | : identifier 26 | : 27 | : EPSILON 28 | : integer_constant 29 | : float_constant 30 | % 31 | -------------------------------------------------------------------------------- /test/lexer/arithmetic.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | int main(){ 6 | int a = 5; 7 | if ( a > 45){ 8 | cout <= 6){ 17 | cout << a; 18 | } 19 | } 20 | } -------------------------------------------------------------------------------- /test/lexer/comments.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | // This is a test mainly for comments 4 | 5 
| 6 | // Another comments 7 | 8 | int main(){ 9 | // Just another comment 10 | // Basically a hello world program, Peace! 11 | cout << "Hello"; 12 | } 13 | -------------------------------------------------------------------------------- /test/lexer/input.txt: -------------------------------------------------------------------------------- 1 | 2 | int main() { 3 | int x, y = 5; 4 | cin >> x; 5 | float f = 5.8; 6 | cout << "Hello"; 7 | } 8 | -------------------------------------------------------------------------------- /test/lexer/input2.txt: -------------------------------------------------------------------------------- 1 | int main(){ 2 | int x, y = ( 5 + 6 ), z; 3 | cout << x; 4 | } -------------------------------------------------------------------------------- /test/lexer/input3.txt: -------------------------------------------------------------------------------- 1 | int main() { 2 | int x, y; 3 | cin >> y; 4 | if (x != 0) { 5 | if (y > 0) { 6 | cout << y; 7 | } 8 | } 9 | float z = 1 + 2 + 3 + 1000/ 50 * 23.2 * (x * y * 10); 10 | } -------------------------------------------------------------------------------- /test/lexer/input_err1.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | int main(){ 4 | int a = 5; 5 | int b= 5.6d; 6 | cin >>b; 7 | if (5 <4.2) { 8 | cout << 4; } 9 | } 10 | -------------------------------------------------------------------------------- /test/lexer/input_err2.txt: -------------------------------------------------------------------------------- 1 | 2 | int main() { int x == 'bruh' ; } 3 | // bruh 4 | -------------------------------------------------------------------------------- /test/lexer/scope_error.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | int main(){ 9 | int a; 10 | if ( a ){ 11 | int b = 45; 12 | int b = 34; 13 | if ( b ){ 14 | c = 45; 15 | } 16 | } 17 | cout << b; 18 | } 
-------------------------------------------------------------------------------- /test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | 3 | auto main(int argc, char **argv) -> int { 4 | ::testing::InitGoogleTest(&argc, argv); 5 | return RUN_ALL_TESTS(); 6 | } 7 | -------------------------------------------------------------------------------- /test/main/jucc_test.cpp: -------------------------------------------------------------------------------- 1 | #include "main/jucc.h" 2 | 3 | #include "gtest/gtest.h" 4 | 5 | using jucc::InputParser; 6 | 7 | TEST(jucc, Radd) { ASSERT_EQ(100, jucc::Radd(0, 100)); } 8 | 9 | TEST(jucc, Parser) { 10 | int argc = 3; 11 | char *argv[3]; 12 | argv[0] = strdup(std::string("-h").c_str()); 13 | argv[1] = strdup(std::string("-f").c_str()); 14 | argv[2] = strdup(std::string("filename").c_str()); 15 | 16 | auto *parser = new InputParser(argc, argv); 17 | free(argv[0]); 18 | free(argv[1]); 19 | free(argv[2]); 20 | ASSERT_TRUE(parser->HasFlag("-h")); 21 | ASSERT_EQ(parser->GetArgument("-f"), "filename"); 22 | ASSERT_FALSE(parser->HasFlag("-t")); 23 | delete parser; 24 | } 25 | 26 | TEST(jucc, Hello) { ASSERT_EQ(jucc::Hello().substr(0, 6), "\x1B[1;33"); } 27 | -------------------------------------------------------------------------------- /test/parser/grammar.g: -------------------------------------------------------------------------------- 1 | ## This is the grammar file for JuCC 2 | ## Edit this file to make changes to the parsing grammar 3 | ## Epsilon is represented by special string EPSILON 4 | 5 | ## Terminals 6 | %terminals 7 | else float if int void 8 | ( ) { } * + - / % , 9 | << >> < > <= >= = == != ; 10 | identifier integer_constant float_constant 11 | main cin cout 12 | % 13 | 14 | ## Non Terminals 15 | %non_terminals 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | % 25 | 26 | ## Start Symbol 27 | %start 28 | 29 | % 30 | 31 | ## Grammar for the language 32 | 
%rules 33 | ## Expressions 34 | : identifier 35 | : 36 | : ( ) 37 | : integer_constant 38 | : float_constant 39 | : + 40 | : - 41 | : 42 | : 43 | : 44 | : * 45 | : / 46 | : % 47 | : 48 | : + 49 | : - 50 | : 51 | : cin >> 52 | : cout << 53 | : << 54 | : >> 55 | : 56 | : < 57 | : > 58 | : <= 59 | : >= 60 | : 61 | : == 62 | : != 63 | : 64 | : 65 | 66 | ## Declarations 67 | : ; 68 | : 69 | : , 70 | : EPSILON 71 | : 72 | : = 73 | : void 74 | : int 75 | : float 76 | : 77 | : identifier 78 | : ( ) 79 | : 80 | 81 | ## Statements 82 | : 83 | : 84 | : 85 | : { } 86 | : 87 | : 88 | : EPSILON 89 | : 90 | : 91 | : ; 92 | : ; 93 | : if ( ) 94 | : if ( ) else 95 | 96 | ## Main 97 | : main ( ) 98 | % 99 | -------------------------------------------------------------------------------- /test/parser/parsing_table_test.cpp: -------------------------------------------------------------------------------- 1 | #include "parser/parsing_table.h" 2 | 3 | #include "grammar/grammar.h" 4 | #include "gtest/gtest.h" 5 | 6 | using jucc::parser::ParsingTable; 7 | namespace grammar = jucc::grammar; 8 | namespace utils = jucc::utils; 9 | 10 | TEST(parser, ParsingTable1) { 11 | /** 12 | * Test: Context Free Grammar 13 | * S : a A B b 14 | * A : c | EPSILON 15 | * B : d | EPSILON 16 | * 17 | * 18 | */ 19 | grammar::Production p1; 20 | p1.SetParent("S"); 21 | p1.SetRules({grammar::Rule({"a", "A", "B", "b"})}); 22 | grammar::Production p2; 23 | p2.SetParent("A"); 24 | p2.SetRules({grammar::Rule({"c"}), grammar::Rule({grammar::EPSILON})}); 25 | grammar::Production p3; 26 | p3.SetParent("B"); 27 | p3.SetRules({grammar::Rule({"d"}), grammar::Rule({grammar::EPSILON})}); 28 | grammar::Productions grammar = {p1, p2, p3}; 29 | 30 | auto nullables = utils::CalcNullables(grammar); 31 | auto firsts = utils::CalcFirsts(grammar, nullables); 32 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 33 | 34 | std::vector terminals = {"a", "b", "c", "d"}; 35 | std::vector non_terminals = {"A", "S", 
"B"}; 36 | 37 | ParsingTable table = ParsingTable(terminals, non_terminals); 38 | table.SetFirsts(firsts); 39 | table.SetFollows(follows); 40 | table.SetProductions(grammar); 41 | table.BuildTable(); 42 | 43 | ASSERT_EQ(table.GetErrors().size(), 0); 44 | 45 | std::pair p; 46 | p = table.GetEntry("S", "a"); 47 | ASSERT_EQ(p.first, 0); 48 | ASSERT_EQ(p.second, 0); 49 | 50 | p = table.GetEntry("A", "b"); 51 | ASSERT_EQ(p.first, 1); 52 | ASSERT_EQ(p.second, 1); 53 | 54 | p = table.GetEntry("A", "c"); 55 | ASSERT_EQ(p.first, 1); 56 | ASSERT_EQ(p.second, 0); 57 | 58 | p = table.GetEntry("A", "d"); 59 | ASSERT_EQ(p.first, 1); 60 | ASSERT_EQ(p.second, 1); 61 | 62 | p = table.GetEntry("B", "b"); 63 | ASSERT_EQ(p.first, 2); 64 | ASSERT_EQ(p.second, 1); 65 | 66 | p = table.GetEntry("B", "d"); 67 | ASSERT_EQ(p.first, 2); 68 | ASSERT_EQ(p.second, 0); 69 | } 70 | 71 | TEST(parser, ParsingTable2) { 72 | /** 73 | * Test: Context Free Grammar 74 | * S : a B | EPSILON 75 | * B : b C | EPSILON 76 | * C : c S | EPSILON 77 | * 78 | * 79 | */ 80 | grammar::Production p1; 81 | p1.SetParent("S"); 82 | p1.SetRules({grammar::Rule({"a", "B"}), grammar::Rule({grammar::EPSILON})}); 83 | grammar::Production p2; 84 | p2.SetParent("B"); 85 | p2.SetRules({grammar::Rule({"b", "C"}), grammar::Rule({grammar::EPSILON})}); 86 | grammar::Production p3; 87 | p3.SetParent("C"); 88 | p3.SetRules({grammar::Rule({"c", "S"}), grammar::Rule({grammar::EPSILON})}); 89 | grammar::Productions grammar = {p1, p2, p3}; 90 | 91 | auto nullables = utils::CalcNullables(grammar); 92 | auto firsts = utils::CalcFirsts(grammar, nullables); 93 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 94 | 95 | std::vector terminals = {"a", "b", "c"}; 96 | std::vector non_terminals = {"C", "S", "B"}; 97 | 98 | ParsingTable table = ParsingTable(terminals, non_terminals); 99 | table.SetFirsts(firsts); 100 | table.SetFollows(follows); 101 | table.SetProductions(grammar); 102 | table.BuildTable(); 103 | 104 | 
ASSERT_EQ(table.GetErrors().size(), 0); 105 | 106 | std::pair p; 107 | p = table.GetEntry("S", "a"); 108 | ASSERT_EQ(p.first, 0); 109 | ASSERT_EQ(p.second, 0); 110 | 111 | p = table.GetEntry("B", "b"); 112 | ASSERT_EQ(p.first, 1); 113 | ASSERT_EQ(p.second, 0); 114 | 115 | p = table.GetEntry("C", "c"); 116 | ASSERT_EQ(p.first, 2); 117 | ASSERT_EQ(p.second, 0); 118 | 119 | p = table.GetEntry("S", utils::STRING_ENDMARKER); 120 | ASSERT_EQ(p.first, 0); 121 | ASSERT_EQ(p.second, 1); 122 | 123 | p = table.GetEntry("B", utils::STRING_ENDMARKER); 124 | ASSERT_EQ(p.first, 1); 125 | ASSERT_EQ(p.second, 1); 126 | 127 | p = table.GetEntry("C", utils::STRING_ENDMARKER); 128 | ASSERT_EQ(p.first, 2); 129 | ASSERT_EQ(p.second, 1); 130 | } 131 | 132 | TEST(parser, ParsingTable3) { 133 | /** 134 | * Test: Context Free Grammar 135 | * S : a B | EPSILON 136 | * B : b C | EPSILON 137 | * C : c S | EPSILON 138 | * 139 | * 140 | */ 141 | grammar::Production p1; 142 | p1.SetParent("S"); 143 | p1.SetRules({grammar::Rule({"a", "B"}), grammar::Rule({grammar::EPSILON})}); 144 | grammar::Production p2; 145 | p2.SetParent("B"); 146 | p2.SetRules({grammar::Rule({"b", "C"}), grammar::Rule({grammar::EPSILON})}); 147 | grammar::Production p3; 148 | p3.SetParent("C"); 149 | p3.SetRules({grammar::Rule({"c", "S"}), grammar::Rule({grammar::EPSILON})}); 150 | grammar::Productions grammar = {p1, p2, p3}; 151 | 152 | auto nullables = utils::CalcNullables(grammar); 153 | auto firsts = utils::CalcFirsts(grammar, nullables); 154 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 155 | 156 | std::vector terminals = {"a", "b", "c"}; 157 | std::vector non_terminals = {"C", "S", "B"}; 158 | 159 | ParsingTable table = ParsingTable(terminals, non_terminals); 160 | table.SetFirsts(firsts); 161 | table.SetFollows(follows); 162 | table.SetProductions(grammar); 163 | table.BuildTable(); 164 | 165 | ASSERT_EQ(table.GetErrors().size(), 0); 166 | 167 | std::pair p; 168 | p = table.GetEntry("S", "a"); 
169 | ASSERT_EQ(p.first, 0); 170 | ASSERT_EQ(p.second, 0); 171 | 172 | p = table.GetEntry("B", "b"); 173 | ASSERT_EQ(p.first, 1); 174 | ASSERT_EQ(p.second, 0); 175 | 176 | p = table.GetEntry("C", "c"); 177 | ASSERT_EQ(p.first, 2); 178 | ASSERT_EQ(p.second, 0); 179 | 180 | p = table.GetEntry("S", utils::STRING_ENDMARKER); 181 | ASSERT_EQ(p.first, 0); 182 | ASSERT_EQ(p.second, 1); 183 | 184 | p = table.GetEntry("B", utils::STRING_ENDMARKER); 185 | ASSERT_EQ(p.first, 1); 186 | ASSERT_EQ(p.second, 1); 187 | 188 | p = table.GetEntry("C", utils::STRING_ENDMARKER); 189 | ASSERT_EQ(p.first, 2); 190 | ASSERT_EQ(p.second, 1); 191 | } 192 | 193 | TEST(parser, ParsingTable4) { 194 | /** 195 | * Test: Context Free Grammar 196 | * S : A B 197 | * A : a | EPSILON 198 | * B : b | EPSILON 199 | * 200 | * 201 | */ 202 | grammar::Production p1; 203 | p1.SetParent("S"); 204 | p1.SetRules({grammar::Rule({"A", "B"})}); 205 | grammar::Production p2; 206 | p2.SetParent("A"); 207 | p2.SetRules({grammar::Rule({"a"}), grammar::Rule({grammar::EPSILON})}); 208 | grammar::Production p3; 209 | p3.SetParent("B"); 210 | p3.SetRules({grammar::Rule({"b"}), grammar::Rule({grammar::EPSILON})}); 211 | grammar::Productions grammar = {p1, p2, p3}; 212 | 213 | auto nullables = utils::CalcNullables(grammar); 214 | auto firsts = utils::CalcFirsts(grammar, nullables); 215 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 216 | 217 | std::vector terminals = {"a", "b"}; 218 | std::vector non_terminals = {"A", "S", "B"}; 219 | 220 | ParsingTable table = ParsingTable(terminals, non_terminals); 221 | table.SetFirsts(firsts); 222 | table.SetFollows(follows); 223 | table.SetProductions(grammar); 224 | table.BuildTable(); 225 | 226 | ASSERT_EQ(table.GetErrors().size(), 0); 227 | 228 | std::pair p; 229 | p = table.GetEntry("S", "a"); 230 | ASSERT_EQ(p.first, 0); 231 | ASSERT_EQ(p.second, 0); 232 | 233 | p = table.GetEntry("S", "b"); 234 | ASSERT_EQ(p.first, 0); 235 | ASSERT_EQ(p.second, 0); 236 | 
237 | p = table.GetEntry("A", "a"); 238 | ASSERT_EQ(p.first, 1); 239 | ASSERT_EQ(p.second, 0); 240 | 241 | p = table.GetEntry("B", "b"); 242 | ASSERT_EQ(p.first, 2); 243 | ASSERT_EQ(p.second, 0); 244 | 245 | p = table.GetEntry("S", utils::STRING_ENDMARKER); 246 | ASSERT_EQ(p.first, 0); 247 | ASSERT_EQ(p.second, 0); 248 | 249 | p = table.GetEntry("A", utils::STRING_ENDMARKER); 250 | ASSERT_EQ(p.first, 1); 251 | ASSERT_EQ(p.second, 1); 252 | 253 | p = table.GetEntry("A", "b"); 254 | ASSERT_EQ(p.first, 1); 255 | ASSERT_EQ(p.second, 1); 256 | 257 | p = table.GetEntry("B", utils::STRING_ENDMARKER); 258 | ASSERT_EQ(p.first, 2); 259 | ASSERT_EQ(p.second, 1); 260 | } 261 | 262 | TEST(parser, ParsingTable5) { 263 | /** 264 | * Test: Context Free Grammar 265 | * S : i E t S S' | a 266 | * S': e S | EPSILON 267 | * E : b 268 | * 269 | * Not LL1 grammar 270 | */ 271 | grammar::Production p1; 272 | p1.SetParent("S"); 273 | p1.SetRules({grammar::Rule({"i", "E", "t", "S", "S'"}), grammar::Rule({"a"})}); 274 | grammar::Production p2; 275 | p2.SetParent("S'"); 276 | p2.SetRules({grammar::Rule({"e", "S"}), grammar::Rule({grammar::EPSILON})}); 277 | grammar::Production p3; 278 | p3.SetParent("E"); 279 | p3.SetRules({grammar::Rule({"b"})}); 280 | grammar::Productions grammar = {p1, p2, p3}; 281 | 282 | auto nullables = utils::CalcNullables(grammar); 283 | auto firsts = utils::CalcFirsts(grammar, nullables); 284 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 285 | 286 | std::vector terminals = {"a", "b"}; 287 | std::vector non_terminals = {"A", "S", "B"}; 288 | 289 | ParsingTable table = ParsingTable(terminals, non_terminals); 290 | table.SetFirsts(firsts); 291 | table.SetFollows(follows); 292 | table.SetProductions(grammar); 293 | table.BuildTable(); 294 | 295 | ASSERT_EQ(table.GetErrors().size(), 4); 296 | } 297 | 298 | TEST(parser, ParsingTable6) { 299 | /** 300 | * Test: Context Free Grammar 301 | * S : a A a | EPSILON 302 | * A : a b S | EPSILON 303 | * 304 
| * Not LL1 grammar 305 | */ 306 | grammar::Production p1; 307 | p1.SetParent("S"); 308 | p1.SetRules({grammar::Rule({"a", "A", "a"}), grammar::Rule({grammar::EPSILON})}); 309 | grammar::Production p2; 310 | p2.SetParent("A"); 311 | p2.SetRules({grammar::Rule({"a", "b", "S"}), grammar::Rule({grammar::EPSILON})}); 312 | 313 | grammar::Productions grammar = {p1, p2}; 314 | 315 | auto nullables = utils::CalcNullables(grammar); 316 | auto firsts = utils::CalcFirsts(grammar, nullables); 317 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 318 | 319 | std::vector terminals = {"a", "b"}; 320 | std::vector non_terminals = {"A", "S", "B"}; 321 | 322 | ParsingTable table = ParsingTable(terminals, non_terminals); 323 | table.SetFirsts(firsts); 324 | table.SetFollows(follows); 325 | table.SetProductions(grammar); 326 | table.BuildTable(); 327 | 328 | ASSERT_EQ(table.GetErrors().size(), 2); 329 | } 330 | 331 | TEST(parser, ParsingTable7) { 332 | /** 333 | * Test: Context Free Grammar 334 | * S : A | A 335 | * A : a 336 | * 337 | * Not LL1 grammar 338 | */ 339 | grammar::Production p1; 340 | p1.SetParent("S"); 341 | p1.SetRules({grammar::Rule({"A"}), grammar::Rule({"A"})}); 342 | grammar::Production p2; 343 | p2.SetParent("A"); 344 | p2.SetRules({grammar::Rule({"a"})}); 345 | 346 | grammar::Productions grammar = {p1, p2}; 347 | 348 | auto nullables = utils::CalcNullables(grammar); 349 | auto firsts = utils::CalcFirsts(grammar, nullables); 350 | auto follows = utils::CalcFollows(grammar, firsts, nullables, "S"); 351 | 352 | std::vector terminals = {"a"}; 353 | std::vector non_terminals = {"A", "S"}; 354 | 355 | ParsingTable table = ParsingTable(terminals, non_terminals); 356 | table.SetFirsts(firsts); 357 | table.SetFollows(follows); 358 | table.SetProductions(grammar); 359 | table.BuildTable(); 360 | 361 | ASSERT_EQ(table.GetErrors().size(), 1); 362 | } 363 | -------------------------------------------------------------------------------- 
/test/symbol_table/symbol_table_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "symbol_table/symbol_table.h" 4 | 5 | #include "gtest/gtest.h" 6 | 7 | using jucc::symbol_table::LinkedList; 8 | using jucc::symbol_table::Node; 9 | using jucc::symbol_table::SymbolTable; 10 | 11 | TEST(symbolTable, SymbolTable1) { 12 | /** 13 | * Checks an empty linked list 14 | */ 15 | LinkedList list = LinkedList(); 16 | ASSERT_EQ(true, list.IsEmpty()); 17 | } 18 | 19 | TEST(symbolTable, SymbolTable2) { 20 | /** 21 | * Checks adding of a node 22 | */ 23 | LinkedList list = LinkedList(); 24 | list.AddNewNode(std::string("x"), std::string("int"), 0); 25 | ASSERT_EQ(false, list.IsEmpty()); 26 | ASSERT_EQ(std::string("x"), list.GetHead()->identifier_); 27 | ASSERT_EQ(std::string("int"), list.GetHead()->data_type_); 28 | ASSERT_EQ(0, list.GetHead()->nesting_level_); 29 | } 30 | 31 | TEST(symbolTable, SymbolTable3) { 32 | /** 33 | * Checks the deletion of a node 34 | */ 35 | LinkedList list = LinkedList(); 36 | list.AddNewNode(std::string("x"), std::string("int"), 0); 37 | ASSERT_EQ(false, list.IsEmpty()); 38 | list.DeleteStartingNode(); 39 | ASSERT_EQ(true, list.IsEmpty()); 40 | } 41 | 42 | TEST(symbolTable, SymbolTable4) { 43 | /** 44 | * Check and add entry 45 | * Check duplicate symbol error 46 | * Check Deletion 47 | */ 48 | SymbolTable smb = SymbolTable(); 49 | Node *node = new Node("x", "int", 0); 50 | smb.CheckAndAddEntry(node, true); 51 | delete node; 52 | Node *head = smb.GetLinkedListById("x"); 53 | ASSERT_EQ(std::string("int"), head->data_type_); 54 | ASSERT_EQ(0, head->nesting_level_); 55 | 56 | Node *node1 = new Node("x", "int", 1); 57 | smb.CheckAndAddEntry(node1, true); 58 | delete node1; 59 | head = smb.GetLinkedListById("x"); 60 | ASSERT_EQ(std::string("int"), head->data_type_); 61 | ASSERT_EQ(1, head->nesting_level_); 62 | 63 | Node *node2 = new Node("x", "int", 1); 64 | smb.CheckAndAddEntry(node2, true); 65 
| delete node2; 66 | 67 | std::vector dups = smb.GetDuplicateSymbols(); 68 | int sz = static_cast(dups.size()); 69 | ASSERT_EQ(sz, 1); 70 | ASSERT_EQ(true, "x" == dups[0]); 71 | 72 | smb.RemoveNodesOnScopeEnd(1); 73 | smb.RemoveNodesOnScopeEnd(0); 74 | smb.RemoveNodesOnScopeEnd(2); 75 | ASSERT_EQ(0, smb.CheckOccurrencesOfId("x")); 76 | } 77 | -------------------------------------------------------------------------------- /test/utils/trie/trie_test.cpp: -------------------------------------------------------------------------------- 1 | #include "grammar/grammar.h" 2 | #include "gtest/gtest.h" 3 | #include "utils/trie/memory_efficient_trie.h" 4 | namespace grammar = jucc::grammar; 5 | 6 | TEST(trie, Trie1) { 7 | // E -> ieStSt | a | b ieS | ieStS 8 | grammar::Production p; 9 | p.SetRules({ 10 | grammar::Rule({"i", "e", "S", "S", "t"}), 11 | grammar::Rule({"a"}), 12 | grammar::Rule({"b"}), 13 | grammar::Rule({"i", "e", "S"}), 14 | grammar::Rule({"i", "e", "S", "t", "S"}), 15 | }); 16 | 17 | jucc::utils::TrieManager tm; 18 | tm.InsertAll(p); 19 | 20 | grammar::Rule rule; 21 | int len = 1; 22 | jucc::utils::TrieManager::GreedyPreorder(tm.GetMaster(), len, rule, true); 23 | ASSERT_EQ(rule.ToString(), "ieS"); 24 | } 25 | -------------------------------------------------------------------------------- /third_party/README.md: -------------------------------------------------------------------------------- 1 | # Third Party Directory 2 | 3 | All third-party code that we need to use goes here. 4 | --------------------------------------------------------------------------------