├── .clang-format
├── .clang-tidy
├── .dockerignore
├── .github
├── ISSUE_TEMPLATE
│ └── jucc-issue-template.md
├── pull_request_template.md
└── workflows
│ └── build_tests.yml
├── .gitignore
├── CMakeLists.txt
├── Dockerfile
├── LICENSE
├── README.md
├── benchmark
├── README.md
├── input
│ ├── benchmark_grammar.g
│ └── in1.cc
├── main.cpp
└── main
│ └── jucc_benchmark.cpp
├── build_support
├── benchmark_CMakeLists.txt.in
├── clang_format_exclusions.txt
├── gtest_CMakeLists.txt.in
├── nlohmann_json_CMakeLists.txt.in
├── run_clang_format.py
├── run_clang_tidy.py
└── run_clang_tidy_extra.py
├── codecov.yml
├── docs
├── README.md
├── cpp_guidelines.md
├── cpp_guidelines_code_style.md
├── tech_clangtools.md
├── tech_docker.md
└── tech_git.md
├── grammar.g
├── script
└── installation
│ └── packages.sh
├── server
├── package.json
├── public
│ ├── LICENSE
│ ├── README.md
│ ├── Treant.css
│ ├── Treant.js
│ ├── favicon.ico
│ ├── jquery.easing.js
│ ├── jquery.min.js
│ ├── jquery.mousewheel.js
│ ├── package.json
│ ├── perfect-scrollbar.css
│ ├── perfect-scrollbar.js
│ ├── raphael.js
│ └── tree.css
├── server.js
└── views
│ └── index.ejs
├── src
├── grammar
│ └── grammar.cpp
├── include
│ ├── grammar
│ │ └── grammar.h
│ ├── lexer
│ │ └── lexer.h
│ ├── main
│ │ └── jucc.h
│ ├── parser
│ │ ├── parser.h
│ │ └── parsing_table.h
│ ├── symbol_table
│ │ └── symbol_table.h
│ └── utils
│ │ ├── first_follow.h
│ │ ├── left_factoring.h
│ │ ├── left_recursion.h
│ │ ├── trie
│ │ └── memory_efficient_trie.h
│ │ └── utils.h
├── lexer
│ └── lexer.cpp
├── main
│ ├── jucc.cpp
│ └── main.cpp
├── parser
│ ├── parser.cpp
│ └── parsing_table.cpp
├── symbol_table
│ └── symbol_table.cpp
└── utils
│ ├── first_follow.cpp
│ ├── left_factoring.cpp
│ ├── left_recursion.cpp
│ ├── trie
│ └── memory_efficient_trie.cpp
│ └── utils.cpp
├── test
├── README.md
├── grammar
│ ├── grammar_test.cpp
│ ├── grammar_test_0.g
│ ├── grammar_test_1.g
│ ├── grammar_test_10.g
│ ├── grammar_test_11.g
│ ├── grammar_test_12.g
│ ├── grammar_test_13.g
│ ├── grammar_test_14.g
│ ├── grammar_test_15.g
│ ├── grammar_test_16.g
│ ├── grammar_test_17.g
│ ├── grammar_test_18.g
│ ├── grammar_test_2.g
│ ├── grammar_test_3.g
│ ├── grammar_test_4.g
│ ├── grammar_test_5.g
│ ├── grammar_test_6.g
│ ├── grammar_test_7.g
│ ├── grammar_test_8.g
│ └── grammar_test_9.g
├── lexer
│ ├── arithmetic.txt
│ ├── comments.txt
│ ├── input.txt
│ ├── input2.txt
│ ├── input3.txt
│ ├── input_err1.txt
│ ├── input_err2.txt
│ ├── lexer_test.cpp
│ └── scope_error.txt
├── main.cpp
├── main
│ └── jucc_test.cpp
├── parser
│ ├── grammar.g
│ ├── parser_test.cpp
│ └── parsing_table_test.cpp
├── symbol_table
│ └── symbol_table_test.cpp
└── utils
│ ├── trie
│ └── trie_test.cpp
│ └── utils_test.cpp
└── third_party
└── README.md
/.clang-format:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | #
18 | #
19 | BasedOnStyle: Google
20 | DerivePointerAlignment: false
21 | PointerAlignment: Right
22 | ColumnLimit: 120
23 |
--------------------------------------------------------------------------------
/.clang-tidy:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing,
12 | # software distributed under the License is distributed on an
13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 | # KIND, either express or implied. See the License for the
15 | # specific language governing permissions and limitations
16 | # under the License.
17 | #
18 | ---
19 | Checks: '
20 | bugprone-*,
21 | clang-analyzer-*,
22 | google-*,
23 | modernize-*,
24 | performance-*,
25 | portability-*,
26 | readability-*,
27 | -modernize-avoid-c-arrays,
28 | -modernize-use-trailing-return-type,
29 | -readability-magic-numbers,
30 | '
31 | CheckOptions:
32 | - { key: readability-identifier-naming.ClassCase, value: CamelCase }
33 | - { key: readability-identifier-naming.EnumCase, value: CamelCase }
34 | - { key: readability-identifier-naming.FunctionCase, value: CamelCase }
35 | - { key: readability-identifier-naming.GlobalConstantCase, value: UPPER_CASE }
36 | - { key: readability-identifier-naming.MemberCase, value: lower_case }
37 | - { key: readability-identifier-naming.MemberSuffix, value: _ }
38 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case }
39 | - { key: readability-identifier-naming.StructCase, value: CamelCase }
40 | - { key: readability-identifier-naming.UnionCase, value: CamelCase }
41 | - { key: readability-identifier-naming.VariableCase, value: lower_case }
42 |
43 | # Turn all the warnings from the checks above into errors.
44 | WarningsAsErrors: '*'
45 | HeaderFilterRegex: '(benchmark|src|test|util)/include'
46 | AnalyzeTemporaryDtors: true
47 |
48 | #### Disabled checks and why: #####
49 | #
50 | # -modernize-avoid-c-arrays,
51 | # Not feasible in the storage layer
52 | # -modernize-use-trailing-return-type
53 | # gtest issues
54 | # -readability-magic-numbers,
55 | # Blows up in tests.
56 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Don't send any build context to Docker.
2 |
3 | # additional
4 | .gitignore
5 | Dockerfile
6 | LICENSE
7 | README.md
8 |
9 | # build dir
10 | build
11 | cmake-build-debug/
12 |
13 | # configs
14 | .idea
15 | .vscode
16 |
17 | # pycache
18 | __pycache__
19 | *.pyc
20 |
21 | # Prerequisites
22 | *.d
23 |
24 | # Compiled Object files
25 | *.slo
26 | *.lo
27 | *.o
28 | *.obj
29 |
30 | # Precompiled Headers
31 | *.gch
32 | *.pch
33 |
34 | # Compiled Dynamic libraries
35 | *.so
36 | *.dylib
37 | *.dll
38 |
39 | # Fortran module files
40 | *.mod
41 | *.smod
42 |
43 | # Compiled Static libraries
44 | *.lai
45 | *.la
46 | *.a
47 | *.lib
48 |
49 | # Executables
50 | *.exe
51 | *.out
52 | *.app
53 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/jucc-issue-template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: JuCC Issue Template
3 | about: Please follow this template when creating an issue for JuCC!
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | Welcome to the issue tracker for **JuCC**! We're excited that you're interested in improving our system. Below, please **choose either a Feature Request or Bug Report** and replace the sample text below to describe the issue! Additionally, please choose the appropriate labels on the Github panel. If you wish to and are able to solve the issue, feel free to assign yourself; otherwise we will manage this later!
11 |
12 | # Feature Request
13 | ## Summary
14 | Please provide a short summary of the feature you would like implemented.
15 |
16 | ## Solution
17 | If possible, include a description of the desired solution you have in mind. Ideally, a series of steps outlining what is required and a plan to implement would be the most helpful for our developers!
18 |
19 | Feel free to suggest more or raise concerns with the existing items. When we're convinced of the list, we should make an overall project with issues for each item in the list to spread out the tasks and track progress.
20 |
21 | ### Alternatives
22 | If you are aware of any alternatives to the solution you presented, please describe them here!
23 |
24 | ___
25 |
26 | # Bug Report
27 | **Note**: Before filing a bug report, please make sure to check whether the bug has already been filed. If it has, please do not re-file the report; our developers are already hard at work fixing it!
28 |
29 | ## Summary
30 | Please provide a short summary of the bug observed.
31 |
32 | ## Environment
33 | To address the bug, especially if it is environment-specific, we need to know what kind of configuration you are running on. Please include the following:
34 |
35 | **OS**: Ubuntu (LTS) 20.04 or macOS 10.14+ (please specify version).
36 |
37 | **Compiler**: GCC 7.0+ or Clang 8.0+.
38 |
39 | **CMake Profile**: `Debug`, `Release`, `FastDebug`, or `Relwithdebinfo`. If the bug exists across all CMake profiles on a platform-compiler combo, then please say so.
40 |
41 | **CI**: If the bug has been observed in CI, please link to the CI build so that the bug can be referenced. Please make sure that if the issue has appeared on a PR branch that the PR itself is not causing the issue.
42 |
43 | ## Steps to Reproduce
44 | Whenever possible, retrace how you came across the problem and describe the steps here.
45 |
46 | If you have an understanding of why the bug occurred, that would be awesome to include as well! In this case, be as descriptive as you can and provide references to the relevant lines of code whenever possible!
47 |
48 | ### Expected Behavior
49 | Fill in the behavior expected from our system, as well as the reference you used.
50 |
51 | ### Actual Behavior
52 | Fill in the behavior you actually observed for our system.
53 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | Welcome to the PR tracker for **JuCC**! We're excited that you're interested in improving our system.
2 |
3 | **PRs that do not follow our guidelines** will be immediately closed. In general, you should avoid creating a PR until you are reasonably confident the tests should pass, having tested locally first.
4 |
5 | Please choose the appropriate labels on the Github panel and feel free to assign yourself. Additionally, if your PR solves an open issue, please link the issue on the Github panel. Feel free to assign reviewers to your PR or we will decide who best to assign for a review.
6 |
7 | # Heading
8 | Please choose an appropriate heading for your PR, relevant to the changes you have made.
9 |
10 | ## Description
11 | Please create a description of the issue your PR solves, and how you went about implementing your solution.
12 |
13 | ### Remaining Tasks
14 | Again, you should only create a PR once you are reasonably confident you are near completion. However, if there are some tasks still remaining before the PR is ready to merge, please create a checklist to track active progress.
15 | :pushpin: TODOs:
16 | - [x] ~~Stash limit in Parser for pushdown (INVALID)~~
17 | - [x] Add optional property support
18 | - [ ] Fix memory leaks
19 |
20 | ## Further Work
21 | If your PR unlocked the potential for further improvement, please note them here and create additional issues! Do the same if you discovered bugs in the process of development.
22 |
23 | ---
24 | Here's an empty template to format yourself!
25 | # Heading
26 |
27 | ## Description
28 |
29 | ## Remaining tasks
30 |
31 | - [ ] Foo
32 | - [ ] Bar
33 | - [ ] Baz
34 |
35 | ## Further work
36 |
--------------------------------------------------------------------------------
/.github/workflows/build_tests.yml:
--------------------------------------------------------------------------------
1 | name: Build Test
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | linux-build:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 |
18 | - name: setup environment
19 | run: |
20 | export DEBIAN_FRONTEND="noninteractive"
21 | mkdir build
22 | sudo apt-get -y update
23 |
24 | - name: installing dependencies
25 | run: |
26 | echo y | sudo ./script/installation/packages.sh all
27 |
28 | - name: build
29 | working-directory: ./build
30 | run: |
31 | cmake -GNinja ..
32 | ninja -j2
33 |
34 | - name: check linting & formatting
35 | working-directory: ./build
36 | run: |
37 | ninja check-format
38 | ninja check-lint
39 | ninja check-clang-tidy
40 |
41 | - name: run unit-tests
42 | working-directory: ./build
43 | run: |
44 | ninja test
45 |
46 | - name: codecov report
47 | working-directory: ./build
48 | run: |
49 | bash <(curl -s https://codecov.io/bash)
50 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # build dir
2 | build
3 | cmake-build-debug/
4 | node_modules/
5 | .DS_Store
6 | package-lock.json
7 |
8 | # configs
9 | .idea
10 | .vscode
11 |
12 | # pycache
13 | __pycache__
14 | *.pyc
15 |
16 | # Prerequisites
17 | *.d
18 |
19 | # Compiled Object files
20 | *.slo
21 | *.lo
22 | *.o
23 | *.obj
24 |
25 | # Precompiled Headers
26 | *.gch
27 | *.pch
28 |
29 | # Compiled Dynamic libraries
30 | *.so
31 | *.dylib
32 | *.dll
33 |
34 | # Fortran module files
35 | *.mod
36 | *.smod
37 |
38 | # Compiled Static libraries
39 | *.lai
40 | *.la
41 | *.a
42 | *.lib
43 |
44 | # Executables
45 | *.exe
46 | *.out
47 | *.app
48 |
49 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:20.04
2 | CMD bash
3 |
4 | # Install Ubuntu packages.
5 | # Please add packages in alphabetical order.
6 | ARG DEBIAN_FRONTEND=noninteractive
7 |
8 | RUN apt-get -y update
9 | RUN apt-get -y install sudo
10 | COPY . /JuCC
11 | WORKDIR /JuCC
12 |
13 | RUN echo y | ./script/installation/packages.sh all
14 | RUN mkdir build
15 | WORKDIR /JuCC/build
16 | RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release ..
17 |
18 | WORKDIR /JuCC
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # JuCC
2 |
3 |
4 |
5 | -----------------
6 |
7 | 
8 | [](https://codecov.io/gh/TheSYNcoder/JuCC)
9 |
10 | This is the official Jadavpur University Compiler Compiler repository.
11 |
12 | ## Key Features
13 | * Supports a subset of the C language for now.
14 | * Custom grammar files to easily extend the language.
15 | * LL(1) parsing with panic mode error recovery.
16 | * Generates .json parse tree outputs for easy visualization with [Treant.js](https://fperucic.github.io/treant-js/).
17 | * 100% Open Source (Apache-2.0 License)
18 |
19 | ## Quickstart
20 | The JuCC project is built and tested on **Ubuntu 20.04**.
21 |
22 | ```
23 | $ git clone https://github.com/TheSYNcoder/JuCC
24 | $ cd JuCC
25 | $ sudo ./script/installation/packages.sh
26 | $ cd server
27 | $ npm i
28 | $ cd ..
29 | $ mkdir build
30 | $ cd build
31 | $ cmake -GNinja -DCMAKE_BUILD_TYPE=Release ..
32 | $ ninja jucc
33 | $ ./bin/jucc -g <grammar_file> -f <input_file> -o <output_json_file>
34 | ```
35 |
36 | To run the unit tests provided,
37 |
38 | ```
39 | $ mkdir build
40 | $ cd build
41 | $ cmake -GNinja -DCMAKE_BUILD_TYPE=Release ..
42 | $ ninja
43 | $ ./bin/jucc_test
44 | ```
45 |
46 | To run the benchmarks,
47 | Note: `-DCMAKE_BUILD_TYPE=Release` is needed
48 |
49 | ```
50 | $ mkdir build
51 | $ cd build
52 | $ cmake -GNinja -DCMAKE_BUILD_TYPE=Release ..
53 | $ ninja
54 | $ ./bin/jucc_benchmark
55 | ```
56 |
57 | Before pushing or opening a pull request, make sure all of the following checks pass (this is compulsory!):
58 |
59 | ```
60 | $ ninja
61 | $ ninja check-format
62 | $ ninja check-clang-tidy
63 | $ ninja check-lint
64 | $ ninja test
65 | ```
66 |
67 | To add a new unit test, make a folder with the same relative path as in the src folder, and define your test. Please refer to [docs](https://github.com/TheSYNcoder/JuCC/tree/main/docs/) for more details about writing tests using the [googletest](https://github.com/google/googletest) framework.
68 |
69 |
70 | Additional Notes:
71 | - If you know what you're doing, install the prerequisite packages from `./script/installation/packages.sh` manually.
72 |
73 |
74 | ## For Developers
75 |
76 | Please see the [docs](https://github.com/TheSYNcoder/JuCC/tree/main/docs/).
77 |
78 | ## Contributing
79 |
80 | Contributions from everyone are welcome!
81 |
--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # Benchmarks Directory
2 |
3 | These are the microbenchmarks for the compiler. We use the [benchmark](https://github.com/google/benchmark) framework.
4 |
--------------------------------------------------------------------------------
/benchmark/input/benchmark_grammar.g:
--------------------------------------------------------------------------------
1 | ## This is the grammar file for JuCC
2 | ## Edit this file to make changes to the parsing grammar
3 | ## Epsilon is represented by special string EPSILON
4 |
5 | ## Terminals
6 | %terminals
7 | else float if int void
8 | ( ) { } * + - / % ,
9 | << >> < > <= >= = == != ;
10 | identifier integer_constant float_constant
11 | main cin cout
12 | %
13 |
14 | ## Non Terminals
15 | %non_terminals
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | %
25 |
26 | ## Start Symbol
27 | %start
28 |
29 | %
30 |
31 | ## Grammar for the language
32 | %rules
33 | ## Expressions
34 | : identifier
35 | :
36 | : ( )
37 | : integer_constant
38 | : float_constant
39 | : +
40 | : -
41 | :
42 | :
43 | :
44 | : *
45 | : /
46 | : %
47 | :
48 | : +
49 | : -
50 | :
51 | : cin >>
52 | : cout <<
53 | : <<
54 | : >>
55 | :
56 | : <
57 | : >
58 | : <=
59 | : >=
60 | :
61 | : ==
62 | : !=
63 | :
64 | :
65 |
66 | ## Declarations
67 | : ;
68 | :
69 | : ,
70 | : EPSILON
71 | :
72 | : =
73 | : void
74 | : int
75 | : float
76 | :
77 | : identifier
78 | : ( )
79 | :
80 |
81 | ## Statements
82 | :
83 | :
84 | :
85 | : { }
86 | :
87 | :
88 | : EPSILON
89 | :
90 | :
91 | : ;
92 | : ;
93 | : if ( )
94 | : if ( ) else
95 |
96 | ## Main
97 | : main ( )
98 | %
99 |
--------------------------------------------------------------------------------
/benchmark/input/in1.cc:
--------------------------------------------------------------------------------
1 | // input file for benchmarking
2 |
3 | int main() {
4 | int x, y;
5 | cin >> x >> y;
6 | if (x != 0) {
7 | if (y > 0) {
8 | cout << y;
9 | } else {
10 | cout << -y;
11 | }
12 | }
13 | float z = 1 + 2 + 3 + 1000/ 50 * 23.2 * (x * y * 10);
14 | cout << x << y << z;
15 |
16 | // other stuff for benchmarking
17 | float z0 = 1 + 2 + 3 + 1000/ 50 * 23.2 * (x * y * 10);
18 | float z1 = 1 + 2 - 3 + 1000/ 50 * 23.2 * (x * y * 10);
19 | float z2 = 1 + 2 / 3 + 1000/ 50 * 23.2 * (x * y * 10);
20 | float z3 = 1 + 2 * 3 + 1000/ 50 * 23.2 * (x * y * 10);
21 | float z4 = 1 + 2 % 3 + 1000/ 50 * 23.2 * (x * y * 10);
22 | float z5 = 1 + 2 > 3 + 1000/ 50 * 23.2 * (x * y * 10);
23 | float z6 = 1 + 2 == 3 + 1000/ 50 * 23.2 * (x * y * 10);
24 | float z7 = 1 + 2 != 3 + 1000/ 50 * 23.2 * (x * y * 10);
25 | float z8 = 1 + 2 >= 3 + 1000/ 50 * 23.2 * (x * y * 10);
26 | float z9 = 1 + 2 <= 3 + 1000/ 50 * 23.2 * (x * y * 10);
27 | cout << z0 << z1 << z2 << z3 << z4;
28 | cout << z5 << z6 << z7 << z8 << z9;
29 | }
30 |
--------------------------------------------------------------------------------
/benchmark/main.cpp:
--------------------------------------------------------------------------------
1 | #include "benchmark/benchmark.h"
2 |
3 | BENCHMARK_MAIN();
4 |
--------------------------------------------------------------------------------
/benchmark/main/jucc_benchmark.cpp:
--------------------------------------------------------------------------------
1 | #include "benchmark/benchmark.h"
2 | #include "main/jucc.h"
3 |
4 | // NOLINTNEXTLINE
5 | static void BenchmarkRadd(benchmark::State &state) {
6 | for (const auto &_ : state) {
7 | jucc::Radd(0, 1000000);
8 | }
9 | }
10 |
11 | BENCHMARK(BenchmarkRadd);
12 |
13 | // NOLINTNEXTLINE
14 | static void BenchmarkJuCC(benchmark::State &state) {
15 | std::string file_grammar = "../benchmark/input/benchmark_grammar.g";
16 | std::string file_input = "../benchmark/input/in1.cc";
17 | for (const auto &_ : state) {
18 | jucc::grammar::Parser grammar_parser = jucc::grammar::Parser(file_grammar.c_str());
19 | grammar_parser.Parse();
20 | jucc::grammar::Productions raw_productions = grammar_parser.GetProductions();
21 | jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions);
22 | auto nullables = jucc::utils::CalcNullables(productions);
23 | auto firsts = jucc::utils::CalcFirsts(productions, nullables);
24 | auto follows = jucc::utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol());
25 | auto terminals = grammar_parser.GetTerminals();
26 | auto non_terminals = jucc::utils::GetAllNonTerminals(productions);
27 | jucc::parser::ParsingTable parsing_table = jucc::parser::ParsingTable(terminals, non_terminals);
28 | parsing_table.SetFirsts(firsts);
29 | parsing_table.SetFollows(follows);
30 | parsing_table.SetProductions(productions);
31 | parsing_table.BuildTable();
32 |
33 | std::ifstream ifs(file_input);
34 | std::vector input_tokens;
35 | jucc::lexer::Lexer lexer = jucc::lexer::Lexer();
36 | int token;
37 | while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) {
38 | std::string ret_string = jucc::lexer::Lexer::GetTokenType(token);
39 | if (ret_string == "ignore") {
40 | continue;
41 | }
42 | input_tokens.emplace_back(ret_string);
43 | }
44 |
45 | jucc::parser::Parser parser = jucc::parser::Parser();
46 | parser.SetInputString(input_tokens);
47 | parser.SetStartSymbol(grammar_parser.GetStartSymbol());
48 | parser.SetParsingTable(parsing_table);
49 | while (!parser.IsComplete()) {
50 | parser.ParseNextStep();
51 | }
52 | parser.BuildParseTree();
53 | }
54 | }
55 |
56 | BENCHMARK(BenchmarkJuCC);
57 |
58 | /*
59 | * Same as BenchmarkJuCC but without disk operations
60 | */
61 | // NOLINTNEXTLINE
62 | static void BenchmarkJuCCCompute(benchmark::State &state) {
63 | std::string file_grammar = "../benchmark/input/benchmark_grammar.g";
64 | std::string file_input = "../benchmark/input/in1.cc";
65 |
66 | jucc::grammar::Parser grammar_parser = jucc::grammar::Parser(file_grammar.c_str());
67 | grammar_parser.Parse();
68 |
69 | std::ifstream ifs(file_input);
70 | std::vector input_tokens;
71 | jucc::lexer::Lexer lexer = jucc::lexer::Lexer();
72 | int token;
73 | while ((token = lexer.GetToken(ifs)) != jucc::lexer::TOK_EOF) {
74 | std::string ret_string = jucc::lexer::Lexer::GetTokenType(token);
75 | if (ret_string == "ignore") {
76 | continue;
77 | }
78 | input_tokens.emplace_back(ret_string);
79 | }
80 |
81 | for (const auto &_ : state) {
82 | jucc::grammar::Productions raw_productions = grammar_parser.GetProductions();
83 | jucc::grammar::Productions productions = jucc::utils::RemoveAllPossibleAmbiguity(raw_productions);
84 | auto nullables = jucc::utils::CalcNullables(productions);
85 | auto firsts = jucc::utils::CalcFirsts(productions, nullables);
86 | auto follows = jucc::utils::CalcFollows(productions, firsts, nullables, grammar_parser.GetStartSymbol());
87 | auto terminals = grammar_parser.GetTerminals();
88 | auto non_terminals = jucc::utils::GetAllNonTerminals(productions);
89 | jucc::parser::ParsingTable parsing_table = jucc::parser::ParsingTable(terminals, non_terminals);
90 | parsing_table.SetFirsts(firsts);
91 | parsing_table.SetFollows(follows);
92 | parsing_table.SetProductions(productions);
93 | parsing_table.BuildTable();
94 |
95 | jucc::parser::Parser parser = jucc::parser::Parser();
96 | parser.SetInputString(input_tokens);
97 | parser.SetStartSymbol(grammar_parser.GetStartSymbol());
98 | parser.SetParsingTable(parsing_table);
99 | while (!parser.IsComplete()) {
100 | parser.ParseNextStep();
101 | }
102 | parser.BuildParseTree();
103 | }
104 | }
105 |
106 | BENCHMARK(BenchmarkJuCCCompute);
107 |
--------------------------------------------------------------------------------
/build_support/benchmark_CMakeLists.txt.in:
--------------------------------------------------------------------------------
# Download-only helper project: fetches the google/benchmark sources into the
# build tree. Configure/build/install/test steps are intentionally empty.
cmake_minimum_required(VERSION 3.16)

project(benchmark-download NONE)

include(ExternalProject)
ExternalProject_Add(benchmark
  GIT_REPOSITORY    https://github.com/google/benchmark.git
  # The upstream default branch is `main`; the old `master` name is not
  # redirected for git fetch/checkout.
  # NOTE(review): consider pinning to a release tag for reproducible builds.
  GIT_TAG           main
  # Only the tip of the branch is needed; skip downloading full history.
  GIT_SHALLOW       TRUE
  SOURCE_DIR        "${CMAKE_BINARY_DIR}/benchmark-src"
  BINARY_DIR        "${CMAKE_BINARY_DIR}/benchmark-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)
16 |
--------------------------------------------------------------------------------
/build_support/clang_format_exclusions.txt:
--------------------------------------------------------------------------------
1 | *third_party*
2 |
--------------------------------------------------------------------------------
/build_support/gtest_CMakeLists.txt.in:
--------------------------------------------------------------------------------
# Download-only helper project: fetches the googletest sources into the
# build tree. Configure/build/install/test steps are intentionally empty.
cmake_minimum_required(VERSION 3.16)

project(googletest-download NONE)

include(ExternalProject)
ExternalProject_Add(googletest
  GIT_REPOSITORY    https://github.com/google/googletest.git
  # The upstream default branch is `main`; the old `master` name is not
  # redirected for git fetch/checkout.
  # NOTE(review): consider pinning to a release tag for reproducible builds.
  GIT_TAG           main
  # Only the tip of the branch is needed; skip downloading full history.
  GIT_SHALLOW       TRUE
  SOURCE_DIR        "${CMAKE_BINARY_DIR}/googletest-src"
  BINARY_DIR        "${CMAKE_BINARY_DIR}/googletest-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)
16 |
--------------------------------------------------------------------------------
/build_support/nlohmann_json_CMakeLists.txt.in:
--------------------------------------------------------------------------------
# Download-only helper project: fetches the nlohmann/json sources into the
# build tree. Configure/build/install/test steps are intentionally empty.
cmake_minimum_required(VERSION 3.16)

project(nlohmann-json-download NONE)

include(ExternalProject)
ExternalProject_Add(nlohmann_json
  GIT_REPOSITORY    https://github.com/nlohmann/json.git
  # NOTE(review): `develop` is a moving branch; consider pinning to a release
  # tag for reproducible builds.
  GIT_TAG           develop
  # Only the tip of the branch is needed; skip downloading full history.
  GIT_SHALLOW       TRUE
  SOURCE_DIR        "${CMAKE_BINARY_DIR}/nlohmann-json-src"
  BINARY_DIR        "${CMAKE_BINARY_DIR}/nlohmann-json-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)
16 |
--------------------------------------------------------------------------------
/build_support/run_clang_format.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing,
13 | # software distributed under the License is distributed on an
14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | # KIND, either express or implied. See the License for the
16 | # specific language governing permissions and limitations
17 | # under the License.
18 | #
19 | # Modified from the Apache Arrow project for the Terrier project.
20 | #
21 | # Modified from the CMU-DB NoisePage project for the BiscuitDB project.
22 | #
23 | # Taken from JUDB Group BiscuitDB project.
24 |
25 |
26 | import argparse
27 | import codecs
28 | import difflib
29 | import fnmatch
30 | import os
31 | import subprocess
32 | import sys
33 |
34 |
def check(arguments, source_dir):
    """Run clang-format over every ``.h``/``.cpp`` file under ``source_dir``.

    Files whose full path matches any glob in the module-level
    ``exclude_globs`` list (populated by the command-line entry point) are
    skipped.

    :param arguments: parsed command-line arguments; the attributes read are
        ``fix``, ``quiet`` and ``clang_format_binary``.
    :param source_dir: root directory that is walked recursively.
    :returns: ``True`` if, in check mode, at least one file differs from its
        clang-formatted form; ``False`` otherwise (always ``False`` in
        ``--fix`` mode).
    :raises subprocess.CalledProcessError: if clang-format exits non-zero.
    """
    formatted_filenames = []
    error = False
    for directory, _subdirs, filenames in os.walk(source_dir):
        for filename in filenames:
            fullpath = os.path.join(directory, filename)
            if not fullpath.endswith((".h", ".cpp")):
                continue
            # Filter out files that match the globs in the globs file
            if any(fnmatch.fnmatch(fullpath, exclude_glob)
                   for exclude_glob in exclude_globs):
                continue
            formatted_filenames.append(fullpath)

    if arguments.fix:
        if not formatted_filenames:
            # Nothing to format. Invoking clang-format with "-i" and no file
            # arguments makes it fall back to stdin, which it rejects, so
            # skip the call entirely.
            return error
        if not arguments.quiet:
            # Print out each file on its own line, but run
            # clang format once for all of the files
            print("\n".join("Formatting {}".format(x)
                            for x in formatted_filenames))
        subprocess.check_call([arguments.clang_format_binary,
                               "-i"] + formatted_filenames)
    else:
        for filename in formatted_filenames:
            if not arguments.quiet:
                print("Checking {}".format(filename))
            #
            # Due to some incompatibilities between Python 2 and
            # Python 3, there are some specific actions we take here
            # to make sure the difflib.unified_diff call works.
            #
            # In Python 2, the call to subprocess.check_output return
            # a 'str' type. In Python 3, however, the call returns a
            # 'bytes' type unless the 'encoding' argument is
            # specified. Unfortunately, the 'encoding' argument is not
            # in the Python 2 API. We could do an if/else here based
            # on the version of Python we are running, but it's more
            # straightforward to read the file in binary and do utf-8
            # conversion. In Python 2, it's just converting string
            # types to unicode types, whereas in Python 3 it's
            # converting bytes types to utf-8 encoded str types. This
            # approach ensures that the arguments to
            # difflib.unified_diff are acceptable string types in both
            # Python 2 and Python 3.
            with open(filename, "rb") as reader:
                # Run clang-format and capture its output
                formatted = subprocess.check_output(
                    [arguments.clang_format_binary,
                     filename])
                formatted = codecs.decode(formatted, "utf-8")
                # Read the original file
                original = codecs.decode(reader.read(), "utf-8")
                # Run the equivalent of diff -u
                diff = list(difflib.unified_diff(
                    original.splitlines(True),
                    formatted.splitlines(True),
                    fromfile=filename,
                    tofile="{} (after clang format)".format(
                        filename)))
                if diff:
                    print("{} had clang-format style issues".format(filename))
                    # Print out the diff to stderr
                    error = True
                    sys.stderr.writelines(diff)

    return error
101 |
102 |
# Command-line entry point: parse arguments, load the exclusion globs, and run
# `check` on every requested source directory. Exits with status 1 if any
# directory reported a formatting difference, 0 otherwise.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Runs clang format on all of the source "
        "files. If --fix is specified, re-formats the files in place; "
        "otherwise compares the output with the existing file, "
        "outputting a unified diff if there are any necessary changes")
    parser.add_argument("clang_format_binary",
                        help="Path to the clang-format binary")
    parser.add_argument("exclude_globs",
                        help="Filename containing globs for files "
                        "that should be excluded from the checks")
    # Required: without it there is nothing to scan, and the split below
    # would fail on None with an unhelpful traceback.
    parser.add_argument("--source_dirs", required=True,
                        help="Comma-separated root directories of the code")
    parser.add_argument("--fix", default=False,
                        action="store_true",
                        help="If specified, will re-format the source "
                        "code instead of comparing the re-formatted "
                        "output, defaults to %(default)s")
    parser.add_argument("--quiet", default=False,
                        action="store_true",
                        help="If specified, only print errors")

    args = parser.parse_args()

    had_err = False
    # `exclude_globs` is deliberately module-level: check() reads it.
    with open(args.exclude_globs) as globs_file:
        exclude_globs = [line.strip() for line in globs_file]
    for source_dir in args.source_dirs.split(','):
        if len(source_dir) > 0:
            # Call check() first so that every directory is processed even
            # after an earlier one has already reported an error.
            had_err = check(args, source_dir) or had_err

    sys.exit(1 if had_err else 0)
134 |
--------------------------------------------------------------------------------
/build_support/run_clang_tidy_extra.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Taken from JUDB Group BiscuitDB project.
4 |
5 | """
6 | A helper class, to suppress execution of clang-tidy.
7 |
8 | In clang-tidy-6.0, if the clang-tidy configuration file suppresses ALL checks,
9 | (e.g. via a .clang-tidy file), clang-tidy will print usage information and
10 | exit with a return code of 0. Harmless but verbose. In later versions of
11 | clang-tidy the return code becomes 1, making this a bigger problem.
12 |
13 | This helper addresses the problem by suppressing execution according to
14 | the configuration in this file.
15 | """
16 |
17 | import re
18 |
class CheckConfig(object):
    """ Check paths against the built-in config """

    def __init__(self):
        self._init_config()
        # When True, should_skip() prints every pattern/path match it finds.
        self.debug = False

    def _init_config(self):
        """ Any path matching one of the ignore_pats regular expressions
            denotes that we do NOT want to run clang-tidy on that item.
        """
        self.ignore_pats = [".*/third_party/.*", ]

    def should_skip(self, path):
        """ Should execution of clang-tidy be skipped?
            path - to check, against the configuration.
                   Typically the full path.
            returns - False if we want to run clang-tidy
                      True if we want to skip execution on this item
        """
        for pat in self.ignore_pats:
            # re.match anchors at the start of the path, so each pattern must
            # describe the path from its first character.
            if re.match(pat, path):
                if self.debug:
                    print("match pat: {}, {} => don't run".format(pat, path))
                return True
        return False
48 |
49 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | codecov:
2 | require_ci_to_pass: yes
3 |
4 | coverage:
5 | precision: 2
6 | round: down
7 | range: "85...95" # coverage lower than 85 is red, higher than 95 green, between color code no change.
8 | status:
9 | project:
10 | default:
11 | target: auto
12 | threshold: 5.0 # allow for 5% reduction of coverage without failing
13 |
14 | # do not run coverage on patch nor changes. Check overall quality.
15 | patch: off
16 |
17 | parsers:
18 | gcov:
19 | branch_detection:
20 | conditional: yes
21 | loop: yes
22 | method: no
23 | macro: no
24 |
25 | comment:
26 | layout: "reach, diff, flags, files, footer"
27 | behavior: default
28 | require_changes: no
29 |
30 | ignore:
31 | # ignore the third_party directory
32 | - "third_party"
33 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # JuCC Developer Docs
2 |
3 | **Table of Contents**
4 |
5 | - [Getting Started](#getting-started)
6 | - [Development](#development)
7 |
8 | ## Getting Started
9 |
10 | Hi! Welcome to JuCC.
11 |
12 | ### System setup
13 |
14 | 1. **GitHub** We use GitHub for all our development.
15 | - **Account** [Sign up](https://github.com/join) for a GitHub account.
16 | 2. **OS** Make sure you are running [Ubuntu 20.04](https://releases.ubuntu.com/20.04/). If not, the recommended approach is to dual boot or to use a VM.
17 | 3. **IDE** We recommend [Visual Studio Code](https://code.visualstudio.com/download).
18 | 4. **Packages** Install using
19 | - Go to the folder: `cd ~/JuCC/script/installation`
20 | - Install all the necessary packages: `sudo bash ./packages.sh`
21 |
22 | ### Further reading
23 |
24 | You should learn a little about the following:
25 |
26 | 1. [git](https://github.com/TheSYNcoder/JuCC/tree/main/docs/tech_git.md)
27 | 2. [C++ and how we use it](https://github.com/TheSYNcoder/JuCC/tree/main/docs/cpp_guidelines.md)
28 |
29 | ## Configuration
30 |
31 | ### CMake flags to know
32 |
33 | - We try our best to list all available options in [CMakeLists.txt](https://github.com/TheSYNcoder/JuCC/blob/main/CMakeLists.txt). Search for `# HEADER CMake options and global variables.`
34 |
35 | ### CMake targets to know
36 |
37 | - You should know these targets.
38 | - `jucc`: Building will build the `jucc` binary and all its dependencies.
39 | - `jucc_benchmark`: Building will build and link the `jucc` object file to the `benchmark` library. Running will run the benchmarks.
40 | - `test`: Building will run all unit tests using `gtest`. This will not show the specifics of failed tests; run `build/bin/jucc_test` for detailed info.
41 | - `format`: Building will run the formatter `clang-format` on the codebase with our rules. Use this every time right before you commit and right before you make a pull request!
42 | - `check-format`: Building will check if the codebase is correctly formatted according to `clang-format` with our rules.
43 | - `check-clang-tidy`: Building will check if the codebase passes the `clang-tidy` static analyzer tests with our rules.
44 | - `check-lint`: Building will check if the codebase passes the `build_support/cpplint.py` checks.
45 |
46 | If you run into issues, you may need your default `python` to point to a `python3` install. For example, add this to your `~/.zshrc`: `alias python=python3`
47 |
48 | ## Development
49 |
50 | ### Workflow
51 |
52 | 1. Check out the latest version of the JuCC repository.
53 | - `git checkout main`
54 | - `git pull upstream main`
55 | 2. Create a new branch.
56 | - `git checkout -b my_new_branch`
57 | 3. Work on your code. Add features, add documentation, add tests, remove bugs, and so on.
58 | 4. Push your code.
59 | - Make sure you run tests locally! See below.
60 | - `git push -u origin my_new_branch`
61 | 5. Go to GitHub and open a [new pull request](https://github.com/TheSYNcoder/JuCC/compare).
62 | 6. When a pull request is opened, this triggers our Continuous Integration environment on GitHub Actions (see `.github/workflows/build_tests.yml`).
63 | - CI will clone the repo, apply your changes, and make sure that formatting, linting, tests, etc pass.
64 | - Code has to pass all the checks for it to be merged!
65 |
66 | ### Running tests locally
67 |
68 | Run `make test` or `ninja test` inside your build folder to run all tests.
69 |
70 | For detailed test info:
71 | 1. Go to the folder: `cd ~/JuCC/build`
72 | 2. Generate optimized config with `cmake -GNinja .. -DCMAKE_BUILD_TYPE=Release`
73 | 3. Build project with `ninja`
74 | 4. Run `./bin/jucc_test`
75 |
76 | ### Benchmarks
77 |
78 | 1. Go to the folder: `cd ~/JuCC/build`
79 | 2. Generate optimized config with `cmake -GNinja .. -DCMAKE_BUILD_TYPE=Release`
80 | 3. Build project with `ninja`
81 | 4. Run `./bin/jucc_benchmark [...options]`
82 |
--------------------------------------------------------------------------------
/docs/cpp_guidelines.md:
--------------------------------------------------------------------------------
1 | # C++ Guidelines
2 |
3 | **Table of Contents**
4 |
5 | - [Language](#language)
6 | - [C++ Crash Course](#c-crash-course)
7 | - [C++ Project Structure](#c-project-structure)
8 | - [C++ Code Style](#c-code-style)
9 | - [Compiler](#compiler)
10 | - [Debugging](#debugging)
11 | - [Testing](#testing)
12 | - [Documentation](#documentation)
13 |
14 | ## Language
15 |
16 | JuCC is developed in `C++17`.
17 |
18 | ### C++ Crash Course
19 |
20 | C++ provides a lot of leeway in compiler development compared to other high-level languages. For instance, it supports both manual and automated memory management, varied styles of programming, stronger type checking, different kinds of polymorphism etc.
21 |
22 | Here's a list of useful references :
23 |
24 | * [cppreference](http://en.cppreference.com/w/cpp) is an online reference of the powerful `Standard Template Library` (STL).
25 | * [C++ FAQ](https://isocpp.org/faq) covers a lot of topics.
26 |
27 | Here's a list of modern features that you might want to make use of:
28 |
29 | * `auto` type inference
30 | * Range-based `for` loops
31 | * Smart pointers, in particular `unique_ptr`.
32 | * STL data structures, such as `unordered_set`, `unordered_map`, etc.
33 | * Threads, deleted member functions, lambdas, etc.
34 | * `static_assert` and/or `type_traits` such as `std::enable_if`, `std::is_same`.
35 |
36 | ### C++ Project Structure
37 |
38 | #### Directory Structure
39 |
40 | Organize source code files into relevant folders based on their functionality. Separate binary files from source files, and production code from testing code.
41 |
42 | **Code directories**
43 | * `src`: This is where the bulk of the code for JuCC lives. Anything that you expect to be compiled into the release should be here.
44 | * `benchmark`: This is where Google Benchmarks and their utility code reside. `src` should not have dependencies going into `benchmark`.
45 | * `test`: This is where Google Tests and their utility code reside. `src` should not have dependencies going into `test`. `benchmark` may have dependencies going into `test`.
46 | * `third_party`: This is where we add code which was not written by us but which we need to modify.
47 |
48 | **Infrastructure directories**
49 | * `script`: Scripts that support development and testing live here (e.g. dependency installation).
50 | * `build_support`: Files that support Continuous Integration CMake targets (lint, format, etc.).
51 |
52 | ##### src
53 | There can be at most 2 levels of directories under `src`: the first level will be general system components (e.g. grammar, lexer, parser, symbol_table, utils), and the second level will be either for a class of similar files, or for a self-contained sub-component.
54 |
55 | Translated into coding guidelines, you should rarely need to create a new first-level subdirectory, and should probably ask on Slack if you believe you do. To create a new secondary directory, make sure you meet the following criteria:
56 | * There are more than 2 (exclusive) files you need to put into this folder
57 | * Each file is stand-alone, i.e. either the contents don't make sense living in a single file, or that putting them in a single file makes the file large and difficult to navigate. (This is open to interpretation, but if, for example, you have 3 files containing 10-line class definitions, maybe they should not be spread out that much).
58 |
59 | And one of the two:
60 | * The subdirectory is a self-contained sub-component. This probably means that the folder only has one outward facing API. A good rule of thumb is when outside code files only need to include one header from this folder, where said API is defined.
61 | * The subdirectory contains a logical grouping of files, and there are enough of them that leaving them ungrouped makes the upper level hard to navigate. (e.g. all the plans, all the common data structures, etc.)
62 |
63 | A good rule of thumb is if you have subdirectory `As`, you should be able to say with a straight face that everything under `As` is an A. (e.g. Everything under `containers` is a container)
64 |
65 | Every class and/or function under these directories should be in namespaces. All code will be under namespace `JuCC`, and namespace the same as their first-level directory name (e.g `common`, `storage`). Secondary sub-directories do not have associated namespaces.
66 |
67 | ##### test
68 | The directory structure of the `test` folder should generally reflect the directory structure of the `src` folder, ignoring the `include`. Each test should be under the same path as the file they test, and named "XXX_test".
69 |
70 | Generally, there can be no code sharing between tests since they are different build targets. There are cases, however, where it makes sense for tests to share some common utility function. In that case, you can write a utility file under `test/util`.
71 |
72 | The `test/util` folder should have no sub-directories. In most cases, one test util file for every directory under `src` should suffice. (e.g. `test/util/storage_test_util.h`) Sometimes it will make sense to have a `util` file for stand-alone modules (e.g. `test/include/util/random_test_util.h`).
73 |
74 | ### C++ Code Style
75 |
76 | See [here](https://github.com/TheSYNcoder/JuCC/tree/main/docs/cpp_guidelines_code_style.md).
77 |
78 | ## Compiler
79 |
80 | We support GCC and LLVM Clang. **We do NOT support AppleClang** aka whatever compiler comes on macOS by default.
81 |
82 | How is the compiler actually invoked?
83 |
84 | 1. CMake is a *build system generator* that is commonly used for C++ development.
85 | - CMake does not compile your program.
86 | - Running `cmake <path-to-source-directory> [options]` generates a system that will compile your program.
87 | 2. CMake uses either [make](https://en.wikipedia.org/wiki/Make_(software)) (the default) or [ninja](https://ninja-build.org/) (requested by passing `-GNinja` as an argument to `cmake`).
88 | - We strongly encourage using `ninja`, which is faster and can intelligently build in parallel by default.
89 |
90 | For example, to manually compile JuCC, this is what you would do:
91 |
92 | 1. Clone the JuCC repo: `git clone https://github.com/TheSYNcoder/JuCC.git`
93 | 2. Create a build folder to build everything in: `mkdir build`
94 | 3. Go to the build folder: `cd build`
95 | 4. Generate a build system with CMake, passing in whatever arguments are desired: `cmake -GNinja .. -DCMAKE_BUILD_TYPE=Release`
96 | 5. Invoke the build system: `ninja jucc`, more generally `ninja <target>` for any valid target.
97 |
98 | We have configured the build system so that it will produce a `compile_commands.json` file. This contains the exact compiler invocations that will be used. You can check this file if you're curious. This file is also used by tools like `clang-tidy` to check for correctness. If you are not sure what a compiler flag does, look it up on Google or on the `man` page for `gcc` or `clang`.
99 |
100 | ## Debugging
101 |
102 | You should use a debugger to find any bugs where possible.
103 |
104 | If you need to do complex debugging, you may want to check out the following links:
105 |
106 | - [gdb](https://www.gnu.org/software/gdb/): General GDB documentation.
107 | - [lldb](https://lldb.llvm.org/): General LLDB documentation. A competitor to GDB.
108 | - [rr](https://rr-project.org/): A reversible debugger that lets you go backwards in time. Very powerful, but requires some level of hardware support.
109 |
110 | ## Testing
111 |
112 | Unit tests are critical for ensuring the correct functionality of your modules and reduce time spent on debugging. It can help prevent regressions. We use [googletest](https://github.com/google/googletest), a nice unit-testing framework for C++ projects.
113 |
114 | You should write unit test cases for each class/algorithm that you have added or modified. See the testing section for detail. Try to come up with test cases that make sure that the module exhibits the desired behavior. Some developers even suggest writing the unit tests before implementing the code. Make sure that you include corner cases, and try to find off-by-one errors.
115 |
116 | ## Documentation
117 |
118 | See [here](https://github.com/TheSYNcoder/JuCC/tree/main/docs/cpp_guidelines_code_style.md).
119 |
--------------------------------------------------------------------------------
/docs/cpp_guidelines_code_style.md:
--------------------------------------------------------------------------------
1 | # C++ Guidelines: Code Style
2 |
3 | ## Comments, Formatting, and Libraries
4 |
5 | Please **comment** your code. Comment all the class definitions, non-trivial member functions and variables, and all the steps in your algorithms.
6 | We generally follow the [Google C++ style guide](https://google.github.io/styleguide/cppguide.html). As they mention in that guide, these rules exist to keep the code base manageable while still allowing coders to use C++ language features productively.
7 | Make sure that you follow the [naming rules](https://google.github.io/styleguide/cppguide.html#General_Naming_Rules). For instance, use `class UpperCaseCamelCase` for type names, `int lower_case_with_underscores` for variable/method/function names.
8 |
9 | Please refrain from using any libraries other than the `STL` (and `googletest` for unit testing) without contacting us.
10 |
11 | ## Code Organization and Best Practice
12 | We strive to write modern C++17 code, but C++ is a language with a lot of legacy features from the 80s. Certain guidelines must be followed to make the use of language features tasteful and elegant.
13 |
14 | ### `.h` and `.cpp` files
15 |
16 | Surprisingly, there is no universal standard on what to call C++ code files. In this project, we will use `.h` for headers, inside the various `/include` directories, and `.cpp` for the implementation.
17 |
18 | When possible, implementation should be separated between `.h` and `.cpp` files, as this will make the compilation process much faster and hassle-free. Documentation should be written in the `.h` files. There are a couple of exceptions to this rule:
19 | - One-liners or otherwise boilerplate code that is unlikely to change. What constitutes a one-liner is somewhat ambiguous and can be subjective.
20 | - Templates. The C++ compiler generates instantiations of actual code based on the template given, and may need this information in the compilation units themselves, thus requiring all definitions to be present in the included header. There are two solutions to this: either write all of the template code in the header or explicitly instantiate templates. Because doing the latter is painful, we generally will write those definitions in-place.
21 |
22 | ### Forward Declarations
23 |
24 | When referring to some object only by reference, object or some template arguments, it is not necessary that the code knows its structure. As a result, we do not necessarily need to provide its complete declaration with members and methods (i.e. `#include`), but can get away with a hint that such a class exists. An example is given below:
25 |
26 | ```c++
27 | class Foo;
28 | ...
29 | void DoSomething(Foo *foo); // compiles
30 | void DoSomethingElse(Foo *foo) {
31 | foo->bar(); // error, member access into opaque type
32 | }
33 | ...
34 | ```
35 | Doing this saves re-compilation time. As a rule of thumb, forward declare when possible in the header, but always include the actual headers in the `.cpp` file.
36 |
37 | ### Concurrent Data Structures
38 |
39 | There are many implementations of concurrent data structures online, of differing quality. Before you bring in any external implementation, ask yourself:
40 | - Do I need a concurrent data structure?
41 | - Is this data structure implementation the right choice for my use case and workload?
42 | - Will someone in the future want to swap this out for a different implementation?
43 |
44 | Concurrent data structures, especially lock-free ones, are not magic. They perform well only in the environment they are designed for. Our advice is to always start simple, and ensure correctness with latches and other simple mechanisms. Performance gain needs to be measured and verified on representative benchmarks, taken against multiple alternatives.
45 |
46 | Finally, always prefer a wrapper whose underlying implementation can be swapped out with minimal effort to direct invocation of third-party code, when it comes to data structures.
47 |
--------------------------------------------------------------------------------
/docs/tech_clangtools.md:
--------------------------------------------------------------------------------
1 | # Clang Tools
2 |
3 | ## clang-tidy
4 |
5 | Important clang-tidy points are emphasized here. Read this if you're planning on changing how we use clang-tidy.
6 |
7 | The [official documentation](https://clang.llvm.org/extra/clang-tidy/index.html) and [source code](https://clang.llvm.org/extra/doxygen/dir_83d3dc8f7afce718e8cda93164271fb8.html) are your best bet.
8 |
9 | **Overview**
10 |
11 | 1. clang-tidy is a static analyzer and linter. It checks for common code smells.
12 | 2. When you run clang-tidy, it searches parent directories for a .clang-tidy file.
13 | - `clang-tidy -dump-config` will print out the current configuration.
14 | 3. clang-tidy reads the output of compile_commands.json, which is generated when you run cmake.
15 | 1. compile_commands.json only contains .cpp files.
16 | 2. Therefore, clang-tidy can only process C++ (.cpp) files. It cannot process header (.h) files by themselves.
17 | 3. clang-tidy will however process .h files which are included in a .cpp file.
18 | 4. You might not want warnings from all header files, e.g. only src/, not third_party/.
19 | 5. clang-tidy only supports whitelisting headers via a single regex string.
20 | - HeaderFilterRegex: 'REGEX' in .clang-tidy
21 | - -header-filter=REGEX on the command line, **note that this overrides your .clang-tidy setting**.
22 | - The (undocumented) format of the regex is POSIX ERE. See the implementation of [HeaderFilterRegex](https://clang.llvm.org/extra/doxygen/ClangTidyDiagnosticConsumer_8cpp_source.html#l00533) and [llvm::Regex](http://llvm.org/doxygen/Regex_8h_source.html#l00040).
23 | 4. clang-tidy separates the notion of Warning, Error, Compilation Error.
24 | - clang-tidy will only display warnings for enabled [checks](https://clang.llvm.org/extra/clang-tidy/checks/list.html).
25 | - clang-tidy will convert all warnings that match the WarningsAsErrors regex to errors.
26 | - **WarningsAsErrors does not enable any checks on its own**.
27 | - The list of checks are parsed as regex. **If you make a typo, it silently ignores that check**.
28 | - Run `clang-tidy -list-checks` to confirm which checks are enabled.
29 | 5. You can get clang-tidy to leave certain lines alone.
30 | - To ignore the same line, put `// NOLINT` at the end
31 | - To ignore the next line, put `// NOLINTNEXTLINE` on the immediate preceding line
32 | - Wherever possible, avoid using line-filter. Nobody wants to maintain line numbers when code gets added/deleted.
33 | - A `clang-diagnostic-error` may mean a compilation problem. No amount of NOLINT or disabling checks will shut that up. If you're pulling in third-party header dependencies, make sure they're a dependency for our check-clang-tidy make target too.
34 |
35 | **Gotchas**
36 |
37 | 1. Running clang-tidy on a list of files will run clang-tidy on each file sequentially.
38 | 2. If you include a header file which needs fixing in multiple .cpp files, clang-tidy will repeat the fix multiple times. An example which was included in three files:
39 | ```
40 | Original code : if (last_errno_)
41 | "Fixed" code : if (last_errno_ != 0 != 0 != 0)
42 | ```
43 |
44 | For both of the above problems, LLVM recommends using their [run-clang-tidy.py](https://github.com/llvm-mirror/clang-tools-extra/blob/master/clang-tidy/tool/run-clang-tidy.py) script. It will gather all the code fixes and apply them at once to prevent this issue. It also supports parallelism.
45 |
46 | However, if you use LLVM's default run-clang-tidy.py, caveat emptor:
47 |
48 | - It doesn't work on [Python 3](https://github.com/llvm-mirror/clang-tools-extra/blob/master/clang-tidy/tool/run-clang-tidy.py#L166).
49 | - It will override .clang-tidy's [HeaderFilterRegex](https://github.com/llvm-mirror/clang-tools-extra/blob/master/clang-tidy/tool/run-clang-tidy.py#L86).
50 | - From a Pythonic/PyLint point of view, the code quality isn't great.
51 | - That said, it is actively being developed and the above criticisms may no longer be valid.
52 |
53 | We currently use a modified version of run-clang-tidy.py.
54 |
55 | ## clang-format
56 |
57 | This section discusses clang-format and how we use it. We use the Google C++ style guide.
58 |
59 | Check the [official documentation](https://clang.llvm.org/docs/ClangFormat.html).
60 |
61 | **Overview**
62 |
63 | 1. clang-format is a code formatter and format checker.
64 | 2. When you run clang-format, it searches parent directories for a .clang-format file.
65 | 3. USAGE: `clang-format [options] [<file> ...]`
66 |
67 | **Gotchas**
68 |
69 | 1. We use the [run_clang_format.py](https://github.com/TheSYNcoder/JuCC/blob/main/build_support/run_clang_format.py) to automate the workflow.
70 |
71 | ## Summary
72 |
73 | Before creating a pull request make sure these tests pass.
74 |
75 | - `ninja check-format`
76 | - `ninja check-clang-tidy`
77 | - `ninja check-lint`
78 |
--------------------------------------------------------------------------------
/docs/tech_docker.md:
--------------------------------------------------------------------------------
1 | # Docker
2 |
3 | ## Installation
4 |
5 | ### Getting Docker
6 | #### Mac (Homebrew)
7 |
8 | 1. `brew install --cask docker` (older Homebrew versions: `brew cask install docker`)
9 | 2. Launch /Applications/Docker.app.
10 |
11 | #### Other (older Ubuntu versions)
12 |
13 | See [Install Docker CE](https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository). The recommended approach is to install using the repository. See section "Install Docker CE", subsection "Install using the repository" and follow the instructions for:
14 |
15 | * SET UP THE REPOSITORY followed by
16 | * INSTALL DOCKER CE
17 |
18 | See [Docker CE](https://www.docker.com/community-edition) if additional information is required.
19 |
20 | ### Setup
21 |
22 | 1. Launch Docker.
23 | 2. From the folder containing the Dockerfile, build the Docker image.
24 | - `docker build -t jucc .`
25 | - docker will load your local repo into a `/jucc` directory in the image
26 |
27 | ## Usage
28 |
29 | 1. Run the Docker image: `docker run -itd --name build jucc`
30 | 2. Run CMake:
31 | - `docker exec build cmake ..`
32 | - `docker exec build make`
33 |
34 | You can interact with the Docker image with
35 | - single commands: `docker exec [-w WORKING_DIRECTORY] build BASH_COMMAND`
36 | - interactive: `docker exec -it build bash`
37 | - by default, the docker image starts in the `/jucc/build` directory
38 |
39 |
40 | **Note: The below step DELETES all the work you have on Docker.**
41 |
42 | To stop the Docker image, run both:
43 | 1. `docker container stop build`
44 | 2. `docker rm build`
45 |
46 |
47 | ## Quirks
48 |
49 | Docker on Windows and Docker on Mac do not behave nicely with LSAN. LSAN needs to be allowed to spawn a ptrace thread. You'll need to `docker run --cap-add SYS_PTRACE ...` to get it working.
50 |
--------------------------------------------------------------------------------
/docs/tech_git.md:
--------------------------------------------------------------------------------
1 | # Git
2 |
3 | There are lots of guides and documents on the internet, but there are too many and many are confusing. Here is a mini guide to use git with a minimal number of commands and parameters. You won't find any details or explanation of Git's internal mechanisms here.
4 |
5 | ### Remote Transfer or how to communicate with the world
6 | * Get a fresh repository: git clone `<remote path to repository>`
7 | * Update current repository to latest: git fetch -v
8 | * Update current repository with commit from a fork: git fetch -v `<remote path to repository>` `<branch>`
9 | * Send your new commit to the remote: git push `<remote>` `<branch>`
10 |
11 | ### Commit or how to communicate with your local repository
12 | * stage your change with dynamic selection: git add/rm -p `<file>`
13 | * commit your change: git commit
14 | * uncommit previous commit: git reset --soft HEAD~1
15 | * unstage your change: git reset HEAD --
16 | * discard your change **forever** with dynamic selection: git checkout -p -- `<file>`
17 |
18 | ### Stash or how to save your precious work
19 | Stash is very useful. For example, you will use it before/after (push/pop) merge/rebase action
20 | * Push pending update on the stack: git stash
21 | * Get back your update: git stash pop
22 | * view content of your stash: git stash show -p `stash@{0}`
23 |
24 | ### Rebase or how to screw the history
25 | **Never** rebase commits that were pushed remotely. Rebase can be used to improve your current patch set, or to fast-forward-merge after a fetch. For better software engineering we **never** directly merge the upstream main when doing local development. When accepting a PR, we expect you to fetch the upstream main to your local repository, and rebase all the changes in your local branch on top of the upstream main.
26 | * The rebase command: git rebase -i `<base>`
27 | * Cancel it : git rebase --abort
28 | * Resolve conflict: git mergetool `<file>`
29 | * Continue rebase: git rebase --continue
30 |
31 | ### Branch or how to separate your work by feature
32 | Please note that main is actually the default branch
33 | * List branches: git branch -v
34 | * Switch to another branch: git checkout `<branch>`
35 | * Create a branch: git branch `<branch>`
36 | * Delete branches: git branch -d `<branch>`
37 | * Set the base reference of the branch (for rebase): git branch --set-upstream-to=`<remote>/<upstream branch>` `<branch>`
38 |
39 | # Git use case example
40 |
41 | ### Branch management
42 | Let's say you want to rebase your current branch topic-v1 to topic-v2 with new additions. Note: topic-v1 could also be main too.
43 | * Go to current branch: git checkout topic-v1
44 | * Create a new one: git branch topic-v2
45 | * Go into the new branch: git checkout topic-v2
46 | * Set the reference: git branch --set-upstream-to=origin/main topic-v2
47 | * Rebase: git rebase -i
48 | * ...
49 |
50 | ### Split commit
51 | * Copy your repository if you're not confident with this kind of operation: cp -a `<repository>` `<repository backup>`
52 | * Do a rebase: git rebase -i
53 | * Use edit on the commit that you want to split
54 | ... rebase on-going...
55 | * Uncommit: git reset --soft HEAD~1
56 | * Unstage: git reset HEAD --
57 |
58 | At this stage of operation, you get all your changes in the local files, but nothing is ready to be committed.
59 |
60 | Repeat the next 2 commands for each new commit that you want to create
61 | * Stage your change with dynamic selection: git add/rm -p `<file>`
62 | * Commit your change: git commit
63 |
64 | Once you have finished splitting your commit:
65 | * Finish the rebase: git rebase --continue
66 |
--------------------------------------------------------------------------------
/grammar.g:
--------------------------------------------------------------------------------
1 | ## This is the grammar file for JuCC
2 | ## Edit this file to make changes to the parsing grammar
3 | ## Epsilon is represented by special string EPSILON
4 |
5 | ## Terminals
6 | %terminals
7 | else float if int void
8 | ( ) { } * + - / % ,
9 | << >> < > <= >= = == != ;
10 | identifier integer_constant float_constant
11 | main cin cout
12 | %
13 |
14 | ## Non Terminals
15 | %non_terminals
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | %
25 |
26 | ## Start Symbol
27 | %start
28 |
29 | %
30 |
31 | ## Grammar for the language
32 | %rules
33 | ## Expressions
34 | : identifier
35 | :
36 | : ( )
37 | : integer_constant
38 | : float_constant
39 | : +
40 | : -
41 | :
42 | :
43 | :
44 | : *
45 | : /
46 | : %
47 | :
48 | : +
49 | : -
50 | :
51 | : cin >>
52 | : cout <<
53 | : <<
54 | : >>
55 | :
56 | : <
57 |