├── .clang-format ├── .github └── workflows │ ├── macosx.yml │ ├── ubuntu.yml │ └── windows.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── doc └── flags.md ├── driver ├── CMakeLists.txt ├── include │ └── a_c_compiler │ │ └── driver │ │ └── command_line_options.inl.h └── sources │ └── driver │ └── a_c_compiler.cpp ├── fe ├── CMakeLists.txt ├── include │ └── a_c_compiler │ │ └── fe │ │ ├── a_c_compiler.h │ │ ├── lex │ │ ├── lex.h │ │ └── tokens.inl.h │ │ ├── parse │ │ ├── ast_module.h │ │ ├── ast_node.h │ │ ├── parse.h │ │ ├── parser_diagnostic.h │ │ ├── parser_diagnostic.inl.h │ │ ├── parser_diagnostic_reporter.h │ │ └── parser_diagnostic_reporter.template.h │ │ └── reporting │ │ ├── diagnostic_handles.h │ │ └── logger.h └── sources │ └── a_c_compiler │ └── fe │ ├── lex │ └── lex.cpp │ ├── parse │ ├── ast_module.cpp │ ├── parse.cpp │ └── parser_diagnostic.cpp │ └── reporting │ ├── diagnostic_handles.cpp │ └── logger.cpp ├── options ├── CMakeLists.txt ├── include │ └── a_c_compiler │ │ ├── options │ │ └── global_options.h │ │ └── version.h └── sources │ └── global_options.cpp └── test ├── CMakeLists.txt ├── file_check ├── CMakeLists.txt └── source │ └── main.cpp ├── lex ├── CMakeLists.txt ├── keyword.c ├── main.c ├── numlit.c └── strlit.c └── parse ├── CMakeLists.txt ├── attribute.c ├── funcdef.c ├── struct.c └── typedef.c /.clang-format: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 
6 | # ============================================================================ # 7 | 8 | --- 9 | BasedOnStyle: WebKit 10 | IndentWidth: 5 11 | TabWidth: 5 12 | ContinuationIndentWidth: 5 13 | UseTab: ForIndentation 14 | 15 | # Namespaces 16 | NamespaceIndentation: All 17 | CompactNamespaces: true 18 | FixNamespaceComments: true 19 | 20 | # Overall Alignment 21 | ColumnLimit: 100 22 | AlignAfterOpenBracket: DontAlign # uses ContinuationIndentWidth for this instead 23 | AccessModifierOffset: -5 # do not push public: or private: around 24 | AlignConsecutiveAssignments: true # affects more than what's expected: do not use 25 | #AlignConsecutiveDeclarations: true # affects more than what's expected: do not use 26 | 27 | # Type Alignment 28 | DerivePointerAlignment: false 29 | PointerAlignment: Left 30 | AlwaysBreakTemplateDeclarations: true 31 | AlwaysBreakBeforeMultilineStrings: true 32 | 33 | # Comments 34 | AlignTrailingComments: true 35 | ReflowComments: true 36 | 37 | # Macros 38 | AlignEscapedNewlines: Left 39 | #IndentPPDirectives: None 40 | 41 | # Functions 42 | AllowShortFunctionsOnASingleLine: None 43 | AlwaysBreakAfterReturnType: None 44 | BreakConstructorInitializers: BeforeComma 45 | ConstructorInitializerIndentWidth: 0 46 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 47 | BinPackArguments: true 48 | BinPackParameters: true 49 | 50 | # Classes 51 | BreakBeforeInheritanceComma: false 52 | 53 | # Braces 54 | Cpp11BracedListStyle: false 55 | BreakBeforeBraces: Custom 56 | BraceWrapping: 57 | AfterEnum: false 58 | AfterControlStatement: false 59 | AfterClass: false 60 | AfterNamespace: false 61 | AfterStruct: false 62 | AfterUnion: false 63 | BeforeElse: true 64 | BeforeCatch: true 65 | IndentBraces: false 66 | SplitEmptyFunction: false 67 | SplitEmptyRecord: false 68 | SplitEmptyNamespace: true 69 | 70 | # Control Statements 71 | AllowShortIfStatementsOnASingleLine: false 72 | AllowShortLoopsOnASingleLine: false 73 | 
AllowShortCaseLabelsOnASingleLine: false 74 | IndentCaseLabels: false 75 | 76 | # Spaces 77 | SpaceAfterCStyleCast: false 78 | SpacesInCStyleCastParentheses: false 79 | SpaceAfterTemplateKeyword: true 80 | SpaceBeforeAssignmentOperators: true 81 | SpaceBeforeParens: ControlStatements 82 | SpaceInEmptyParentheses: false 83 | SpacesInAngles: false 84 | SpacesInParentheses: false 85 | SpacesInSquareBrackets: false 86 | MaxEmptyLinesToKeep: 3 87 | 88 | # Prevent OCD 89 | SortIncludes: false 90 | 91 | --- 92 | Language: Cpp 93 | Standard: Cpp11 94 | -------------------------------------------------------------------------------- /.github/workflows/macosx.yml: -------------------------------------------------------------------------------- 1 | name: Mac OSX 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | CTEST_OUTPUT_ON_FAILURE: 1 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: macos-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | submodules: recursive 17 | 18 | - name: configure 19 | run: | 20 | cmake -B build/debug -D A_C_COMPILER_TESTS=ON -D A_C_COMPILER_EXAMPLES=ON 21 | cmake -B build/release -D A_C_COMPILER_TESTS=ON -D A_C_COMPILER_EXAMPLES=ON 22 | 23 | - name: build 24 | run: | 25 | cmake --build build/debug --config Debug 26 | cmake --build build/release --config Release 27 | 28 | - name: test 29 | run: | 30 | cd build/debug 31 | ctest --build-config Debug 32 | cd ../.. 33 | cd build/release 34 | ctest --build-config Release 35 | cd ../.. 
36 | -------------------------------------------------------------------------------- /.github/workflows/ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: Ubuntu 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | CTEST_OUTPUT_ON_FAILURE: 1 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | submodules: recursive 17 | 18 | - name: install-deps 19 | run: | 20 | sudo apt update && sudo apt upgrade 21 | sudo apt install g++-12 gcc-12 22 | 23 | - name: configure 24 | run: | 25 | cmake -B build/debug -D A_C_COMPILER_TESTS=ON -D A_C_COMPILER_EXAMPLES=ON 26 | cmake -B build/release -D A_C_COMPILER_TESTS=ON -D A_C_COMPILER_EXAMPLES=ON 27 | 28 | - name: build 29 | run: | 30 | cmake --build build/debug --config Debug 31 | cmake --build build/release --config Release 32 | 33 | - name: test 34 | run: | 35 | cd build/debug 36 | ctest --build-config Debug 37 | cd ../.. 38 | cd build/release 39 | ctest --build-config Release 40 | cd ../.. 41 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | CTEST_OUTPUT_ON_FAILURE: 1 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: windows-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | submodules: recursive 17 | 18 | - name: configure 19 | run: | 20 | cmake -B build/debug -D A_C_COMPILER_TESTS=OFF -D A_C_COMPILER_EXAMPLES=ON 21 | cmake -B build/release -D A_C_COMPILER_TESTS=OFF -D A_C_COMPILER_EXAMPLES=ON 22 | 23 | - name: build 24 | run: | 25 | cmake --build build/debug --config Debug 26 | cmake --build build/release --config Release 27 | 28 | - name: test 29 | run: | 30 | cd build/debug 31 | ctest --build-config Debug 32 | cd ../.. 
33 | cd build/release 34 | ctest --build-config Release 35 | cd ../.. 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build directory 2 | build/ 3 | 4 | # Prerequisites 5 | *.d 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | .vscode/* 37 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 
6 | # ============================================================================ # 7 | 8 | cmake_minimum_required(VERSION 3.25) 9 | 10 | include(CMakeDependentOption) 11 | include(FetchContent) 12 | FetchContent_Declare(ztd.cmake 13 | GIT_REPOSITORY https://github.com/soasis/cmake 14 | GIT_TAG main) 15 | FetchContent_MakeAvailable(ztd.cmake) 16 | set(CMAKE_PROJECT_INCLUDE ${ZTD_CMAKE_PROJECT_PRELUDE}) 17 | 18 | project(a_c_compiler 19 | VERSION 0.0.0 20 | DESCRIPTION "A C Compiler" 21 | LANGUAGES CXX) 22 | 23 | # # # Top-Level Directories 24 | # Check if this is the top-level project or not 25 | if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) 26 | set(A_C_COMPILER_IS_TOP_LEVEL_PROJECT YES) 27 | else() 28 | set(A_C_COMPILER_IS_TOP_LEVEL_PROJECT NO) 29 | endif() 30 | 31 | # Modify bad flags / change defaults if we are the top level 32 | if(A_C_COMPILER_IS_TOP_LEVEL_PROJECT) 33 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}/${CMAKE_BUILD_TYPE}/lib") 34 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}/${CMAKE_BUILD_TYPE}/bin") 35 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_SYSTEM_PROCESSOR}/${CMAKE_BUILD_TYPE}/bin") 36 | 37 | if(NOT DEFINED CMAKE_CXX_STANDARD) 38 | set(CMAKE_CXX_STANDARD 26) 39 | endif() 40 | 41 | if(NOT DEFINED CMAKE_C_STANDARD) 42 | set(CMAKE_C_STANDARD 23) 43 | endif() 44 | 45 | set(CMAKE_OBJECT_PATH_MAX 1024) 46 | 47 | if(A_C_COMPILER_BENCHMARKS OR A_C_COMPILER_EXAMPLES OR A_C_COMPILER_TESTS OR A_C_COMPILER_SCRATCH) 48 | # normal flags 49 | check_compiler_flag(disable-permissive MSVC /permissive- GCC -pedantic) 50 | check_compiler_flag(utf8-literal-encoding MSVC /execution-charset:utf-8 GCC -fexec-charset=utf-8) 51 | check_compiler_flag(utf8-source-encoding MSVC /source-charset:utf-8 GCC -finput-charset=utf-8) 52 | check_compiler_flag(extra-constexpr-depth MSVC /constexpr:depth2147483647 GCC -fconstexpr-depth=2147483647 Clang 
-fconstexpr-depth=2147483647 LANGUAGES CXX) 53 | check_compiler_flag(extra-constexpr-steps MSVC /constexpr:steps2147483647 GCC -fconstexpr-ops-limit=2147483647 Clang -fconstexpr-steps=2147483647 LANGUAGES CXX) 54 | check_compiler_flag(template-debugging-mode GCC -ftemplate-backtrace-limit=0 LANGUAGES CXX) 55 | 56 | # Warning flags 57 | check_compiler_flag(warn-pedantic MSVC /permissive- GCC -pedantic) 58 | check_compiler_flag(warn-all MSVC /W4 GCC -Wall) 59 | check_compiler_flag(warn-errors MSVC /WX GCC -Werror) 60 | check_compiler_flag(warn-extra GCC -Wextra Clang -Wextra) 61 | check_compiler_diagnostic(alignas-extra-padding MSVC 4324 LANGUAGES CXX) 62 | check_compiler_diagnostic(bit-int-extension) 63 | # (Wstringop-overflow) - [meta-bug] bogus/missing -Wstringop-overflow warnings 64 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88443 65 | # Bogus -Wstringop-overflow warning 66 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100395 67 | # [10 Regression] spurious -Wstringop-overflow writing to a trailing array plus offset 68 | # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95353 69 | if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0) 70 | check_compiler_diagnostic(stringop-overflow) 71 | check_compiler_diagnostic(stringop-overread) 72 | check_compiler_diagnostic(array-bounds) 73 | endif() 74 | endif() 75 | endif() 76 | 77 | option(A_C_COMPILER_DOCUMENTATION "Enable build of documentation" OFF) 78 | option(A_C_COMPILER_DOCUMENTATION_NO_SPHINX "Turn off Sphinx usage (useful for ReadTheDocs builds)" OFF) 79 | option(A_C_COMPILER_EXAMPLES "Enable build of examples" OFF) 80 | option(A_C_COMPILER_BENCHMARKS "Enable build of benchmarks" OFF) 81 | option(A_C_COMPILER_TESTS "Enable build of tests" OFF) 82 | 83 | ## Add dependencies 84 | # dependencies need to be jailed to prevent 85 | # variables from escaping: hence the `*_jail` functions! 
86 | function(ztd_idk_dependency_jail) 87 | FetchContent_Declare(ztd.idk 88 | GIT_REPOSITORY https://github.com/soasis/idk 89 | GIT_TAG main) 90 | FetchContent_MakeAvailable(ztd.idk) 91 | set_property(DIRECTORY "${ztd.idk_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) 92 | endfunction() 93 | ztd_idk_dependency_jail() 94 | 95 | function(fmt_dependency_jail) 96 | FetchContent_Declare(fmt 97 | GIT_REPOSITORY https://github.com/fmtlib/fmt 98 | GIT_TAG master) 99 | FetchContent_MakeAvailable(fmt) 100 | set_property(DIRECTORY "${fmt_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) 101 | endfunction() 102 | fmt_dependency_jail() 103 | 104 | if (A_C_COMPILER_TESTS) 105 | enable_testing() 106 | function(ctre_dependency_jail) 107 | # ctre 108 | set(BUILD_TESTING OFF) 109 | FetchContent_Declare(ctre 110 | GIT_REPOSITORY https://github.com/hanickadot/compile-time-regular-expressions.git 111 | GIT_TAG main 112 | GIT_SHALLOW ON) 113 | FetchContent_MakeAvailable(ctre) 114 | set_property(DIRECTORY "${ctre_SOURCE_DIR}" PROPERTY EXCLUDE_FROM_ALL YES) 115 | endfunction() 116 | ctre_dependency_jail() 117 | endif() 118 | 119 | ## Add executables, components, and libraries 120 | # a_c_compiler frontend libraries/executable 121 | add_subdirectory(options) 122 | add_subdirectory(fe) 123 | add_subdirectory(driver) 124 | if (A_C_COMPILER_TESTS) 125 | # a_c_compiler tests 126 | add_subdirectory(test) 127 | endif() 128 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThePhD/a_c_compiler/d6d38f8758fa32a07e42477cb854e6efd3a9e639/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A C Compiler 2 | 3 | ## Building 4 | 5 | The usual CMake workflow: 6 | 7 | ```shell 8 | $ cmake -S. 
-Bbuild 9 | $ cmake --build build 10 | ``` 11 | 12 | ## Testing 13 | Tests are only built when the project is configured with `-D A_C_COMPILER_TESTS=ON`; once built, run them with CTest: 14 | ```shell 15 | $ cd build 16 | $ ctest 17 | Internal ctest changing into directory: /home/betsy/workspace/a_c_compiler/build 18 | Test project /home/betsy/workspace/a_c_compiler/build 19 | Start 1: test.lex.main 20 | 1/2 Test #1: test.lex.main .................... Passed 0.00 sec 21 | Start 2: test.lex.numlit 22 | 2/2 Test #2: test.lex.numlit .................. Passed 0.00 sec 23 | 24 | 100% tests passed, 0 tests failed out of 2 25 | 26 | Total Test time (real) = 0.00 sec 27 | ``` 28 | 29 | ## Developing 30 | 31 | See the `doc` directory for documentation on developing A C Compiler. 32 | -------------------------------------------------------------------------------- /doc/flags.md: -------------------------------------------------------------------------------- 1 | # Compiler Flags 2 | 3 | To add a compiler flag, add an entry to `driver/include/a_c_compiler/driver/command_line_options.inl.h`. 4 | There are two kinds of entries here: flags and options. 5 | Flags are booleans: a flag is enabled when it appears in the command line arguments and disabled otherwise. 6 | Options take a value, which is read from the argument that follows the option name. 7 | -------------------------------------------------------------------------------- /driver/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 
6 | # ============================================================================ # 7 | 8 | file(GLOB_RECURSE a_c_compiler.sources 9 | LIST_DIRECTORIES OFF 10 | CONFIGURE_DEPENDS 11 | sources/**.cpp sources/**.c) 12 | add_executable(a_c_compiler.driver ${a_c_compiler.sources}) 13 | add_executable(a_c_compiler::driver ALIAS a_c_compiler.driver) 14 | target_include_directories(a_c_compiler.driver 15 | PRIVATE 16 | include) 17 | target_link_libraries(a_c_compiler.driver 18 | PRIVATE 19 | a_c_compiler::fe 20 | a_c_compiler::options) 21 | -------------------------------------------------------------------------------- /driver/include/a_c_compiler/driver/command_line_options.inl.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #ifdef FLAG 9 | /* 10 | * true/false flags 11 | * 12 | * (Source name, default value, short flag, long flag, feature flag?, feature flag bit?, help message) 13 | */ 14 | FLAG(help, false, "-h", "--help", nullopt, nullopt, "Print help message") 15 | FLAG(verbose, false, "-v", "--verbose", nullopt, nullopt, "Display extra information from the driver") 16 | FLAG(debug_lexer, false, "-L", "-fdebug-lexer", nullopt, nullopt, "Dump tokens after lexing phase") 17 | FLAG(debug_parser, false, "", "-fdebug-parser", 1, 0x1, "Enable debug output from the parsing phase") 18 | #endif 19 | 20 | #ifdef OPTION 21 | /* 22 | * Options that take a value 23 | * 24 | * (Source name, type, command line name, default value, help message) 25 | */ 26 | OPTION(set_feature_flag, std::string, "-fset-feature-flag", "0,0x0", "Manually set a feature flag") 27 | OPTION(stop_after_phase, std::string, "-fstop-after-phase", "", 28 | "Stop compilation after given phase is 
complete") 29 | OPTION(output_file, std::string, "--output-file", "", "The file to write output into.") 30 | OPTION( 31 | lex_output_file, std::string, "--lex-output-file", "", "The file to write lexer output into.") 32 | OPTION(example_int_option, int, "-fexample-int-option", 123, 33 | "Dummy option that takes an int argument") 34 | #endif 35 | -------------------------------------------------------------------------------- /driver/sources/driver/a_c_compiler.cpp: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | using namespace std::literals; 25 | namespace fs = std::filesystem; 26 | using std::nullopt; 27 | 28 | 29 | using namespace a_c_compiler; 30 | 31 | static struct { 32 | #define FLAG(NAME, DEFAULT_VALUE, ...) bool NAME = DEFAULT_VALUE; 33 | #define OPTION(NAME, TYPE, CLINAME, DEFVAL, HELP) TYPE NAME = DEFVAL; 34 | #include 35 | #undef FLAG 36 | #undef OPTION 37 | std::vector positional_args; 38 | } cli_opts; 39 | 40 | int help_with(std::string_view exe, int return_code) noexcept { 41 | static constexpr size_t width = 25; 42 | const auto help_flag = [=](std::string f_short, std::string f_long, const char* help) { 43 | const auto flag = f_short + (f_short.empty() ? 
"" : " | ") + f_long; 44 | std::cout << "\t" << std::left << std::setw(width) << flag << " :: " << help << "\n"; 45 | }; 46 | const auto help_option 47 | = [=](std::string option, std::string type, std::string default_value, std::string help) { 48 | std::cout << "\t" << std::left << std::setw(width) << option << " :: " << help 49 | << "\n\t" << std::setw(width + 3) << "" 50 | << " (" 51 | << "type=" << type << ", default=" << default_value << ")\n"; 52 | }; 53 | std::cout << "Usage:\t" << exe << " [flags] [source files]\n\n"; 54 | 55 | std::cout << "Flags:\n"; 56 | #define FLAG(NAME, DEFVAL, FSHORT, FLONG, FLAG, BIT, HELP) help_flag(FSHORT, FLONG, HELP); 57 | #include 58 | #undef FLAG 59 | 60 | std::cout << "\nOptions:\n"; 61 | #define OPTION(NAME, TYPE, CLINAME, DEFVAL, HELP) help_option(CLINAME, #TYPE, #DEFVAL, HELP); 62 | #include 63 | #undef OPTION 64 | 65 | return return_code; 66 | } 67 | 68 | int help(std::string_view exe) noexcept { 69 | return help_with(exe, EXIT_FAILURE); 70 | } 71 | 72 | template 73 | T parse_option(std::string arg) noexcept; 74 | 75 | template <> 76 | std::string parse_option(std::string arg) noexcept { 77 | return arg; 78 | } 79 | template <> 80 | int parse_option(std::string arg) noexcept { 81 | return std::atoi(arg.c_str()); 82 | } 83 | 84 | void handle_feature_flag(std::string_view arg_str, global_options& global_opts) { 85 | ZTD_ASSERT_MESSAGE( 86 | "expected comma separator to be in feature flag argument", arg_str.contains(',')); 87 | size_t n = arg_str.find(','); 88 | std::size_t num_written; 89 | 90 | std::string_view flag_str = arg_str.substr(0, n); 91 | std::size_t flag_value = std::stoul(flag_str.data(), &num_written, 10); 92 | ZTD_ASSERT_MESSAGE("failed to parse feature flag argument", num_written); 93 | 94 | std::string_view bit_str = arg_str.substr(n + 1); 95 | std::size_t bit_value = std::stoul(bit_str.data(), &num_written, 16); 96 | ZTD_ASSERT_MESSAGE("failed to parse feature flag argument", num_written); 97 | 98 | 
global_opts.set_feature_flag(flag_value, bit_value); 99 | } 100 | /* Parse `args` into `cli_opts`, forwarding feature flags to `global_opts`. Returns false when help is requested or an input file does not exist, and true otherwise. */ 101 | bool parse_args(const std::string& exe, const std::vector& args, 102 | global_options& global_opts) noexcept { 103 | auto it = args.begin(); 104 | while (it != args.end()) { 105 | const bool is_set_feature_flag = (*it == "-fset-feature-flag"); /* checked up front: the OPTION branch below advances `it` to the option's value */ 106 | /* parse flags */ if (*it == "-h" or *it == "--help") { 107 | return false; 108 | } 109 | 110 | #define FLAG(NAME, DEFVAL, FSHORT, FLONG, FLAG, BIT, HELP) \ 111 | else if (*it == FSHORT or *it == FLONG) { \ 112 | cli_opts.NAME = true; \ 113 | std::optional flag = FLAG, bit = BIT; \ 114 | if (flag.has_value()) { \ 115 | ZTD_ASSERT(bit.has_value()); \ 116 | global_opts.set_feature_flag(flag.value(), bit.value()); \ 117 | } \ 118 | } 119 | 120 | #define OPTION(NAME, TYPE, CLINAME, DEFVAL, HELP) \ 121 | else if (*it == CLINAME) { \ 122 | it++; \ 123 | ZTD_ASSERT_MESSAGE("Expected argument to follow flag -f" #NAME, it != args.end()); \ 124 | cli_opts.NAME = parse_option(*it); \ 125 | } 126 | 127 | #include 128 | 129 | #undef FLAG 130 | #undef OPTION 131 | 132 | /* parse positional args */ 133 | else { 134 | cli_opts.positional_args.emplace_back(*it); 135 | } 136 | if (is_set_feature_flag) { 137 | handle_feature_flag(cli_opts.set_feature_flag, global_opts); 138 | } 139 | it++; 140 | } 141 | 142 | /* Check validity of command line args. 
*/ 143 | 144 | /* Check that all positional args are files that exist */ 145 | for (auto const& fpath : cli_opts.positional_args) { 146 | if (!fs::exists(fpath)) { 147 | std::cerr << "[error] could not find input file \"" << fpath << "\"\n"; 148 | return false; /* a missing input is a failure; `EXIT_FAILURE` is non-zero and would convert to `true` here */ 149 | } 150 | const auto ty = fs::status(fpath).type(); 151 | using ft = fs::file_type; 152 | ZTD_ASSERT_MESSAGE("Expected source file to be a regular file or a symlink!", 153 | ty == ft::regular or ty == ft::symlink); 154 | } 155 | 156 | return true; 157 | } 158 | 159 | /* Print the full compilation configuration as YAML */ 160 | void print_cli_opts() { 161 | std::cout << "\nCompilation Options:\n" 162 | << " source_files: [ "; 163 | auto it = cli_opts.positional_args.begin(); 164 | while (it != cli_opts.positional_args.end()) { 165 | std::cout << *it << (++it == cli_opts.positional_args.end() ? " " : ", "); 166 | } 167 | std::cout << "]\n"; 168 | 169 | std::cout 170 | #define FLAG(NAME, DEFVAL, FSHORT, FLONG, FLAG, BIT, HELP) \ 171 | << " " << #NAME << ":\n kind: flag\n value: " << cli_opts.NAME << "\n" 172 | #define OPTION(NAME, TYPE, CLINAME, DEFVAL, HELP) \ 173 | << " " << #NAME << ":\n kind: option\n value: " << cli_opts.NAME \ 174 | << "\n type: " << #TYPE << "\n" 175 | #include 176 | ; 177 | #undef FLAG 178 | #undef OPTION 179 | } 180 | 181 | int main(int argc, char** argv) { 182 | std::string exe(argv[0]); 183 | std::vector args(argv + 1, argv + argc); 184 | a_c_compiler::global_options global_opts {}; 185 | a_c_compiler::diagnostic_handles diag_handles {}; 186 | 187 | if (!parse_args(exe, args, global_opts)) { 188 | return help(exe); 189 | } 190 | 191 | if (cli_opts.verbose) { 192 | print_cli_opts(); 193 | } 194 | 195 | bool failed_lexer_output = false; 196 | bool failed_parse_output = false; 197 | 198 | for (auto const& source_file : cli_opts.positional_args) { 199 | if (cli_opts.verbose) { 200 | std::cout << 
"\nLexing source file " << source_file << "\n"; 201 | } 202 | 203 | auto tokens = 
lex(source_file, global_opts, diag_handles); 204 | 205 | if (cli_opts.debug_lexer) { 206 | const bool write_lex_to_stdout = cli_opts.lex_output_file.empty(); 207 | if (cli_opts.verbose) { 208 | std::cout << "Dumping tokens to " 209 | << (write_lex_to_stdout ? "standard output" 210 | : cli_opts.lex_output_file.c_str()) 211 | << "\n"; 212 | } 213 | 214 | if (write_lex_to_stdout) { 215 | dump_tokens(tokens); 216 | } 217 | else { 218 | std::ofstream lex_output_stream(cli_opts.lex_output_file.c_str()); 219 | if (lex_output_stream) { 220 | dump_tokens_into(tokens, lex_output_stream); 221 | } 222 | else { 223 | std::cerr << "cannot write to lex output file \"" 224 | << cli_opts.lex_output_file << "\"\n"; 225 | failed_lexer_output = true; 226 | } 227 | } 228 | } 229 | 230 | if (cli_opts.stop_after_phase == "lex") { 231 | return failed_lexer_output ? EXIT_FAILURE : EXIT_SUCCESS; 232 | } 233 | 234 | auto ast_module = parse(tokens, global_opts, diag_handles); 235 | 236 | if (cli_opts.verbose) { 237 | ast_module.dump(); 238 | } 239 | 240 | if (cli_opts.stop_after_phase == "parse") { 241 | return failed_parse_output || failed_lexer_output ? EXIT_FAILURE : EXIT_SUCCESS; 242 | } 243 | } 244 | 245 | return EXIT_SUCCESS; 246 | } 247 | -------------------------------------------------------------------------------- /fe/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 
6 | # ============================================================================ # 7 | 8 | file(GLOB_RECURSE a_c_compiler.fe.sources 9 | LIST_DIRECTORIES OFF 10 | CONFIGURE_DEPENDS 11 | sources/**.cpp sources/**.c) 12 | 13 | add_library(a_c_compiler.fe ${a_c_compiler.fe.sources}) 14 | add_library(a_c_compiler::fe ALIAS a_c_compiler.fe) 15 | target_include_directories(a_c_compiler.fe 16 | PUBLIC 17 | include 18 | ) 19 | target_link_libraries(a_c_compiler.fe 20 | PUBLIC 21 | ztd::idk 22 | fmt::fmt 23 | a_c_compiler::options 24 | ) 25 | target_compile_definitions(a_c_compiler.fe 26 | PRIVATE 27 | _CRT_SECURE_NO_WARNINGS=1 28 | ) 29 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/a_c_compiler.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/lex/lex.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace a_c_compiler { 20 | 21 | namespace fs = std::filesystem; 22 | 23 | enum token_id : int32_t { 24 | #define CHAR_TOKEN(TOK, INTVAL) TOK = INTVAL, 25 | #define KEYWORD_TOKEN(TOK, INTVAL, KEYWORD) TOK = INTVAL, 26 | #define TOKEN(TOK, INTVAL) TOK = INTVAL, 27 | #include 28 | #undef CHAR_TOKEN 29 | #undef KEYWORD_TOKEN 30 | #undef TOKEN 31 | }; 32 | 33 | 34 | struct file_offset_info { 35 | size_t lineno, column; 36 | }; 37 | 38 | struct token { 39 | token_id id; 40 | file_offset_info source_location; 41 | }; 42 | 43 | using token_vector = std::vector; 44 | void dump_tokens_into(token_vector const& toks, std::ostream& output_stream) noexcept; 45 | void dump_tokens(token_vector const& toks) noexcept; 46 | 47 | std::string_view lexed_id(size_t index) noexcept; 48 | std::string_view lexed_numeric_literal(size_t index) noexcept; 49 | std::string_view lexed_string_literal(size_t index) noexcept; 50 | 51 | token_vector lex(fs::path const& source_file, const global_options& global_opts, 52 | diagnostic_handles& diag_handles) noexcept; 53 | } /* namespace a_c_compiler */ 54 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/lex/tokens.inl.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | 8 | #ifdef CHAR_TOKEN 9 | CHAR_TOKEN(tok_l_paren, '(') 10 | CHAR_TOKEN(tok_r_paren, ')') 11 | CHAR_TOKEN(tok_l_curly_bracket, '{') 12 | CHAR_TOKEN(tok_r_curly_bracket, '}') 13 | CHAR_TOKEN(tok_l_square_bracket, '[') 14 | CHAR_TOKEN(tok_r_square_bracket, ']') 15 | CHAR_TOKEN(tok_semicolon, ';') 16 | CHAR_TOKEN(tok_asterisk, '*') 17 | CHAR_TOKEN(tok_colon, ':') 18 | CHAR_TOKEN(tok_comma, ',') 19 | CHAR_TOKEN(tok_equals_sign, '=') 20 | 21 | CHAR_TOKEN(tok_plus, '+') 22 | CHAR_TOKEN(tok_minus, '-') 23 | CHAR_TOKEN(tok_ampersand, '&') 24 | CHAR_TOKEN(tok_percent, '%') 25 | #endif 26 | 27 | #ifdef KEYWORD_TOKEN 28 | #define MAKE_KEYWORD_TOKEN(KEYWORD, INTVAL) KEYWORD_TOKEN(tok_keyword_##KEYWORD, INTVAL, KEYWORD) 29 | MAKE_KEYWORD_TOKEN(alignas, -1025) 30 | MAKE_KEYWORD_TOKEN(_Alignas, -1026) 31 | MAKE_KEYWORD_TOKEN(alignof, -1027) 32 | MAKE_KEYWORD_TOKEN(_Alignof, -1028) 33 | MAKE_KEYWORD_TOKEN(auto, -1029) 34 | MAKE_KEYWORD_TOKEN(__auto_type, -1030) 35 | MAKE_KEYWORD_TOKEN(bool, -1031) 36 | MAKE_KEYWORD_TOKEN(_Bool, -1032) 37 | MAKE_KEYWORD_TOKEN(break, -1033) 38 | MAKE_KEYWORD_TOKEN(case, -1034) 39 | MAKE_KEYWORD_TOKEN(char, -1035) 40 | MAKE_KEYWORD_TOKEN(const, -1036) 41 | MAKE_KEYWORD_TOKEN(constexpr, -1037) 42 | MAKE_KEYWORD_TOKEN(continue, -1038) 43 | MAKE_KEYWORD_TOKEN(default, -1039) 44 | MAKE_KEYWORD_TOKEN(do, -1040) 45 | MAKE_KEYWORD_TOKEN(double, -1041) 46 | MAKE_KEYWORD_TOKEN(else, -1042) 47 | MAKE_KEYWORD_TOKEN(enum, -1043) 48 | MAKE_KEYWORD_TOKEN(extern, -1044) 49 | MAKE_KEYWORD_TOKEN(false, -1045) 50 | MAKE_KEYWORD_TOKEN(float, -1046) 51 | MAKE_KEYWORD_TOKEN(for, -1047) 52 | MAKE_KEYWORD_TOKEN(goto, -1048) 53 | MAKE_KEYWORD_TOKEN(if, -1049) 54 | MAKE_KEYWORD_TOKEN(inline, -1050) 55 | MAKE_KEYWORD_TOKEN(int, -1051) 56 | MAKE_KEYWORD_TOKEN(long, -1052) 57 | MAKE_KEYWORD_TOKEN(nullptr, -1053) 58 | MAKE_KEYWORD_TOKEN(register, -1054) 59 | MAKE_KEYWORD_TOKEN(restrict, 
-1055) 60 | MAKE_KEYWORD_TOKEN(return, -1056) 61 | MAKE_KEYWORD_TOKEN(short, -1057) 62 | MAKE_KEYWORD_TOKEN(signed, -1058) 63 | MAKE_KEYWORD_TOKEN(sizeof, -1059) 64 | MAKE_KEYWORD_TOKEN(static, -1060) 65 | MAKE_KEYWORD_TOKEN(static_assert, -1061) 66 | MAKE_KEYWORD_TOKEN(_Static_assert, -1062) 67 | MAKE_KEYWORD_TOKEN(struct, -1063) 68 | MAKE_KEYWORD_TOKEN(switch, -1064) 69 | MAKE_KEYWORD_TOKEN(thread_local, -1065) 70 | MAKE_KEYWORD_TOKEN(_Thread_local, -1066) 71 | MAKE_KEYWORD_TOKEN(true, -1067) 72 | MAKE_KEYWORD_TOKEN(typedef, -1068) 73 | MAKE_KEYWORD_TOKEN(typeof, -1069) 74 | MAKE_KEYWORD_TOKEN(__typeof__, -1070) 75 | MAKE_KEYWORD_TOKEN(typeof_unqual, -1071) 76 | MAKE_KEYWORD_TOKEN(union, -1072) 77 | MAKE_KEYWORD_TOKEN(unsigned, -1073) 78 | MAKE_KEYWORD_TOKEN(void, -1074) 79 | MAKE_KEYWORD_TOKEN(volatile, -1075) 80 | MAKE_KEYWORD_TOKEN(while, -1076) 81 | MAKE_KEYWORD_TOKEN(_Atomic, -1077) 82 | MAKE_KEYWORD_TOKEN(_BitInt, -1078) 83 | MAKE_KEYWORD_TOKEN(_Complex, -1079) 84 | MAKE_KEYWORD_TOKEN(_Generic, -1080) 85 | MAKE_KEYWORD_TOKEN(_Imaginary, -1081) 86 | MAKE_KEYWORD_TOKEN(_Noreturn, -1082) 87 | // some extension keywords 88 | MAKE_KEYWORD_TOKEN(_Operator, -2049) 89 | MAKE_KEYWORD_TOKEN(__attribute__, -2050) 90 | MAKE_KEYWORD_TOKEN(__declspec, -2051) 91 | MAKE_KEYWORD_TOKEN(__vectorcall, -2052) 92 | MAKE_KEYWORD_TOKEN(__thiscall, -2053) 93 | MAKE_KEYWORD_TOKEN(asm, -2054) 94 | MAKE_KEYWORD_TOKEN(__asm__, -2055) 95 | #undef MAKE_KEYWORD_TOKEN 96 | #endif 97 | 98 | #ifdef TOKEN 99 | // defined specifically for comment parsing work 100 | TOKEN(tok_forward_slash, '/') 101 | TOKEN(tok_id, -1) 102 | TOKEN(tok_num_literal, -2) 103 | TOKEN(tok_str_literal, -3) 104 | TOKEN(tok_line_comment, -4) 105 | TOKEN(tok_block_comment, -5) 106 | TOKEN(tok_newline, -6) 107 | TOKEN(tok_tab, -7) 108 | // special embed stream token 109 | TOKEN(tok_pp_embed, -8) 110 | #endif 111 | -------------------------------------------------------------------------------- 
/fe/include/a_c_compiler/fe/parse/ast_module.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace a_c_compiler { 20 | enum function_specifier { 21 | funcspec_inline = 0x1, 22 | funcspec__Noreturn = 0x2, 23 | }; 24 | 25 | enum class type_modifier : unsigned char { 26 | tm_none = 0, 27 | tm_signed, 28 | tm_unsigned, 29 | tm__Atomic, // _Atomic(int) 30 | }; 31 | 32 | enum class type_category : unsigned char { 33 | tc_none, 34 | tc_void, 35 | tc_bool, 36 | tc_char, 37 | tc_short, 38 | tc_int, 39 | tc_long, 40 | tc_longlong, 41 | tc__BitInt, 42 | tc_float, 43 | tc_double, 44 | tc_longdouble, 45 | tc_longlongdouble, 46 | tc_union, 47 | tc_struct, 48 | tc_enum, 49 | tc_function, 50 | tc_array, 51 | tc_variable_length_array, 52 | tc_vla = tc_variable_length_array, 53 | tc_data_pointer, 54 | tc_function_pointer, 55 | tc_nullptr, 56 | tc_auto, // inferred type 57 | tc__Padding, // extension: struct foo { int meow; _Padding(16) padding; uint16_t bark; }; 58 | tc_array_span // extension: array span 59 | }; 60 | 61 | enum class qualifier : unsigned char { 62 | none = 0b0000, 63 | q_const = 0b0001, 64 | q__Atomic = 0b0010, // e.g. 
_Atomic int 65 | q_volatile = 0b0100, 66 | q_restrict = 0b1000, 67 | }; 68 | 69 | enum storage_class_specifier : unsigned short { 70 | none = 0b00000000000, 71 | scs_static = 0b00000000001, 72 | scs_extern = 0b00000000010, 73 | scs_constexpr = 0b00000000100, 74 | scs_register = 0b00000001000, 75 | scs_thread_local = 0b00000010000, 76 | scs_typedef = 0b00000100000, 77 | scs_auto = 0b00001000000 78 | }; 79 | using sc_specifier = storage_class_specifier; 80 | 81 | struct type_data; 82 | struct type { 83 | constexpr type() noexcept = default; 84 | constexpr type(const type&) noexcept = default; 85 | constexpr type(type&&) noexcept = default; 86 | constexpr type& operator=(const type&) noexcept = default; 87 | constexpr type& operator=(type&&) noexcept = default; 88 | 89 | constexpr explicit type(std::size_t ref_index) noexcept : m_ref(ref_index) { 90 | } 91 | 92 | constexpr std::size_t index() const noexcept { 93 | return m_ref; 94 | } 95 | 96 | type_data& data() const noexcept; 97 | 98 | private: 99 | std::uint_least32_t m_ref; 100 | }; 101 | 102 | struct type_data { 103 | type_modifier modifier; 104 | type_category category = type_category::tc_none; 105 | qualifier qualifiers; 106 | std::uint32_t specifiers; 107 | std::size_t bit_size; // for _BitInt and _Padding and friends 108 | std::vector sub_types; 109 | 110 | static type get_new_type(); 111 | 112 | type& pointee_type() { 113 | ZTD_ASSERT_MESSAGE("Must be a pointer type.", 114 | category == type_category::tc_data_pointer 115 | || category == type_category::tc_function_pointer); 116 | ZTD_ASSERT_MESSAGE( 117 | "There must be at least 1 available sub-type.", sub_types.size() == 1); 118 | return sub_types[0]; 119 | } 120 | 121 | type& element_type() { 122 | ZTD_ASSERT_MESSAGE("Must be an array type.", 123 | category == type_category::tc_array || category == type_category::tc_vla 124 | || category == type_category::tc_array_span); 125 | ZTD_ASSERT_MESSAGE( 126 | "There must be at least 1 available sub-type.", 
sub_types.size() == 1); 127 | return sub_types[0]; 128 | } 129 | 130 | std::span member_types() { 131 | ZTD_ASSERT_MESSAGE("Must be a structure or union type.", 132 | category == type_category::tc_struct || category == type_category::tc_union); 133 | return sub_types; 134 | } 135 | 136 | type& return_type() { 137 | ZTD_ASSERT_MESSAGE( 138 | "Must be a function type.", category == type_category::tc_function); 139 | ZTD_ASSERT_MESSAGE("Must have at least one type.", sub_types.size() >= 1); 140 | // Return Type + Parameter Layout 141 | // [ R | P | P | P ] 142 | // ^ ^ 143 | // [ R ] 144 | // ^ 145 | return sub_types[0]; 146 | } 147 | 148 | std::span parameter_types() { 149 | ZTD_ASSERT_MESSAGE( 150 | "Must be a function type.", category == type_category::tc_function); 151 | ZTD_ASSERT_MESSAGE("Must have at least one type.", sub_types.size() >= 1); 152 | // Return Type + Parameter Layout 153 | // [ R | P | P | P ] 154 | // ^ ^ 155 | // [ R ] 156 | // ^ 157 | return std::span(sub_types.data() + 1, sub_types.data() + sub_types.size()); 158 | } 159 | }; 160 | 161 | struct static_assert_declaration { }; 162 | struct operator_declaration { }; 163 | struct alias_declaration { }; 164 | struct attribute { 165 | std::vector tokens; 166 | }; 167 | 168 | struct typed_ast_node { 169 | type t; 170 | }; 171 | 172 | struct member_declaration { 173 | type t; 174 | std::size_t alignment; 175 | std::size_t bit_field_size; 176 | std::size_t bit_field_position; // extension 177 | }; 178 | 179 | struct struct_declaration { 180 | type t; 181 | std::size_t alignment; 182 | std::vector attributes; 183 | std::vector members; 184 | }; 185 | 186 | struct parameter_declaration { 187 | type t; 188 | }; 189 | 190 | struct function_declaration { 191 | type t; 192 | unsigned char funcspecs = 0; 193 | std::vector attributes; 194 | std::vector members; 195 | }; 196 | 197 | struct statement { 198 | std::vector tokens; 199 | }; 200 | 201 | struct compound_statement { 202 | std::vector statements; 203 | 
}; 204 | 205 | struct function_definition { 206 | function_declaration declaration; 207 | compound_statement body; 208 | }; 209 | 210 | using declaration = std::variant; 212 | 213 | using external_declaration = std::variant; 214 | 215 | struct translation_unit { 216 | std::vector declarations; 217 | }; 218 | 219 | struct ast_module { 220 | translation_unit top_level; 221 | 222 | void dump() const { 223 | /* */ 224 | } 225 | }; 226 | } /* namespace a_c_compiler */ 227 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/parse/ast_node.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #pragma once 9 | namespace a_c_compiler { 10 | struct ast_node { }; 11 | } /* namespace a_c_compiler */ 12 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/parse/parse.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace a_c_compiler { 16 | 17 | ast_module parse(token_vector const& toks, const global_options& global_opts, 18 | diagnostic_handles& diag_handles) noexcept; 19 | 20 | } /* namespace a_c_compiler */ 21 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/parse/parser_diagnostic.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace a_c_compiler { 12 | enum class parser_diagnostic_id { 13 | #define DIAGNOSTIC(SRC_NAME, FMT_STRING) SRC_NAME, 14 | #include "parser_diagnostic.inl.h" 15 | #undef DIAGNOSTIC 16 | }; 17 | 18 | struct parser_diagnostic { 19 | parser_diagnostic_id id; 20 | std::string_view format; 21 | }; 22 | 23 | namespace parser_err { 24 | #define DIAGNOSTIC(SRC_NAME, FMT_STRING) \ 25 | inline constexpr const parser_diagnostic SRC_NAME { parser_diagnostic_id::SRC_NAME, \ 26 | FMT_STRING }; 27 | #include "parser_diagnostic.inl.h" 28 | #undef DIAGNOSTIC 29 | } // namespace parser_err 30 | } // namespace a_c_compiler 31 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/parse/parser_diagnostic.inl.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | 8 | #ifdef DIAGNOSTIC 9 | DIAGNOSTIC(out_of_tokens, "out of tokens") 10 | DIAGNOSTIC(unrecognized_token, "unrecognized token '{}'") 11 | DIAGNOSTIC(unimplemented_keyword, "unimplemented keyword '{}'") 12 | DIAGNOSTIC(expected_attribute_identifier, 13 | "expected an identifier, or a double colon (`::`)-joined set of identifiers") 14 | DIAGNOSTIC(unbalanced_token_sequence, 15 | "expected a balanced set of parentheses, square brackets, or curly brackets, but received an " 16 | "unexpected {}") 17 | #endif 18 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/parse/parser_diagnostic_reporter.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace a_c_compiler { 15 | 16 | struct parser_diagnostic_reporter { 17 | constexpr parser_diagnostic_reporter(diagnostic_handles& handles) noexcept 18 | : m_handles(handles) { 19 | } 20 | 21 | [[nodiscard]] diagnostic_handles& handles() noexcept { 22 | return this->m_handles; 23 | } 24 | 25 | template 26 | void report(parser_diagnostic const& diagnostic, std::string_view file_name, 27 | file_offset_info const& source_location, FmtArgs&&... 
format_args) noexcept; 28 | 29 | private: 30 | diagnostic_handles& m_handles; 31 | }; 32 | 33 | } /* namespace a_c_compiler */ 34 | 35 | 36 | #include "parser_diagnostic_reporter.template.h" 37 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/parse/parser_diagnostic_reporter.template.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include "parser_diagnostic_reporter.h" 11 | 12 | #include "parser_diagnostic.h" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace a_c_compiler { 21 | template 22 | void parser_diagnostic_reporter::report(parser_diagnostic const& diagnostic, 23 | std::string_view file_name, file_offset_info const& source_Location, 24 | FmtArgs&&... format_args) noexcept { 25 | file_name = file_name.size() ? file_name : ""; 26 | fmt::print(this->m_handles.error_handle(), "{} ({}, {})\n❌ ", file_name, 27 | source_Location.lineno, source_Location.column); 28 | fmt::vprint(this->m_handles.error_handle(), diagnostic.format, 29 | fmt::make_format_args(format_args...)); 30 | fmt::print(this->m_handles.error_handle(), "\n"); 31 | } 32 | } // namespace a_c_compiler 33 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/reporting/diagnostic_handles.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | namespace a_c_compiler { 16 | 17 | struct diagnostic_handles { 18 | diagnostic_handles() noexcept; 19 | diagnostic_handles(std::ostream& stdout_handle, FILE* arg_c_stdout_handle, 20 | std::ostream& stderr_handle, FILE* arg_c_stderr_handle, 21 | std::optional arg_c_maybe_error_handle = std::nullopt, 22 | std::ostream* maybe_error_handle = nullptr, 23 | std::optional arg_c_maybe_warning_handle = std::nullopt, 24 | std::ostream* maybe_warning_handle = nullptr, 25 | std::optional arg_c_maybe_debug_handle = std::nullopt, 26 | std::ostream* maybe_debug_handle = nullptr, 27 | std::optional arg_c_maybe_info_handle = std::nullopt, 28 | std::ostream* maybe_info_handle = nullptr) noexcept; 29 | 30 | [[nodiscard]] constexpr FILE* c_stdout_handle() const noexcept { 31 | return this->m_c_stdout_handle; 32 | } 33 | 34 | [[nodiscard]] constexpr FILE* c_stderr_handle() const noexcept { 35 | return this->m_c_stderr_handle; 36 | } 37 | 38 | [[nodiscard]] constexpr FILE* c_error_handle() const noexcept { 39 | return this->m_maybe_c_error_handle ? this->m_maybe_c_error_handle.value() 40 | : this->c_stderr_handle(); 41 | } 42 | 43 | [[nodiscard]] constexpr FILE* c_warning_handle() const noexcept { 44 | return this->m_maybe_c_warning_handle ? this->m_maybe_c_warning_handle.value() 45 | : this->c_stderr_handle(); 46 | } 47 | 48 | [[nodiscard]] constexpr FILE* c_debug_handle() const noexcept { 49 | return this->m_maybe_c_debug_handle ? this->m_maybe_c_debug_handle.value() 50 | : this->c_stderr_handle(); 51 | } 52 | 53 | [[nodiscard]] constexpr FILE* c_info_handle() const noexcept { 54 | return this->m_maybe_c_info_handle ? 
this->m_maybe_c_info_handle.value() 55 | : this->c_stderr_handle(); 56 | } 57 | 58 | [[nodiscard]] constexpr std::ostream& stdout_handle() const noexcept { 59 | return this->m_stdout_handle; 60 | } 61 | 62 | [[nodiscard]] constexpr std::ostream& stderr_handle() const noexcept { 63 | return this->m_stderr_handle; 64 | } 65 | 66 | [[nodiscard]] constexpr std::ostream& error_handle() const noexcept { 67 | return this->m_maybe_error_handle ? *this->m_maybe_error_handle 68 | : this->stderr_handle(); 69 | } 70 | 71 | [[nodiscard]] constexpr std::ostream& warning_handle() const noexcept { 72 | return this->m_maybe_warning_handle ? *this->m_maybe_warning_handle 73 | : this->stderr_handle(); 74 | } 75 | 76 | [[nodiscard]] constexpr std::ostream& debug_handle() const noexcept { 77 | return this->m_maybe_debug_handle ? *this->m_maybe_debug_handle 78 | : this->stderr_handle(); 79 | } 80 | 81 | [[nodiscard]] constexpr std::ostream& info_handle() const noexcept { 82 | return this->m_maybe_info_handle ? 
*this->m_maybe_info_handle 83 | : this->stderr_handle(); 84 | } 85 | 86 | private: 87 | FILE* m_c_stdout_handle; 88 | FILE* m_c_stderr_handle; 89 | std::ostream& m_stdout_handle; 90 | std::ostream& m_stderr_handle; 91 | 92 | std::optional m_maybe_c_error_handle; 93 | std::optional m_maybe_c_warning_handle; 94 | std::optional m_maybe_c_debug_handle; 95 | std::optional m_maybe_c_info_handle; 96 | std::ostream* m_maybe_error_handle; 97 | std::ostream* m_maybe_warning_handle; 98 | std::ostream* m_maybe_debug_handle; 99 | std::ostream* m_maybe_info_handle; 100 | }; 101 | } // namespace a_c_compiler 102 | -------------------------------------------------------------------------------- /fe/include/a_c_compiler/fe/reporting/logger.h: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace a_c_compiler { 19 | 20 | struct log_collection { }; 21 | 22 | struct logger { 23 | inline static constexpr const std::size_t default_indent_width = 1; 24 | 25 | constexpr logger(std::ostream& arg_stream, FILE* arg_c_stream, 26 | std::size_t arg_indent_width = default_indent_width) noexcept 27 | : m_stream(arg_stream) 28 | , m_c_stream(arg_c_stream) 29 | , m_indent_level(0) 30 | , m_indent_width(arg_indent_width) { 31 | } 32 | 33 | void indent() noexcept; 34 | void incr_indent() noexcept; 35 | void decr_indent() noexcept; 36 | 37 | [[nodiscard]] constexpr FILE* c_handle() const noexcept { 38 | return m_c_stream; 39 | } 40 | 41 | [[nodiscard]] constexpr std::ostream& handle() const noexcept { 42 | return m_stream; 43 | } 44 | 45 | [[nodiscard]] constexpr std::size_t indent_width() const noexcept { 46 | return m_indent_width; 47 | } 48 | 49 | [[nodiscard]] constexpr std::size_t indent_level() const noexcept { 50 | return m_indent_level; 51 | } 52 | 53 | private: 54 | std::ostream& m_stream; 55 | FILE* m_c_stream; 56 | std::size_t m_indent_level; 57 | std::size_t m_indent_width; 58 | }; 59 | 60 | struct scope_logger { 61 | scope_logger(std::string scope_name, std::function&& entry_callback, 62 | logger& target, std::optional logfile = std::nullopt) noexcept; 63 | 64 | ~scope_logger() noexcept; 65 | 66 | private: 67 | logger& m_logger; 68 | std::string scope_name; 69 | std::optional logfile; 70 | 71 | void indent() noexcept; 72 | }; 73 | } // namespace a_c_compiler 74 | -------------------------------------------------------------------------------- /fe/sources/a_c_compiler/fe/lex/lex.cpp: -------------------------------------------------------------------------------- 1 | // 
============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | /* Place lexed literals and identifiers in these vectors for the parser to 20 | * access later. Leave numeric literals as strings becaues it is the parser's 21 | * job to figure out what type the literal should be parsed to. */ 22 | static std::vector lexed_numeric_literals; 23 | static std::vector lexed_string_literals; 24 | static std::vector lexed_ids; 25 | 26 | namespace a_c_compiler { 27 | 28 | /* Maps tokens to offsets into source files */ 29 | static std::vector token_source_info; 30 | 31 | file_offset_info source_info_for_token(size_t tok_idx) { 32 | return token_source_info[tok_idx]; 33 | } 34 | 35 | void dump_tokens_into(token_vector const& toks, std::ostream& output_stream) noexcept { 36 | size_t id_idx = 0, numlit_idx = 0, strlit_idx = 0; 37 | static constexpr size_t width = 15; 38 | output_stream << std::setw(width) << "line:column" 39 | << " | token\n"; 40 | for (auto [t, foi] : toks) { 41 | std::stringstream ss; 42 | ss << foi.lineno << ":" << foi.column; 43 | output_stream << std::setw(width) << ss.str() << " | "; 44 | switch (t) { 45 | 46 | #define CHAR_TOKEN(TOK, LIT) \ 47 | case TOK: \ 48 | output_stream << #TOK; \ 49 | break; 50 | 51 | #define KEYWORD_TOKEN(TOK, LIT, KEYWORD) \ 52 | case TOK: \ 53 | output_stream << #TOK; \ 54 | break; 55 | 56 | #include 57 | #undef CHAR_TOKEN 58 | #undef KEYWORD_TOKEN 59 | 60 | case tok_block_comment: 61 | output_stream << "tok_block_comment"; 62 | break; 63 | 64 | case tok_line_comment: 65 | output_stream << "tok_line_comment"; 66 | break; 67 | 68 | case tok_newline: 69 | output_stream << "tok_newline"; 70 | 
break; 71 | 72 | case tok_tab: 73 | output_stream << "tok_tab"; 74 | break; 75 | 76 | case tok_id: 77 | output_stream << "tok_id: " << lexed_id(id_idx++); 78 | break; 79 | 80 | case tok_num_literal: 81 | output_stream << "tok_num_literal: " << lexed_numeric_literal(numlit_idx++); 82 | break; 83 | 84 | case tok_str_literal: 85 | output_stream << "str_literal: " << lexed_string_literal(strlit_idx++); 86 | break; 87 | 88 | case tok_pp_embed: 89 | output_stream << "pp_embed_literal"; 90 | break; 91 | 92 | default: 93 | ZTD_ASSERT_MESSAGE("Got invalid token", false); 94 | } 95 | output_stream << "\n"; 96 | } 97 | } 98 | 99 | void dump_tokens(token_vector const& toks) noexcept { 100 | dump_tokens_into(toks, std::cout); 101 | } 102 | 103 | std::string_view lexed_numeric_literal(size_t index) noexcept { 104 | return lexed_numeric_literals[index].data(); 105 | } 106 | std::string_view lexed_id(size_t index) noexcept { 107 | return lexed_ids[index].data(); 108 | } 109 | std::string_view lexed_string_literal(size_t index) noexcept { 110 | return lexed_string_literals[index].data(); 111 | } 112 | 113 | token_vector lex(fs::path const& source_file, const global_options& global_opts, 114 | diagnostic_handles& diag_handles) noexcept { 115 | token_vector toks; 116 | toks.reserve(2048); 117 | 118 | FILE* fp = 119 | #if ZTD_IS_ON(ZTD_LIBVCXX) 120 | _wfopen(source_file.c_str(), L"r") 121 | #else 122 | std::fopen(source_file.c_str(), "r") 123 | #endif 124 | ; 125 | ZTD_ASSERT_MESSAGE("Couldn't open file", fp); 126 | char c; 127 | size_t lineno = 0, column = 0; 128 | 129 | /* Get next character while tracking source location info */ 130 | auto getc = [&]() { 131 | c = fgetc(fp); 132 | if (c == '\n') { 133 | lineno++; 134 | column = 0; 135 | } 136 | else { 137 | column++; 138 | } 139 | return c; 140 | }; 141 | 142 | c = std::fgetc(fp); 143 | while (c != EOF) { 144 | switch (c) { 145 | case ' ': 146 | break; 147 | 148 | case '\t': 149 | // toks.push_back({ tok_tab, { lineno, column } }); 
150 | break; 151 | 152 | /* Handle comments */ 153 | case '/': { 154 | file_offset_info foi { lineno, column }; 155 | getc(); 156 | /* Line comment */ 157 | if (c == '/') { 158 | while (c != '\n') { 159 | getc(); 160 | } 161 | toks.push_back({ tok_line_comment, foi }); 162 | break; 163 | } 164 | 165 | /* Block comment */ 166 | else if (c == '*') { 167 | while (true) { 168 | if (getc() == '*' && getc() == '/') { 169 | toks.push_back({ tok_block_comment, foi }); 170 | break; 171 | } 172 | ZTD_ASSERT_MESSAGE("unterminated block comment", c != EOF); 173 | } 174 | break; 175 | } 176 | else { 177 | toks.push_back({ tok_forward_slash, foi }); 178 | } 179 | } 180 | 181 | /* Char-like tokens */ 182 | #define CHAR_TOKEN(TOK, LIT) case LIT: 183 | #include 184 | toks.push_back({ (token_id)c, { lineno, column } }); 185 | break; 186 | #undef CHAR_TOKEN 187 | 188 | case '\n': 189 | toks.push_back({ tok_newline, { lineno, column } }); 190 | break; 191 | 192 | case '"': { 193 | toks.push_back({ tok_str_literal, { lineno, column } }); 194 | std::string lit = ""; 195 | while (getc() != '"') { 196 | lit += c; 197 | } 198 | lexed_string_literals.push_back(lit); 199 | } break; 200 | 201 | /* Numeric literals */ 202 | case '0': 203 | case '1': 204 | case '2': 205 | case '3': 206 | case '4': 207 | case '5': 208 | case '6': 209 | case '7': 210 | case '8': 211 | case '9': 212 | case '.': { 213 | file_offset_info foi { lineno, column }; 214 | std::string lit = ""; 215 | lit += c; 216 | while (std::isdigit(getc()) || c == '.') { 217 | lit += c; 218 | } 219 | 220 | /* numeric literal type suffixes */ 221 | if (c == 'f' or c == 'd') { 222 | lit += c; 223 | } 224 | else { 225 | ungetc(c, fp); 226 | } 227 | 228 | lexed_numeric_literals.push_back(lit); 229 | toks.push_back({ tok_num_literal, foi }); 230 | break; 231 | } 232 | } 233 | 234 | /* Identifier */ 235 | if (std::isalpha(c) or c == '_') { 236 | file_offset_info foi { lineno, column }; 237 | std::string lit = ""; 238 | lit += c; 239 | 240 | 
while (std::isalnum(c = fgetc(fp)) or c == '_') { 241 | lit += c; 242 | } 243 | std::ungetc(c, fp); 244 | if (false) { } 245 | // if it matches a keyword's spelling, it's a keyword 246 | #define KEYWORD_TOKEN(TOK, INTVAL, KEYWORD) \ 247 | else if (lit == #KEYWORD) { \ 248 | toks.push_back({ TOK, foi }); \ 249 | } 250 | #include 251 | #undef KEYWORD_TOKEN 252 | else { 253 | lexed_ids.push_back(lit); 254 | toks.push_back({ tok_id, foi }); 255 | } 256 | } 257 | getc(); 258 | } 259 | return toks; 260 | } 261 | 262 | } // namespace a_c_compiler 263 | -------------------------------------------------------------------------------- /fe/sources/a_c_compiler/fe/parse/ast_module.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | 6 | namespace a_c_compiler { 7 | namespace { 8 | std::vector type_data_table; 9 | } 10 | 11 | type type_data::get_new_type() { 12 | std::size_t index = type_data_table.size(); 13 | type_data_table.push_back(type_data {}); 14 | return type { index }; 15 | } 16 | 17 | type_data& type::data() const noexcept { 18 | return type_data_table[m_ref]; 19 | } 20 | } // namespace a_c_compiler 21 | -------------------------------------------------------------------------------- /fe/sources/a_c_compiler/fe/parse/parse.cpp: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #define DEBUGGING() this->global_opts.get_feature_flag(1, 0x1) 19 | #define DEBUG(FORMATSTR, ...) 
\ 20 | if (DEBUGGING()) { \ 21 | this->m_logger.indent(); \ 22 | std::fprintf( \ 23 | this->m_debug_logger.c_handle(), "parser:%s:" FORMATSTR, __func__, __VA_ARGS__); \ 24 | } 25 | #define DEBUGS(DEBUGSTR) \ 26 | if (DEBUGGING()) { \ 27 | this->m_logger.indent(); \ 28 | std::fprintf(this->m_debug_logger.c_handle(), "parser:%s:" DEBUGSTR, __func__); \ 29 | } 30 | #define ENTER_PARSE_FUNCTION() \ 31 | scope_logger current_scope_logger( \ 32 | __func__, \ 33 | [&](logger& logger) { \ 34 | auto loc = this->current_token().source_location; \ 35 | std::fprintf(logger.c_handle(), ":%zu:%zu:", loc.lineno, loc.column); \ 36 | }, \ 37 | this->m_debug_logger); 38 | 39 | namespace a_c_compiler { 40 | 41 | struct parser { 42 | using next_token_t = std::expected, 43 | std::reference_wrapper>; 44 | 45 | std::size_t m_toks_index; 46 | std::size_t m_last_identifier_string_index = 0; 47 | std::vector m_token_history_stack; 48 | token_vector const& m_toks; 49 | parser_diagnostic_reporter& m_reporter; 50 | const global_options& m_global_opts; 51 | logger m_debug_logger; 52 | 53 | /* Which ident are we operating on? This allows us to get a handle to the 54 | * string representation of an id. 
*/ 55 | std::size_t id_index = 0; 56 | 57 | constexpr parser(std::size_t toks_index, token_vector const& toks, 58 | parser_diagnostic_reporter& reporter, const global_options& global_opts) noexcept 59 | : m_toks_index(toks_index) 60 | , m_toks(toks) 61 | , m_reporter(reporter) 62 | , m_global_opts(global_opts) 63 | , m_debug_logger( 64 | reporter.handles().debug_handle(), reporter.handles().c_debug_handle(), 1) { 65 | } 66 | 67 | const token& current_token() noexcept { 68 | const token& target_token = m_toks[m_toks_index]; 69 | return target_token; 70 | } 71 | 72 | next_token_t peek_token(std::size_t peek_by = 1) noexcept { 73 | size_t current_toks_index = m_toks_index; 74 | if (m_toks.size() - current_toks_index < peek_by) { 75 | return std::unexpected(parser_err::out_of_tokens); 76 | } 77 | const token& target_token = m_toks[m_toks_index + peek_by]; 78 | return target_token; 79 | } 80 | 81 | next_token_t get_next_token() noexcept { 82 | ++m_toks_index; 83 | if (m_toks_index == m_toks.size()) { 84 | return std::unexpected(parser_err::out_of_tokens); 85 | } 86 | const token& target_token = m_toks[m_toks_index]; 87 | return target_token; 88 | } 89 | 90 | void advance_token_index(std::size_t advance_by = 1) noexcept { 91 | ZTD_ASSERT_MESSAGE("Cannot advance beyond end of stream", 92 | advance_by < (m_toks.size() - m_toks_index)); 93 | m_toks_index += advance_by; 94 | } 95 | 96 | void recede_token_index(std::size_t recede_by = 1) noexcept { 97 | ZTD_ASSERT_MESSAGE( 98 | "Cannot recede past beginning of token stream", recede_by > m_toks_index); 99 | m_toks_index -= recede_by; 100 | } 101 | 102 | void unget_token() noexcept { 103 | ZTD_ASSERT_MESSAGE( 104 | "Cannot unget token at beginning of token stream", m_toks_index > 0); 105 | m_toks_index--; 106 | } 107 | 108 | void eat_token(token_id expected_token) noexcept { 109 | auto maybe_tok = get_next_token(); 110 | ZTD_ASSERT_MESSAGE("expected token", maybe_tok); 111 | token got_token = maybe_tok.value(); 112 | 
ZTD_ASSERT_MESSAGE("got unexpected token", got_token.id == expected_token); 113 | } 114 | 115 | void pop_token_index() { 116 | m_toks_index = m_token_history_stack.back(); 117 | m_token_history_stack.pop_back(); 118 | } 119 | 120 | void push_token_index() { 121 | m_token_history_stack.push_back(m_toks_index); 122 | } 123 | 124 | bool has_more_tokens() noexcept { 125 | return m_toks_index < m_toks.size(); 126 | } 127 | 128 | token find_first_of(const std::vector& toks) noexcept { 129 | push_token_index(); 130 | while (std::find(toks.begin(), toks.end(), current_token().id) == toks.end()) { 131 | if (!get_next_token().has_value()) { 132 | break; 133 | } 134 | } 135 | token found_tok = current_token(); 136 | pop_token_index(); 137 | return found_tok; 138 | } 139 | 140 | std::string_view next_id_value() noexcept { 141 | id_index++; 142 | return current_id_value(); 143 | } 144 | 145 | std::string_view current_id_value() const noexcept { 146 | return lexed_id(id_index); 147 | } 148 | 149 | #define KEYWORD_TOKEN(TOK, INTVAL, KEYWORD) \ 150 | bool parse_##KEYWORD(translation_unit& tu) { \ 151 | auto const& tok = current_token(); \ 152 | m_reporter.report(parser_err::unimplemented_keyword, "", tok.source_location, #KEYWORD); \ 153 | return false; \ 154 | } 155 | #include 156 | #undef KEYWORD_TOKEN 157 | 158 | using maybe_attribute_t 159 | = std::expected>; 160 | 161 | enum class balanced_token_seq_behavior { 162 | normal = 0b00, 163 | ignore_initial = 0b01, 164 | ignore_all = 0b10 165 | }; 166 | 167 | enum class balanced_delimeter { 168 | parenthesis = 0, 169 | square_bracket = 1, 170 | curly_bracket = 2, 171 | }; 172 | 173 | struct seen_delimeter { 174 | size_t& count; 175 | balanced_delimeter delimeter; 176 | }; 177 | 178 | struct balanced_token_delimeters { 179 | size_t parentheses; 180 | size_t curly_brackets; 181 | size_t square_brackets; 182 | std::optional last_seen; 183 | 184 | [[nodiscard]] constexpr bool unclosed_delimeters() const noexcept { 185 | return 
last_seen.has_value(); 186 | } 187 | }; 188 | 189 | template 190 | balanced_token_delimeters consume_balanced_token_sequence(OnToken&& on_token, 191 | balanced_token_seq_behavior behavior, 192 | balanced_delimeter initial_delimeter = balanced_delimeter::parenthesis) noexcept { 193 | balanced_token_delimeters delimeters = {}; 194 | std::vector delimeter_stack( 195 | &initial_delimeter, &initial_delimeter + 1); 196 | 197 | for (; delimeters.unclosed_delimeters();) { 198 | if (!has_more_tokens()) { 199 | break; 200 | } 201 | // keep going until the sequence is terminated 202 | const token& tok = current_token(); 203 | const auto maybe_manage_delimeter = [&tok, &delimeter_stack, 204 | &delimeters]() noexcept { 205 | switch (tok.id) { 206 | case tok_l_paren: 207 | delimeter_stack.push_back(balanced_delimeter::parenthesis); 208 | ++delimeters.parentheses; 209 | break; 210 | case tok_l_square_bracket: 211 | delimeter_stack.push_back(balanced_delimeter::square_bracket); 212 | ++delimeters.square_brackets; 213 | break; 214 | case tok_l_curly_bracket: 215 | delimeter_stack.push_back(balanced_delimeter::curly_bracket); 216 | ++delimeters.curly_brackets; 217 | break; 218 | case tok_r_paren: 219 | if (delimeter_stack.back() != balanced_delimeter::parenthesis) { 220 | return; 221 | } 222 | delimeter_stack.pop_back(); 223 | --delimeters.parentheses; 224 | break; 225 | case tok_r_square_bracket: 226 | if (delimeter_stack.back() != balanced_delimeter::square_bracket) { 227 | return; 228 | } 229 | delimeter_stack.pop_back(); 230 | --delimeters.square_brackets; 231 | break; 232 | case tok_r_curly_bracket: 233 | if (delimeter_stack.back() != balanced_delimeter::curly_bracket) { 234 | return; 235 | } 236 | delimeter_stack.pop_back(); 237 | --delimeters.curly_brackets; 238 | break; 239 | default: 240 | break; 241 | } 242 | }; 243 | maybe_manage_delimeter(); 244 | if (behavior == balanced_token_seq_behavior::ignore_initial 245 | && delimeter_stack.size() == 0) { 246 | break; 247 | } 248 
| if (!on_token(tok)) { 249 | break; 250 | } 251 | } 252 | if (!delimeter_stack.empty()) { 253 | delimeters.last_seen = delimeter_stack.back(); 254 | } 255 | return delimeters; 256 | } 257 | 258 | maybe_attribute_t parse_attribute() noexcept { 259 | // `attribute-token` 260 | // TODO: store attribute token names 261 | attribute attr {}; 262 | const token& first_token = current_token(); 263 | if (first_token.id != tok_id) { 264 | // failure 265 | return std::unexpected(parser_err::expected_attribute_identifier); 266 | } 267 | attr.tokens.push_back(first_token); 268 | advance_token_index(1); 269 | constexpr const auto next_token_is_colon = [](const next_token_t& maybe_next_tok) { 270 | return maybe_next_tok.has_value() && maybe_next_tok->get().id == tok_colon; 271 | }; 272 | for (; current_token().id == tok_colon && next_token_is_colon(peek_token());) { 273 | advance_token_index(2); 274 | const token& expecting_id_tok = current_token(); 275 | if (expecting_id_tok.id != tok_id) { 276 | // failure 277 | return std::unexpected(parser_err::expected_attribute_identifier); 278 | } 279 | attr.tokens.push_back(expecting_id_tok); 280 | advance_token_index(1); 281 | } 282 | if (current_token().id == tok_l_paren) { 283 | // expect attribute arguments are this point, and consume a balanced 284 | // token sequence, started with 285 | // `( balanced-token-seq )` 286 | // consume all attribute arguments 287 | attr.tokens.push_back(current_token()); 288 | const auto on_token = [&attr](const token& tok) noexcept { 289 | attr.tokens.push_back(tok); 290 | return true; 291 | }; 292 | advance_token_index(1); 293 | balanced_token_delimeters delimeters = consume_balanced_token_sequence(on_token, 294 | balanced_token_seq_behavior::ignore_initial, 295 | balanced_delimeter::parenthesis); 296 | if (delimeters.unclosed_delimeters()) { 297 | const token& stop_token = current_token(); 298 | m_reporter.report(parser_err::unbalanced_token_sequence, "", 299 | stop_token.source_location, 
(char)stop_token.id); 300 | } 301 | } 302 | return attr; 303 | } 304 | 305 | size_t parse_attribute_list(std::vector& attributes) noexcept { 306 | size_t number_of_successfully_parsed_attributes = 0; 307 | // loop until double r square bracket 308 | for (;;) { 309 | const token& tok = current_token(); 310 | switch (tok.id) { 311 | case tok_comma: 312 | // this means we COULD get another attribute; just loop around 313 | break; 314 | case tok_r_square_bracket: { 315 | auto maybe_expected_second_r_square_bracket = peek_token(); 316 | if (!maybe_expected_second_r_square_bracket.has_value()) { 317 | return number_of_successfully_parsed_attributes; // there are no more 318 | // tokens? No more 319 | // attributes. 320 | } 321 | const token& expected_second_r_square_bracket 322 | = *maybe_expected_second_r_square_bracket; 323 | if (expected_second_r_square_bracket.id == tok_r_square_bracket) { 324 | // we are finished and we can leave. 325 | return number_of_successfully_parsed_attributes; 326 | } 327 | } break; 328 | default: { 329 | auto maybe_current_attribute = parse_attribute(); 330 | if (maybe_current_attribute.has_value()) { 331 | attributes.push_back(std::move(*maybe_current_attribute)); 332 | ++number_of_successfully_parsed_attributes; 333 | } 334 | } break; 335 | } 336 | } 337 | return true; 338 | } 339 | 340 | size_t parse_attribute_specifier_sequence( 341 | translation_unit& tu, std::vector& attributes) noexcept { 342 | ENTER_PARSE_FUNCTION(); 343 | std::size_t number_of_successfully_parsed_attribute_specifiers = 0; 344 | for (;;) { 345 | auto maybe_expected_second_l_square_bracket = peek_token(); 346 | const token& expected_l_square_bracket = current_token(); 347 | if (expected_l_square_bracket.id != tok_l_square_bracket 348 | || !maybe_expected_second_l_square_bracket.has_value()) { 349 | return number_of_successfully_parsed_attribute_specifiers; 350 | } 351 | const token& expected_second_l_square_bracket 352 | = *maybe_expected_second_l_square_bracket; 353 
| if (expected_second_l_square_bracket.id != tok_l_square_bracket) { 354 | return number_of_successfully_parsed_attribute_specifiers; 355 | } 356 | advance_token_index(2); 357 | size_t successfully_parsed_attributes = parse_attribute_list(attributes); 358 | number_of_successfully_parsed_attribute_specifiers 359 | += successfully_parsed_attributes; 360 | eat_token(tok_r_square_bracket); 361 | } 362 | 363 | return number_of_successfully_parsed_attribute_specifiers; 364 | } 365 | 366 | bool parse_storage_class_specifier( 367 | translation_unit& tu, function_definition& fd, type ty) noexcept { 368 | ENTER_PARSE_FUNCTION(); 369 | switch (current_token().id) { 370 | case tok_keyword_static: 371 | ty.data().specifiers |= storage_class_specifier::scs_static; 372 | break; 373 | case tok_keyword_extern: 374 | ty.data().specifiers |= storage_class_specifier::scs_extern; 375 | break; 376 | case tok_keyword_constexpr: 377 | ty.data().specifiers |= storage_class_specifier::scs_constexpr; 378 | break; 379 | case tok_keyword_register: 380 | ty.data().specifiers |= storage_class_specifier::scs_register; 381 | break; 382 | case tok_keyword_thread_local: 383 | ty.data().specifiers |= storage_class_specifier::scs_thread_local; 384 | break; 385 | case tok_keyword_typedef: 386 | ty.data().specifiers |= storage_class_specifier::scs_typedef; 387 | break; 388 | case tok_keyword_auto: 389 | // While the C standard says this is a Storage Class Specifier, that is 390 | // literally just a Temporary Stop Gap™ input by the C Committee 391 | // because we did not have enough time to write the rules out for 392 | // it being a proper type versus storage class specifier. It should allow for 393 | // both, provided it has `auto auto` when we want to use it to make a 394 | // block-scope variable whose type is automatically deduced. 395 | // 396 | // Otherwise, `auto` by itself means type deduction unless there's a real type 397 | // in the type name at some point. 
398 | ty.data().specifiers |= storage_class_specifier::scs_auto; 399 | break; 400 | default: 401 | return false; 402 | } 403 | get_next_token(); 404 | return true; 405 | } 406 | 407 | void merge_type_categories(type ty, type_category tc) { 408 | /* If the two type categories are given as type specifiers, they may need 409 | * to be merged. E.g. long int and long do not need to be merged, we can 410 | * just take the former. long double however must be merged together into 411 | * the long double type category. */ 412 | #define TYPE_SPECIFIER_MERGE_RULE(BASETYPE, NEWTYPESPEC, NEWTYPE) \ 413 | if (ty.data().category == type_category::BASETYPE && tc == type_category::NEWTYPESPEC) { \ 414 | ty.data().category = type_category::NEWTYPE; \ 415 | return; \ 416 | } 417 | TYPE_SPECIFIER_MERGE_RULE(tc_long, tc_double, tc_longdouble); 418 | TYPE_SPECIFIER_MERGE_RULE(tc_long, tc_longdouble, tc_longlongdouble); 419 | TYPE_SPECIFIER_MERGE_RULE(tc_long, tc_int, tc_long); 420 | TYPE_SPECIFIER_MERGE_RULE(tc_longlong, tc_int, tc_longlong); 421 | #undef TYPE_SPECIFIER_MERGE_RULE 422 | /* If none of the rules match, just assign the new type to the function's 423 | * type category */ 424 | ty.data().category = tc; 425 | } 426 | 427 | void merge_type_categories(type ty, type_modifier tm) { 428 | ZTD_ASSERT_MESSAGE("unexpected multiple type modifiers", 429 | ty.data().modifier == type_modifier::tm_none); 430 | ty.data().modifier = tm; 431 | } 432 | 433 | bool parse_type_specifier(translation_unit& tu, function_definition& fd, type ty) { 434 | ENTER_PARSE_FUNCTION(); 435 | switch (current_token().id) { 436 | case tok_keyword_void: 437 | ty.data().category = type_category::tc_void; 438 | break; 439 | case tok_keyword_char: 440 | ty.data().category = type_category::tc_char; 441 | break; 442 | case tok_keyword_bool: 443 | ty.data().category = type_category::tc_bool; 444 | break; 445 | case tok_keyword_short: 446 | ty.data().category = type_category::tc_short; 447 | break; 448 | case 
tok_keyword_int: 449 | merge_type_categories(ty, type_category::tc_int); 450 | break; 451 | case tok_keyword_long: 452 | merge_type_categories(ty, type_category::tc_long); 453 | break; 454 | case tok_keyword_float: 455 | merge_type_categories(ty, type_category::tc_float); 456 | break; 457 | case tok_keyword_double: 458 | merge_type_categories(ty, type_category::tc_double); 459 | break; 460 | case tok_keyword_signed: 461 | merge_type_categories(ty, type_modifier::tm_signed); 462 | break; 463 | case tok_keyword_unsigned: 464 | merge_type_categories(ty, type_modifier::tm_unsigned); 465 | break; 466 | case tok_keyword__BitInt: 467 | case tok_keyword__Complex: 468 | // case tok_keyword__Decimal32: TODO 469 | // case tok_keyword__Decimal64: TODO 470 | // case tok_keyword__Decimal128: TODO 471 | ZTD_ASSERT_MESSAGE("unsupported type specifier", false); 472 | // TODO: atomic-type-specifier 473 | // TODO: struct-or-union-specifier 474 | // TODO: enum-specifier 475 | // TODO: typedef-name 476 | // TODO: typeof-specifier 477 | default: 478 | return false; 479 | } 480 | get_next_token(); 481 | return true; 482 | } 483 | 484 | bool parse_type_qualifier(translation_unit& tu, function_definition& fd, type ty) { 485 | ENTER_PARSE_FUNCTION(); 486 | return false; 487 | } 488 | 489 | bool parse_alignment_specifier(translation_unit& tu, function_definition& fd, type ty) { 490 | ENTER_PARSE_FUNCTION(); 491 | return false; 492 | } 493 | 494 | /* 495 | * type_specifier_qualifier ::= type-specifier | type-qualifier | alignment-specifier 496 | */ 497 | bool parse_type_specifier_qualifier( 498 | translation_unit& tu, function_definition& fd, type ty) { 499 | ENTER_PARSE_FUNCTION(); 500 | if (parse_type_specifier(tu, fd, ty)) 501 | return true; 502 | if (parse_type_qualifier(tu, fd, ty)) 503 | return true; 504 | if (parse_alignment_specifier(tu, fd, ty)) 505 | return true; 506 | return false; 507 | } 508 | 509 | bool parse_function_specifier(translation_unit& tu, function_definition& fd) { 
510 | ENTER_PARSE_FUNCTION(); 511 | switch (current_token().id) { 512 | case tok_keyword_inline: 513 | fd.declaration.funcspecs |= function_specifier::funcspec_inline; 514 | break; 515 | case tok_keyword__Noreturn: 516 | fd.declaration.funcspecs |= function_specifier::funcspec__Noreturn; 517 | break; 518 | default: 519 | return false; 520 | } 521 | get_next_token(); 522 | return true; 523 | } 524 | 525 | /* 526 | * declaration-specifier ::= 527 | * storage-class-specifier 528 | * | type-specifier-qualifier 529 | * | function-specifier 530 | */ 531 | bool parse_declaration_specifier(translation_unit& tu, function_definition& fd, type ty) { 532 | ENTER_PARSE_FUNCTION(); 533 | if (parse_storage_class_specifier(tu, fd, ty)) 534 | return true; 535 | 536 | if (parse_type_specifier_qualifier(tu, fd, ty)) 537 | return true; 538 | 539 | if (parse_function_specifier(tu, fd)) 540 | return true; 541 | 542 | return false; 543 | } 544 | 545 | /* 546 | * declaration-specifiers ::= 547 | * declaration-specifier attribute-specifier-sequence? 
548 | * | declaration-specifier declaration-specifiers 549 | */ 550 | bool parse_declaration_specifiers( 551 | translation_unit& tu, function_definition& fd, type ty) { 552 | ENTER_PARSE_FUNCTION(); 553 | while (parse_declaration_specifier(tu, fd, ty)) { 554 | // parse_attribute_specifier_sequence(tu, fd, ty); 555 | } 556 | 557 | /* empty declspecs is valid */ 558 | return true; 559 | } 560 | 561 | /* 562 | * 563 | */ 564 | bool parse_type_qualifier_list(translation_unit tu, function_definition fd) { 565 | ENTER_PARSE_FUNCTION(); 566 | return false; 567 | } 568 | 569 | /* 570 | * 571 | */ 572 | bool parse_array_declarator(translation_unit tu, function_definition fd) { 573 | ENTER_PARSE_FUNCTION(); 574 | return false; 575 | } 576 | 577 | bool parse_parameter_type_list( 578 | translation_unit tu, function_definition fd, std::vector& typelist) { 579 | ENTER_PARSE_FUNCTION(); 580 | return false; 581 | } 582 | 583 | /* 584 | * function-declarator ::= 585 | * direct-declarator ( parameter-type-list? ) 586 | */ 587 | bool parse_function_declarator(translation_unit tu, function_definition fd) { 588 | ENTER_PARSE_FUNCTION(); 589 | std::string funcname; 590 | push_token_index(); 591 | // should be parse_declarator 592 | // if (!parse_declarator(tu, fd)) { 593 | if (!parse_identifier(tu, fd, funcname)) { 594 | pop_token_index(); 595 | return false; 596 | } 597 | if (current_token().id != tok_l_paren) { 598 | pop_token_index(); 599 | return false; 600 | } 601 | get_next_token(); 602 | std::vector typelist; 603 | if (!parse_parameter_type_list(tu, fd, typelist)) { 604 | pop_token_index(); 605 | return false; 606 | } 607 | eat_token(tok_r_paren); 608 | return true; 609 | } 610 | 611 | 612 | /* 613 | * pointer ::= 614 | * * attribute-specifier-sequence? type-qualifier-list? 615 | * | * attribute-specifier-sequence? type-qualifier-list? 
pointer 616 | */ 617 | bool parse_pointer(translation_unit& tu, function_definition& fd) { 618 | ENTER_PARSE_FUNCTION(); 619 | if (current_token().id != tok_asterisk) 620 | return false; 621 | while (current_token().id == tok_asterisk) { 622 | // parse_attribute_specifier_sequence(tu, fd); 623 | parse_type_qualifier_list(tu, fd); 624 | get_next_token(); 625 | } 626 | return true; 627 | } 628 | 629 | bool parse_identifier(translation_unit& tu, function_definition& fd, std::string& idval) { 630 | ENTER_PARSE_FUNCTION(); 631 | switch (current_token().id) { 632 | case tok_id: 633 | idval = lexed_id(m_last_identifier_string_index++); 634 | break; 635 | case tok_keyword_int: 636 | idval = "int"; 637 | break; 638 | default: 639 | return false; 640 | } 641 | get_next_token(); 642 | return true; 643 | } 644 | 645 | /* 646 | * direct-declarator ::= 647 | * identifier attribute-specifier-sequence? 648 | * | ( declarator ) 649 | * | array-declarator attribute-specifier-sequence? 650 | * | function-declarator attribute-specifier-sequence? 
651 | */ 652 | bool parse_direct_declarator(translation_unit& tu, function_definition& fd) { 653 | ENTER_PARSE_FUNCTION(); 654 | std::string idval; 655 | 656 | // If we find lparen before the next '{' or ';', it's probably a function 657 | // declarator 658 | const token t = find_first_of({ tok_l_paren, tok_l_curly_bracket, tok_semicolon }); 659 | if (t.id == tok_l_paren && parse_function_declarator(tu, fd)) { 660 | // parse_attribute_specifier_sequence(tu, fd); 661 | return true; 662 | } 663 | 664 | if (parse_identifier(tu, fd, idval)) { 665 | // parse_attribute_specifier_sequence(tu, fd); 666 | return true; 667 | } 668 | 669 | if (current_token().id == tok_l_paren) { 670 | get_next_token(); 671 | if (!parse_declarator(tu, fd)) { 672 | unget_token(); 673 | return false; 674 | } 675 | eat_token(tok_r_paren); 676 | } 677 | 678 | if (parse_array_declarator(tu, fd)) { 679 | // parse_attribute_specifier_sequence(tu, fd); 680 | return true; 681 | } 682 | 683 | return false; 684 | } 685 | 686 | /* 687 | * declarator ::= pointer? direct-declarator 688 | */ 689 | bool parse_declarator(translation_unit& tu, function_definition& fd) { 690 | ENTER_PARSE_FUNCTION(); 691 | parse_pointer(tu, fd); 692 | if (!parse_direct_declarator(tu, fd)) 693 | return false; 694 | get_next_token(); 695 | return true; 696 | } 697 | 698 | bool parse_function_body(translation_unit& tu, function_definition& fd) { 699 | ENTER_PARSE_FUNCTION(); 700 | return false; 701 | } 702 | 703 | /* 704 | * function-definition ::= 705 | * attribute-specifier-sequence? 
declaration-specifiers declarator function-body 706 | */ 707 | bool parse_function_definition(translation_unit& tu) { 708 | ENTER_PARSE_FUNCTION(); 709 | function_definition fd; 710 | fd.declaration.t = type_data::get_new_type(); 711 | 712 | parse_attribute_specifier_sequence(tu, fd.declaration.attributes); 713 | 714 | type return_type = type_data::get_new_type(); 715 | if (!parse_declaration_specifiers(tu, fd, return_type)) 716 | return false; 717 | 718 | if (!parse_declarator(tu, fd)) 719 | return false; 720 | 721 | if (!parse_function_body(tu, fd)) 722 | return false; 723 | 724 | return true; 725 | } 726 | 727 | /* 728 | * 729 | */ 730 | bool parse_declaration(translation_unit& tu) { 731 | ENTER_PARSE_FUNCTION(); 732 | return false; 733 | } 734 | 735 | /* 736 | * \brief Attempt to parse a declaration 737 | * \returns true if declaration parse was successful. 738 | * 739 | * external-declaration ::= function-definition | declaration 740 | */ 741 | bool parse_external_declaration(translation_unit& tu) noexcept { 742 | ENTER_PARSE_FUNCTION(); 743 | if (m_toks.empty()) { 744 | return false; 745 | } 746 | 747 | push_token_index(); 748 | if (parse_function_definition(tu)) 749 | return true; 750 | pop_token_index(); 751 | 752 | push_token_index(); 753 | if (parse_declaration(tu)) 754 | return true; 755 | pop_token_index(); 756 | 757 | /* Keep track of first and last token when searching for a declaration. 758 | * Figure out the details later. */ 759 | auto const& start_token = current_token(); 760 | 761 | /* To determine if we're working with a var decl or a function decl, we 762 | * must first try to parse an ident token. */ 763 | for (;;) { 764 | const auto tok = this->current_token(); 765 | switch (tok.id) { 766 | #define KEYWORD_TOKEN(TOK, INTVAL, KEYWORD) \ 767 | case TOK: \ 768 | return parse_##KEYWORD(tu); 769 | #include 770 | #undef KEYWORD_TOKEN 771 | 772 | default: 773 | // unrecognized token: report and bail! 
774 | m_reporter.report( 775 | parser_err::unrecognized_token, "", tok.source_location, (int)tok.id); 776 | return false; 777 | } 778 | auto maybe_err = get_next_token(); 779 | if (!maybe_err.has_value()) { 780 | const auto wrapped_err = maybe_err.error(); 781 | m_reporter.report(wrapped_err, "", tok.source_location); 782 | break; 783 | } 784 | } 785 | 786 | return false; 787 | } 788 | 789 | translation_unit parse_translation_unit() noexcept { 790 | translation_unit tu {}; 791 | while (parse_external_declaration(tu)) { 792 | continue; 793 | } 794 | return tu; 795 | } 796 | }; 797 | 798 | 799 | 800 | ast_module parse(token_vector const& toks, const global_options& global_opts, 801 | diagnostic_handles& diag_handles) noexcept { 802 | parser_diagnostic_reporter reporter { diag_handles }; 803 | parser p(0, toks, reporter, global_opts); 804 | ast_module mod { p.parse_translation_unit() }; 805 | return mod; 806 | } 807 | 808 | } /* namespace a_c_compiler */ 809 | -------------------------------------------------------------------------------- /fe/sources/a_c_compiler/fe/parse/parser_diagnostic.cpp: -------------------------------------------------------------------------------- 1 | // ============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 
6 | // ============================================================================ // 7 | -------------------------------------------------------------------------------- /fe/sources/a_c_compiler/fe/reporting/diagnostic_handles.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace a_c_compiler { 6 | diagnostic_handles::diagnostic_handles() noexcept 7 | : diagnostic_handles(std::cout, stdout, std::cerr, stderr) { 8 | } 9 | 10 | diagnostic_handles::diagnostic_handles(std::ostream& arg_stdout_handle, 11 | FILE* arg_c_stdout_handle, std::ostream& arg_stderr_handle, FILE* arg_c_stderr_handle, 12 | std::optional arg_c_maybe_error_handle, std::ostream* arg_maybe_error_handle, 13 | std::optional arg_c_maybe_warning_handle, std::ostream* arg_maybe_warning_handle, 14 | std::optional arg_c_maybe_debug_handle, std::ostream* arg_maybe_debug_handle, 15 | std::optional arg_c_maybe_info_handle, 16 | std::ostream* arg_maybe_info_handle) noexcept 17 | : m_stdout_handle(arg_stdout_handle) 18 | , m_c_stdout_handle(arg_c_stdout_handle) 19 | , m_stderr_handle(arg_stderr_handle) 20 | , m_c_stderr_handle(arg_c_stderr_handle) 21 | , m_maybe_c_error_handle(std::move(arg_c_maybe_error_handle)) 22 | , m_maybe_c_warning_handle(std::move(arg_c_maybe_warning_handle)) 23 | , m_maybe_c_debug_handle(std::move(arg_c_maybe_debug_handle)) 24 | , m_maybe_c_info_handle(std::move(arg_c_maybe_info_handle)) 25 | , m_maybe_error_handle(std::move(arg_maybe_error_handle)) 26 | , m_maybe_warning_handle(std::move(arg_maybe_warning_handle)) 27 | , m_maybe_debug_handle(std::move(arg_maybe_debug_handle)) 28 | , m_maybe_info_handle(std::move(arg_maybe_info_handle)) { 29 | } 30 | } // namespace a_c_compiler 31 | -------------------------------------------------------------------------------- /fe/sources/a_c_compiler/fe/reporting/logger.cpp: -------------------------------------------------------------------------------- 1 | // 
============================================================================= 2 | // a_c_compiler 3 | // 4 | // © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | // All rights reserved. 6 | // ============================================================================ // 7 | 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | namespace a_c_compiler { 18 | 19 | void logger::incr_indent() noexcept { 20 | ++this->m_indent_width; 21 | } 22 | 23 | void logger::decr_indent() noexcept { 24 | --this->m_indent_width; 25 | } 26 | 27 | void logger::indent() noexcept { 28 | for (std::size_t i = 0; i < m_indent_level; i++) 29 | for (std::size_t j = 0; j < m_indent_width; j++) 30 | fmt::print(this->handle(), "| "); 31 | } 32 | 33 | scope_logger::scope_logger(std::string scope_name, 34 | std::function&& entry_callback, logger& target, 35 | std::optional logfile) noexcept 36 | : m_logger(target), logfile(std::move(logfile)), scope_name(std::move(scope_name)) { 37 | // TODO: logfile handling 38 | m_logger.indent(); 39 | fmt::print(m_logger.handle(), "{}", scope_name); 40 | entry_callback(m_logger); 41 | fmt::println(m_logger.handle(), "\n"); 42 | m_logger.incr_indent(); 43 | } 44 | 45 | scope_logger::~scope_logger() noexcept { 46 | m_logger.decr_indent(); 47 | // logger::indent(); 48 | // fprintf(stderr, "%s\n", scope_name.c_str()); 49 | } 50 | 51 | void scope_logger::indent() noexcept { 52 | m_logger.indent(); 53 | } 54 | 55 | } // namespace a_c_compiler 56 | -------------------------------------------------------------------------------- /options/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 
namespace a_c_compiler {

	/// Compiler-wide feature switches, stored as 32 words of 64 single-bit flags each.
	struct global_options {
		/// All flags start cleared.
		constexpr global_options() : m_feature_flags() {
		}

		/// Tests bit `bit` (0..63) of flag word `flag` (0..31).
		/// Indices are not range checked.
		[[nodiscard]] constexpr bool get_feature_flag(
			std::size_t flag, std::size_t bit) const noexcept {
			return (m_feature_flags[flag] & bit_mask(bit)) != 0;
		}

		/// Sets bit `bit` (0..63) of flag word `flag` (0..31).
		/// Indices are not range checked.
		constexpr void set_feature_flag(std::size_t flag, std::size_t bit) noexcept {
			m_feature_flags[flag] |= bit_mask(bit);
		}

	private:
		/// Single-bit mask computed in 64 bits: shifting a plain `int` 1 is undefined
		/// for bit >= 32 and yields a negative (sign-extended) mask for bit 31.
		static constexpr std::uint64_t bit_mask(std::size_t bit) noexcept {
			return std::uint64_t { 1 } << bit;
		}

		inline static constexpr const std::size_t num_feature_flags = 32;
		std::uint64_t m_feature_flags[num_feature_flags];
	};

} // namespace a_c_compiler
# Registers two CTest tests for one lexer input file:
#   1. <name>             runs the driver in lex-only mode and writes the lexer output
#                         to a file in the current binary directory;
#   2. <name>.file_check  runs file_check with the source file as the match file and
#                         that output file as `--input-file`.
# The second test depends on the first and requires the output file to exist.
#
#   prefix       extra component used in the test names and the output file name
#   source_file  path of the source file to lex
function(a_c_compiler_test_make_file_check_lex_test prefix source_file)
	get_filename_component(source_name ${source_file} NAME_WE)
	set(compiler_test_name a_c_compiler.test.lex_test.${prefix}.${source_name})
	set(check_test_name ${compiler_test_name}.file_check)
	set(check_test_input_file
		${CMAKE_CURRENT_BINARY_DIR}/a_c_compiler.lex_test.${prefix}.${source_name}.output)

	# step 1: produce the lexer dump
	add_test(NAME ${compiler_test_name}
		COMMAND a_c_compiler::driver
			--verbose
			-fdebug-lexer
			-fstop-after-phase lex
			--lex-output-file ${check_test_input_file}
			${source_file}
	)

	# step 2: check the dump against the directives embedded in the source file
	add_test(NAME ${check_test_name}
		COMMAND a_c_compiler::test::file_check
			${source_file}
			--input-file ${check_test_input_file}
	)
	set_tests_properties(${check_test_name}
		PROPERTIES
			DEPENDS ${compiler_test_name}
			REQUIRED_FILES ${check_test_input_file}
	)
endfunction()

add_subdirectory(file_check)
add_subdirectory(lex)
add_subdirectory(parse)
# file_check: small helper executable that verifies CHECK / CHECK-NEXT directives.
# Sources are globbed from source/; the glob is re-evaluated at build time.
file(GLOB_RECURSE file_check.sources
	LIST_DIRECTORIES NO
	CONFIGURE_DEPENDS
	source/**.c source/**.cpp)

add_executable(a_c_compiler.test.file_check ${file_check.sources})
add_executable(a_c_compiler::test::file_check ALIAS a_c_compiler.test.file_check)

# Each ${--...} entry is a variable expected to be set elsewhere in the build and to
# expand to the matching compiler flag (or to nothing).
target_compile_options(a_c_compiler.test.file_check
	PRIVATE
		${--utf8-literal-encoding}
		${--utf8-source-encoding}
		${--disable-permissive}
		${--warn-pedantic}
		${--warn-all}
		${--warn-extra}
		${--warn-errors}
		${--allow-alignas-extra-padding}
		${--allow-stringop-overflow} ${--allow-stringop-overread}
		${--allow-array-bounds}
)
target_link_libraries(a_c_compiler.test.file_check
	PRIVATE
		ctre::ctre
		ztd::idk
)
target_include_directories(a_c_compiler.test.file_check
	PRIVATE
		include
)
6 | // ============================================================================ // 7 | 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | enum class check_style : unsigned char { first = 0, next = 1 }; 20 | 21 | std::string_view to_string_view(check_style style) { 22 | switch (style) { 23 | case check_style::first: 24 | return "CHECK"; 25 | case check_style::next: 26 | return "CHECK-NEXT"; 27 | default: 28 | ZTD_UNREACHABLE(); 29 | } 30 | } 31 | 32 | struct check_result { 33 | std::string_view check_text; 34 | std::string_view remaining_file; 35 | bool successful; 36 | check_style style; 37 | }; 38 | using check_action = std::function; 39 | using check_chain = std::vector; 40 | 41 | int main(int argc, char* argv[]) { 42 | if (argc < 2) { 43 | std::cerr 44 | << "[error] file_check requires at least 1 argument, which is the source match_file " 45 | "present." 46 | << std::endl; 47 | return 127; 48 | } 49 | std::string_view match_file_name = argv[1]; 50 | int argument_index = 2; 51 | std::string input_file_name = ""; 52 | std::string match_file = ""; 53 | std::string input = ""; 54 | bool verbose = false; 55 | 56 | constexpr std::string_view verbose_flag_name = "--verbose"; 57 | constexpr std::string_view input_file_flag_name = "--input-file"; 58 | constexpr std::string_view end_of_flags_name = "--"; 59 | constexpr std::size_t input_file_flag_size = input_file_flag_name.size(); 60 | 61 | for (; argument_index < argc; ++argument_index) { 62 | std::string_view current_arg = argv[argument_index]; 63 | if (current_arg.starts_with(input_file_flag_name)) { 64 | // is it `=` style? 
65 | if (current_arg.size() > input_file_flag_size 66 | && current_arg[input_file_flag_size] == '=') { 67 | // `=` style 68 | input_file_name = current_arg.substr(input_file_flag_size + 1); 69 | } 70 | else { 71 | // non `=` style 72 | if (current_arg != input_file_flag_name) { } 73 | // after verifying, take from second arg 74 | if (argument_index + 1 >= argc) { 75 | // not enough arguments; bail 76 | std::cerr << "[error] file_check `--input-file` flag requires a second " 77 | "argument with the input file" 78 | << std::endl; 79 | return 125; 80 | } 81 | input_file_name = argv[argument_index + 1]; 82 | // increment current index to ensure we do not double back 83 | ++argument_index; 84 | } 85 | } 86 | else if (current_arg == verbose_flag_name) { 87 | verbose = true; 88 | } 89 | else if (current_arg == end_of_flags_name) { 90 | // stop processing, exactly where we are 91 | ++argument_index; 92 | break; 93 | } 94 | else { 95 | // all unrecognized data is passed through... 96 | break; 97 | } 98 | } 99 | 100 | { 101 | std::ifstream match_file_stream(match_file_name.data(), std::ios::binary); 102 | if (match_file_stream) { 103 | match_file_stream >> std::noskipws; 104 | std::istreambuf_iterator file_stream_first(match_file_stream); 105 | std::istreambuf_iterator file_stream_last {}; 106 | match_file.append(file_stream_first, file_stream_last); 107 | } 108 | else { 109 | std::cerr << "[error] file_check could not read the match file \"" << match_file_name 110 | << "\"" << std::endl; 111 | return 63; 112 | } 113 | } 114 | 115 | if (input_file_name.empty()) { 116 | const int first_argument_index = argument_index; 117 | for (int i = argument_index; i < argc; ++i) { 118 | std::string_view current_arg(argv[i]); 119 | if (i > first_argument_index) { 120 | // emulate some kind of whitespace? 121 | // TODO: command-line accurate simulation of given whitespace during 122 | // argument dump... 
123 | input += " "; 124 | } 125 | input += current_arg; 126 | } 127 | } 128 | else { 129 | std::ifstream input_file_stream(input_file_name.data(), std::ios::binary); 130 | if (input_file_stream) { 131 | input_file_stream >> std::noskipws; 132 | std::istreambuf_iterator file_stream_first(input_file_stream); 133 | std::istreambuf_iterator file_stream_last {}; 134 | input_file_name.append(file_stream_first, file_stream_last); 135 | } 136 | else { 137 | std::cerr << "[error] file_check could not read the input file \"" << input_file_name 138 | << "\"" << std::endl; 139 | return 62; 140 | } 141 | } 142 | 143 | if (verbose) { 144 | std::cout << "[info] using input data consumed from\n\t"; 145 | if (input_file_name.empty()) { 146 | std::cout << "standard output"; 147 | } 148 | else { 149 | std::cout << "file \"" << input_file_name << "\""; 150 | } 151 | std::cout << "\nthat will be checked with directives found within file\n\t\"" 152 | << match_file_name << "\"" << std::endl; 153 | } 154 | 155 | // match file checks can be singular in nature, e.g. 156 | // 157 | // CHECK: bark 158 | // CHECK: woof 159 | // 160 | // Or they can be chained, where it depends on the last check done and expect things to 161 | // progress in a linear order, e.g. 162 | // 163 | // CHECK: meow 164 | // CHECK-NEXT: purr 165 | // 166 | // Each `CHECK` creates a new check_chain, with the first entry in the chain being what's in 167 | // the `CHECK`. Each `CHECK-NEXT` creates a new `check_action` within the pre-existing 168 | // `check_chain`, meaning there is an error if there's already not a check chain in there. 169 | 170 | // Run CTRE to get all the targets we need to search for 171 | constexpr const auto check_regex = ctre::range; 172 | 173 | std::size_t potential_checks = 0; 174 | std::vector check_chains {}; 175 | for (auto [whole_match, first_group, expected] : check_regex(match_file)) { 176 | ++potential_checks; 177 | check_style style = first_group ? 
check_style::next : check_style::first; 178 | if (first_group) { 179 | if (check_chains.empty() || check_chains.back().empty()) { 180 | // FAILURE: there was a CHECK-NEXT without a CHECK, cannot be chained 181 | std::cerr << "[error] there was a `CHECK-NEXT` that was not preceeded by a " 182 | "`CHECK`:\n" 183 | << whole_match << std::endl; 184 | return 61; 185 | } 186 | } 187 | else { 188 | check_chains.emplace_back(); 189 | } 190 | auto& current_chain = check_chains.back(); 191 | std::string_view check_text = expected.view(); 192 | current_chain.emplace_back( 193 | [style, check_text](std::string_view, std::string_view remaining_file) { 194 | auto find_it = remaining_file.find(check_text); 195 | if (find_it == std::string::npos) { 196 | return check_result { 197 | check_text, 198 | remaining_file, 199 | false, 200 | style, 201 | }; 202 | } 203 | return check_result { 204 | check_text, 205 | remaining_file.substr(find_it + check_text.size()), 206 | true, 207 | style, 208 | }; 209 | }); 210 | } 211 | 212 | 213 | if (verbose) { 214 | std::cout << "[info] produced " << potential_checks 215 | << " matches from the match file to check!" 
<< std::endl; 216 | } 217 | 218 | 219 | for (std::size_t current_check_chain_index = 0; 220 | current_check_chain_index < check_chains.size(); ++current_check_chain_index) { 221 | const auto& current_check_chain = check_chains[current_check_chain_index]; 222 | // check a specific chain, subset match_file as we go 223 | std::string_view remaining_file = match_file; 224 | for (std::size_t current_check_index = 0; 225 | current_check_index < current_check_chain.size(); ++current_check_index) { 226 | const auto& current_check = current_check_chain[current_check_index]; 227 | const auto check_result = current_check(match_file, remaining_file); 228 | if (!check_result.successful) { 229 | std::cerr << "[fail] ❌ check failed\n" 230 | << to_string_view(check_result.style) << ": " 231 | << check_result.check_text << std::endl; 232 | return 1; 233 | } 234 | else if (verbose) { 235 | std::cout << "[pass] ✅ check passed\n" 236 | << to_string_view(check_result.style) << ": " 237 | << check_result.check_text << std::endl; 238 | } 239 | remaining_file = check_result.remaining_file; 240 | } 241 | // when the loop ends and we go back to the top, we reset `remaining_file`, 242 | // which means we effectively check the whole match_file again to find the match we want. 243 | // this should keep our checks running normally and nominally. 244 | } 245 | 246 | return 0; 247 | } 248 | -------------------------------------------------------------------------------- /test/lex/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 
6 | # ============================================================================ # 7 | 8 | file(GLOB_RECURSE lex_test_sources 9 | LIST_DIRECTORIES OFF 10 | CONFIGURE_DEPENDS 11 | *.c) 12 | foreach(test_source_file ${lex_test_sources}) 13 | a_c_compiler_test_make_file_check_lex_test(lex ${test_source_file}) 14 | endforeach() 15 | -------------------------------------------------------------------------------- /test/lex/keyword.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Ensure all keywords are lexed as keywords and not passed to parser as 3 | * identifiers. 4 | * Each check directive sits directly above the source line that uses the keyword it names. NOTE(review): `typedef int int;` is not a valid C declaration; presumably tolerated because this fixture is only lexed - confirm. 5 | * TODO: make use of all keywords 6 | */ 7 | // CHECK: tok_keyword_int 8 | int main() { 9 | // CHECK: tok_keyword_typedef 10 | typedef int int; 11 | // CHECK: tok_keyword_register 12 | register int foo; 13 | // CHECK: tok_keyword_float 14 | { float var; } 15 | // CHECK: tok_keyword_double 16 | { double var; } 17 | // CHECK: tok_keyword_long 18 | { long var; } 19 | // CHECK: tok_keyword_unsigned 20 | { unsigned var; } 21 | // CHECK: tok_keyword_char 22 | { char var; } 23 | // CHECK: tok_keyword_short 24 | { short var; } 25 | } 26 | -------------------------------------------------------------------------------- /test/lex/main.c: -------------------------------------------------------------------------------- 1 | int main(int argc, char** argv) { 2 | return 0; 3 | } /* The directives below list the expected token kinds in the order the tokens occur in the source above. NOTE(review): plain check directives appear to be matched independently of one another, so the repeated tok_keyword_int and tok_asterisk entries may not prove that the token occurs twice - confirm against file_check. */ 4 | // CHECK: tok_keyword_int 5 | // CHECK: tok_id: main 6 | // CHECK: tok_l_paren 7 | // CHECK: tok_keyword_int 8 | // CHECK: tok_id: argc 9 | // CHECK: tok_comma 10 | // CHECK: tok_keyword_char 11 | // CHECK: tok_asterisk 12 | // CHECK: tok_asterisk 13 | // CHECK: tok_id: argv 14 | // CHECK: tok_r_paren 15 | // CHECK: tok_l_curly_bracket 16 | // CHECK: tok_keyword_return 17 | // CHECK: tok_num_literal: 0 18 | // CHECK: tok_semicolon 19 | // CHECK: tok_r_curly_bracket 20 | -------------------------------------------------------------------------------- /test/lex/numlit.c:
-------------------------------------------------------------------------------- 1 | void foo() { 2 | int i = 123; 3 | float j = 12.3; 4 | float k = 234324.f; 5 | float l = 0.0f; 6 | } /* Numeric-literal fixture: an integer, a decimal, a literal ending in `.f`, and a suffixed zero. The directives below expect each literal to be reported with its source spelling, suffix included. */ 7 | // CHECK: tok_num_literal: 123 8 | // CHECK: tok_num_literal: 12.3 9 | // CHECK: tok_num_literal: 234324.f 10 | // CHECK: tok_num_literal: 0.0f 11 | -------------------------------------------------------------------------------- /test/lex/strlit.c: -------------------------------------------------------------------------------- 1 | char * string = "this is a string"; /* The directive below expects the literal's contents without the surrounding quotes. */ 2 | // CHECK: str_literal: this is a string 3 | -------------------------------------------------------------------------------- /test/parse/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # ============================================================================= 2 | # a_c_compiler 3 | # 4 | # © Asher Mancinelli & JeanHeyd "ThePhD" Meneide 5 | # All rights reserved. 6 | # ============================================================================ # 7 | 8 | file(GLOB_RECURSE lex_test_sources 9 | LIST_DIRECTORIES OFF 10 | CONFIGURE_DEPENDS 11 | *.c) 12 | foreach(test_source_file ${lex_test_sources}) 13 | a_c_compiler_test_make_file_check_lex_test(parse ${test_source_file}) 14 | endforeach() 15 | -------------------------------------------------------------------------------- /test/parse/attribute.c: -------------------------------------------------------------------------------- 1 | [[meow]]; 2 | [[foo::bar]]; 3 | [[vendor::something_or_other]]; 4 | [[vendor::with_args(1, 6.0f, igjuogs)]]; /* Covers a bare attribute, namespaced attributes, and a namespaced attribute with an argument list. This fixture carries no check directives. */ 5 | -------------------------------------------------------------------------------- /test/parse/funcdef.c: -------------------------------------------------------------------------------- 1 | int foo(int bar) { return bar; } /* A single function definition that returns its parameter. This fixture carries no check directives. */ 2 | -------------------------------------------------------------------------------- /test/parse/struct.c:
-------------------------------------------------------------------------------- 1 | struct s { 2 | int field; 3 | }; 4 | 5 | typedef struct { 6 | bool field; 7 | } var; /* A named struct followed by a typedef of an anonymous struct. The directive below expects the output to contain the unimplemented-keyword text naming `struct`. */ 8 | // CHECK: unimplemented keyword 'struct' 9 | -------------------------------------------------------------------------------- /test/parse/typedef.c: -------------------------------------------------------------------------------- 1 | typedef int my_int; /* The directive below expects the unimplemented-keyword text but does not name the keyword. */ 2 | // CHECK: unimplemented keyword 3 | --------------------------------------------------------------------------------