├── LICENSE ├── Makefile ├── README.md ├── compiler.cpp ├── compiler.h ├── error.cpp ├── error.h ├── file ├── arithmetic.c └── intended_error.c ├── lexer.h ├── parser.h ├── scanner.h ├── test_lexer.cpp ├── test_scanner.cpp ├── token.cpp └── token.h /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LEXER_OBJECTS = test_lexer.o token.o error.o 2 | SCANNER_OBJECTS = test_scanner.o error.o 3 | 4 | CXX = g++ -std=c++14 -g 5 | EXE = test_lexer test_scanner 6 | 7 | test_lexer : $(LEXER_OBJECTS) 8 | $(CXX) -o test_lexer $(LEXER_OBJECTS) 9 | test_scanner : $(SCANNER_OBJECTS) 10 | $(CXX) -o test_scanner $(SCANNER_OBJECTS) 11 | 12 | test_lexer.o : lexer.h error.h scanner.h token.h 13 | test_scanner.o : scanner.h error.h 14 | error.o : error.h 15 | token.o : token.h 16 | 17 | # lexer.h : error.h scanner.h token.h 18 | # scanner.h : error.h 19 | 20 | .PHONY : clean 21 | clean : 22 | -rm *.o $(EXE) 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OverView 2 | A Small But Complete Compile System : Lexer, Parser, Assembler And Linker 3 | 4 | # Require 5 | GCC, C++14 6 | 7 | # Make 8 | 1. Delete intermediate files and executable files : `make clean` 9 | 1. Generate scanner's test program : `make test_scanner` 10 | 1. Generate lexer's test program : `make test_lexer` 11 | 12 | # Run 13 | 1. test scanner : `./test_scanner` 14 | 1. test lexer : `./test_lexer` 15 | 16 | # Reference code 17 | [cit : a C-like compile system](https://github.com/fanzhidongyzby/cit) 18 | 19 | # ToDo 20 | Parser, Assembler And Linker -------------------------------------------------------------------------------- /compiler.cpp: -------------------------------------------------------------------------------- 1 | #include "compiler.h" 2 | 3 | namespace akan { 4 | bool Compiler::show_char_ = false; 5 | bool Compiler::show_token_ = false; 6 | bool Compiler::show_symtab_ = false; 7 | bool Compiler::show_op_ir_ = false; 8 | bool Compiler::show_block_ = false; 9 | bool Compiler::show_help_ = false; 10 | bool Compiler::optim_ = false; 11 | } // namespace akan 12 | -------------------------------------------------------------------------------- /compiler.h: -------------------------------------------------------------------------------- 1 | namespace akan { 2 | class Compiler { 3 | friend class Scanner; 4 | 5 | private: 6 | static bool show_char_; // show character 7 | static bool show_token_; // show lexical mark 8 | static bool show_symtab_; // show symbol table 9 | static bool show_ir_; // show intermediate representation 10 | static bool show_op_ir_; // show optimized IR 11 | static bool show_block_; // show basic block and control flow 12 | static bool show_help_; // show help 13 | static bool optim_; // whether to optimize 14 | public: 15 | Compiler(const Compiler &) = delete; 16 | Compiler &operator(const Compiler &) = delete; 17 | ~Compiler() = delete; 18 | 19 | static void Compile(const char *file) { 20 | Scanner scanner(file); 21 | Error error(&scanner); 22 | Lexer lexer(scanner); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /error.cpp: -------------------------------------------------------------------------------- 1 | #include "error.h" 2 | #include "scanner.h" 3 | 4 | namespace akan { 5 | std::shared_ptr Error::scanner_ = nullptr; 6 | int Error::error_num_ = 0; 7 | int Error::warn_num_ = 0; 8 | 9 | const char *lexical_error_name[] = {"String misses right double quote.", 10 | "Binary number has no entity data.", 11 | "Hexadecimal number has no entity data.", 12 | "Character misses right single quote.", 13 | "Not support null character.", 14 | "Or operator should be double &.", 15 | "Multi-line comment does not end normally.", 16 | "Lexical notation does not exist."}; 17 | 18 | void Error::PrintLexicalError(int code) { 19 | IncrErrorNum(); 20 | printf("%s LexicalError: %s.\n", scanner_->GetFile(), 21 | scanner_->GetLine(), scanner_->GetCol(), lexical_error_name[code]); 22 | } 23 | 24 | void Error::PrintSyntaxError(int code, std::shared_ptr token){ 25 | } 26 | 27 | } // namespace akan 28 | -------------------------------------------------------------------------------- /error.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace akan { 8 | enum CommonError { FATAL, ERROR, WARN }; 9 | enum LexicalError { 10 | STR_NO_R_QUOTE, 11 | BI_NUM_NO_ENTITY, 12 | HEX_NUM_NO_ENTITY, 13 | CHAR_NO_R_QUOTE, 14 | NOT_SUPPORT_NULL_CHAR, 15 | OR_NO_PAIR, 16 | COMMENT_NO_END, 17 | TOKEN_NO_EXIST 18 | }; 19 | 20 | enum SyntaxError { 21 | TYPE_LOST, 22 | TYPE_WRONG, 23 | ID_LOST, 24 | ID_WRONG, 25 | NUM_LOST, 26 | NUM_WRONG, 27 | LITERAL_LOST, 28 | LITERAL_WRONG, 29 | COMMA_LOST, 30 | COMMA_WRONG, 31 | SEMICON_LOST, 32 | SEMICON_WRONG, 33 | ASSIGN_LOST, 34 | ASSIGN_WRONG, 35 | COLON_LOST, 36 | COLON_WRONG, 37 | WHILE_LOST, 38 | WHILE_WRONG, 39 | LPAREN_LOST, 40 | LPAREN_WRONG, 41 | RPAREN_LOST, 42 | RPAREN_WRONG, 43 | LBRACK_LOST, 44 | LBRACK_WRONG, 45 | RBRACK_LOST, 46 | RBRACK_WRONG, 47 | LBRACE_LOST, 48 | LBRACE_WRONG, 49 | RBRACE_LOST, 50 | RBRACE_WRONG 51 | }; 52 | 53 | enum SemanticError { 54 | VAR_RE_DEF, // variable redefinition 55 | FUN_RE_DEF, // function redefinition 56 | VAR_UN_DEC, // variable not declared 57 | FUN_UN_DEC, // function not declared 58 | FUN_DEC_ERR, // function declaration is not consistent with definition 59 | FUN_CALL_ERR, // function argument is not consistent with parameter 60 | DEC_INIT_DENY, // declare variable with an initializer 61 | EXTERN_FUN_DEF, // declare function with extern 62 | ARRAY_LEN_INVALID, // array length is invalid 63 | VAR_INIT_ERR, // variable intializer has a wrong type 64 | GLB_INIT_ERR, // global initializer is not constant value 65 | VOID_VAR, // void variable 66 | EXPR_NOT_LEFT_VAL, // invalid left-value expression 67 | ASSIGN_TYPE_ERROR, // assign type dismatch 68 | EXPR_IS_BASE, // expression is of a base type 69 | EXPR_NOT_BASE, // expression is of not a base type 70 | ARR_TYPE_ERR, // array type error 71 | EXPR_IS_VOID, // expression is of a void type 72 | BREAK_ERR, // break occurs outside loop or switch-case 73 | CONTINUE_ERR, // continue occurs outside a loop 74 | RETURN_ERR // return type is no consistent with function type 75 | }; 76 | 77 | template 78 | void PrintCommonError(CommonError common_error, Args &&... args) { 79 | switch (common_error) { 80 | case FATAL: 81 | printf(":"); 82 | printf(std::forward(args)...); 83 | break; 84 | case ERROR: 85 | printf(":"); 86 | printf(std::forward(args)...); 87 | break; 88 | case WARN: 89 | printf(":"); 90 | printf(std::forward(args)...); 91 | break; 92 | } 93 | } 94 | 95 | class Scanner; 96 | class Token; 97 | class Error { 98 | static std::shared_ptr scanner_; 99 | static int error_num_; 100 | static int warn_num_; 101 | 102 | public: 103 | Error(const Error &) = delete; 104 | Error &operator=(const Error &) = delete; 105 | ~Error() = delete; 106 | 107 | static void IncrErrorNum() { ++error_num_; } 108 | static void IncrWarnNum() { ++warn_num_; } 109 | static int GetErrorNum() { return error_num_; } 110 | static int GetWarnNum() { return warn_num_; } 111 | static void Clear() { 112 | scanner_ = nullptr; 113 | error_num_ = 0; 114 | warn_num_ = 0; 115 | } 116 | static void SetScanner(std::shared_ptr scanner) { 117 | scanner_ = scanner; 118 | } 119 | 120 | static void PrintLexicalError(int code); 121 | static void PrintSyntaxError(int code, std::shared_ptr token); 122 | static void PrintSemanticError(int code, const std::string &name = ""); 123 | static void PrintSemanticWarning(int code, const std::string &name = ""); 124 | }; 125 | } // namespace akan 126 | -------------------------------------------------------------------------------- /file/arithmetic.c: -------------------------------------------------------------------------------- 1 | /*Arithmetic*/ 2 | 3 | int main() { 4 | int a = 1 + 2; 5 | int b = 3 - 4; 6 | int c = 5 * 6; 7 | int d = 7 / 8; 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /file/intended_error.c: -------------------------------------------------------------------------------- 1 | int main(){ 2 | "love; 3 | int a =0x; 4 | /* * 5 | } 6 | -------------------------------------------------------------------------------- /lexer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "error.h" 3 | #include "scanner.h" 4 | #include "token.h" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace akan { 10 | class Lexer { 11 | private: 12 | std::shared_ptr scanner_; 13 | char ch_ = ' '; 14 | std::shared_ptr token_; 15 | 16 | void SkipWhiteSpace() { 17 | while (ch_ == ' ' || ch_ == '\n' || ch_ == '\t') { 18 | ch_ = scanner_->Scan(); 19 | } 20 | } 21 | 22 | bool Scan(char need) { 23 | ch_ = scanner_->Scan(); 24 | if (ch_ == need) 25 | return true; 26 | else 27 | return false; 28 | } 29 | 30 | bool IsHexChar(char ch) { 31 | return std::isdigit(ch) || (ch >= 'A' && ch <= 'F') || 32 | (ch >= 'a' && ch <= 'f'); 33 | } 34 | 35 | void Scan() { 36 | ch_ = scanner_->Scan(); 37 | return; 38 | } 39 | 40 | void TokenizeIdentifierOrKeywords() { 41 | std::string name; 42 | do { 43 | name.push_back(ch_); 44 | // Eat one more character here 45 | ch_ = scanner_->Scan(); 46 | } while (std::isalpha(ch_) || ch_ == '_'); 47 | if (Keyword::IsKeyword(name)) { 48 | token_ = std::make_shared(name); 49 | } else { 50 | token_ = std::make_shared(name); 51 | } 52 | return; 53 | } 54 | 55 | void TokenizeString() { 56 | std::string str; 57 | while (!Scan('"')) { 58 | if (ch_ == '\\') { 59 | Scan(); 60 | switch (ch_) { 61 | case 'n': 62 | str.push_back('\n'); 63 | break; 64 | case '\\': 65 | str.push_back('\\'); 66 | break; 67 | case 't': 68 | str.push_back('\t'); 69 | break; 70 | case '"': 71 | str.push_back('"'); 72 | break; 73 | case '0': 74 | str.push_back('\0'); 75 | break; 76 | case '\n': 77 | break; 78 | case -1: 79 | // Eat one more character here 80 | Error::PrintLexicalError(STR_NO_R_QUOTE); 81 | token_ = std::make_shared(ERR); 82 | return; 83 | default: 84 | str.push_back(ch_); 85 | } 86 | } else if (ch_ == '\n' || ch_ == -1) { 87 | // Eat one more character here 88 | Error::PrintLexicalError(STR_NO_R_QUOTE); 89 | token_ = std::make_shared(ERR); 90 | return; 91 | } else { 92 | str.push_back(ch_); 93 | } 94 | } 95 | token_ = std::make_shared(str); 96 | // Eat one more character here 97 | Scan(); 98 | } 99 | 100 | void TokenizeNumber() { 101 | int val = 0; 102 | // Decimal 103 | if (ch_ != 0) { 104 | do { 105 | val = val * 10 + ch_ - '0'; 106 | // Eat one more character here 107 | Scan(); 108 | } while (std::isdigit(ch_)); 109 | } else { 110 | Scan(); 111 | // Hexadecimal 112 | if (ch_ == 'x') { 113 | Scan(); 114 | if (IsHexChar(ch_)) { 115 | do { 116 | val = val * 16 + ch_; 117 | if (std::isdigit(ch_)) 118 | val -= '0'; 119 | else if (ch_ >= 'A' && ch_ <= 'F') 120 | val += 10 - 'A'; 121 | else if (ch_ >= 'a' && ch_ <= 'f') 122 | val += 10 - 'a'; 123 | // Eat one more character here 124 | Scan(); 125 | } while (IsHexChar(ch_)); 126 | } else { 127 | // Eat one more character here 128 | Error::PrintLexicalError(HEX_NUM_NO_ENTITY); 129 | token_ = std::make_shared(ERR); 130 | return; 131 | } 132 | } 133 | // Binary 134 | else if (ch_ == 'b') { 135 | Scan(); 136 | if (ch_ >= '0' && ch_ <= '1') { 137 | do { 138 | val = val * 2 + ch_ - '0'; 139 | // Eat one more character here 140 | Scan(); 141 | } while (ch_ >= '0' && ch_ <= '1'); 142 | } else { 143 | // Eat oone more character here 144 | Error::PrintLexicalError(BI_NUM_NO_ENTITY); 145 | token_ = std::make_shared(ERR); 146 | return; 147 | } 148 | } 149 | // Octal 150 | else if (ch_ >= '0' && ch_ <= '7') { 151 | do { 152 | val = val * 8 + ch_ - '0'; 153 | // Eat one more character here 154 | Scan(); 155 | } while (ch_ >= '0' && ch_ <= '7'); 156 | } 157 | } 158 | token_ = std::make_shared(val); 159 | } 160 | 161 | void TokenizeCharacter() { 162 | Scan(); 163 | char c; 164 | // Escape character 165 | if (ch_ = '\\') { 166 | Scan(); 167 | if (ch_ == 'n') 168 | c = '\n'; 169 | else if (ch_ == '\\') 170 | c = '\\'; 171 | else if (ch_ == 't') 172 | c = '\t'; 173 | else if (ch_ == '0') 174 | c = '\0'; 175 | else if (ch_ == '\'') 176 | c = '\''; 177 | // End of file or line break 178 | else if (ch_ == -1 || ch_ == '\n') { 179 | // Eat one more character here 180 | Error::PrintLexicalError(CHAR_NO_R_QUOTE); 181 | token_ = std::make_shared(ERR); 182 | return; 183 | } 184 | // Non-escape character 185 | else 186 | c = ch_; 187 | } else if (ch_ == -1 || ch_ == '\n') { 188 | // Eat one more character here 189 | Error::PrintLexicalError(CHAR_NO_R_QUOTE); 190 | token_ = std::make_shared(ERR); 191 | return; 192 | } 193 | // No entity 194 | else if (ch_ == '\'') { 195 | // Eat one more character here 196 | Error::PrintLexicalError(NOT_SUPPORT_NULL_CHAR); 197 | token_ = std::make_shared(ERR); 198 | return; 199 | } 200 | // Non-escape character 201 | else { 202 | c = ch_; 203 | } 204 | if (Scan('\'')) { 205 | token_ = std::make_shared(c); 206 | // Eat one more character here 207 | Scan(); 208 | return; 209 | } else { 210 | // Eat one more character here 211 | Error::PrintLexicalError(CHAR_NO_R_QUOTE); 212 | token_ = std::make_shared(ERR); 213 | return; 214 | } 215 | } 216 | 217 | void TokenizeDelimiter() { 218 | switch (ch_) { 219 | // Ignore macro 220 | case '#': 221 | while (ch_ != '\n' && ch_ != -1) 222 | // Eat one more character here 223 | Scan(); 224 | break; 225 | case '+': 226 | token_ = std::make_shared(Scan('+') ? INC : ADD); 227 | // Eat one more character here 228 | Scan(); 229 | break; 230 | case '-': 231 | token_ = std::make_shared(Scan('-') ? DEC : SUB); 232 | // Eat one more character here 233 | Scan(); 234 | break; 235 | case '*': 236 | token_ = std::make_shared(MUL); 237 | // Eat one more character here 238 | Scan(); 239 | break; 240 | case '/': 241 | Scan(); 242 | // Single-line comment 243 | if (ch_ == '/') { 244 | while (ch_ != '\n' && ch_ != -1) { 245 | // Eat one more character here 246 | Scan(); 247 | } 248 | // make pointer null if match a comment 249 | token_ = nullptr; 250 | return; 251 | } 252 | // Multi-line comment 253 | else if (ch_ == '*') { 254 | while (!Scan(-1)) { 255 | if (ch_ == '*') { 256 | if (Scan('/')) 257 | break; 258 | } 259 | } 260 | if (ch_ == -1) { 261 | // Eat one more character here 262 | Error::PrintLexicalError(COMMENT_NO_END); 263 | token_ = std::make_shared(ERR); 264 | return; 265 | } else { 266 | // Eat one more character here 267 | Scan(); 268 | // make pointer null if match a comment 269 | token_ = nullptr; 270 | return; 271 | } 272 | } 273 | // Division operator 274 | else { 275 | token_ = std::make_shared(DIV); 276 | // Eat one more character here 277 | Scan(); 278 | return; 279 | } 280 | case '%': 281 | token_ = std::make_shared(MOD); 282 | // Eat one more character here 283 | Scan(); 284 | break; 285 | case '>': 286 | token_ = std::make_shared(Scan('=') ? GE : GT); 287 | // Eat one more character here 288 | Scan(); 289 | break; 290 | case '<': 291 | token_ = std::make_shared(Scan('=') ? LE : LT); 292 | // Eat one more character here 293 | Scan(); 294 | break; 295 | case '=': 296 | token_ = std::make_shared(Scan('=') ? EQU : ASSIGN); 297 | // Eat one more character here 298 | Scan(); 299 | break; 300 | case '&': 301 | token_ = std::make_shared(Scan('&') ? AND : LEA); 302 | // Eat one more character here 303 | Scan(); 304 | break; 305 | case '|': 306 | if (Scan('|')) { 307 | token_ = std::make_shared(OR); 308 | // Eat one more character here 309 | Scan(); 310 | return; 311 | } else { 312 | token_ = std::make_shared(ERR); 313 | // Eat one more character here 314 | Error::PrintLexicalError(OR_NO_PAIR); 315 | return; 316 | } 317 | case ',': 318 | token_ = std::make_shared(COMMA); 319 | // Eat one more character here 320 | Scan(); 321 | break; 322 | case ':': 323 | token_ = std::make_shared(COLON); 324 | // Eat one more character here 325 | Scan(); 326 | break; 327 | case ';': 328 | token_ = std::make_shared(SEMICON); 329 | // Eat one more character here 330 | Scan(); 331 | break; 332 | case '(': 333 | token_ = std::make_shared(LPAREN); 334 | // Eat one more character here 335 | Scan(); 336 | break; 337 | case ')': 338 | token_ = std::make_shared(RPAREN); 339 | // Eat one more character here 340 | Scan(); 341 | break; 342 | case '[': 343 | token_ = std::make_shared(LBRACK); 344 | // Eat one more character here 345 | Scan(); 346 | break; 347 | case ']': 348 | token_ = std::make_shared(RBRACK); 349 | // Eat one more character here 350 | Scan(); 351 | break; 352 | case '{': 353 | token_ = std::make_shared(LBRACE); 354 | // Eat one more character here 355 | Scan(); 356 | break; 357 | case '}': 358 | token_ = std::make_shared(RBRACE); 359 | // Eat one more character here 360 | Scan(); 361 | break; 362 | case -1: 363 | token_ = std::make_shared(END); 364 | break; 365 | default: 366 | token_ = std::make_shared(ERR); 367 | // Eat one more character here 368 | Error::PrintLexicalError(TOKEN_NO_EXIST); 369 | } 370 | } 371 | 372 | public: 373 | Lexer(std::shared_ptr scanner) : scanner_(scanner) { 374 | Error::SetScanner(scanner); 375 | } 376 | Lexer(const Lexer &) = delete; 377 | Lexer &operator=(const Lexer &) = delete; 378 | ~Lexer() = default; 379 | // All Tokenize function should eat one more character except that an error 380 | // occurs or scanner reaches the end of the file. 381 | std::shared_ptr Tokenize() { 382 | // Use a loop here is to skip the comment and print out all lexical error. 383 | do { 384 | SkipWhiteSpace(); 385 | if (std::isalpha(ch_) || ch_ == '_') 386 | TokenizeIdentifierOrKeywords(); 387 | else if (ch_ == '"') 388 | TokenizeString(); 389 | else if (std::isdigit(ch_)) 390 | TokenizeNumber(); 391 | else if (ch_ == '\'') 392 | TokenizeCharacter(); 393 | else 394 | TokenizeDelimiter(); 395 | if (token_ && token_->GetTag() != ERR) 396 | return token_; 397 | } while (ch_ != -1); 398 | return std::make_shared(END); 399 | } 400 | 401 | private: 402 | // Debug helper 403 | static void TestImpl(const char *file_name) { 404 | Lexer lexer(std::make_shared(file_name)); 405 | std::shared_ptr token; 406 | do { 407 | token = lexer.Tokenize(); 408 | std::printf("%10s\t", Token::GetTagName(token->GetTag()).c_str()); 409 | std::fflush(stdout); 410 | std::printf("%20s\n", token->ToString().c_str()); 411 | std::fflush(stdout); 412 | } while (token->GetTag() != END); 413 | std::printf("Finish the lex for %s\n", file_name); 414 | } 415 | 416 | public: 417 | static void MainTest(int argc = 0, char *argv[] = nullptr) { 418 | TestImpl("file/arithmetic.c"); 419 | printf("\n"); 420 | TestImpl("file/intended_error.c"); 421 | } 422 | }; 423 | } // namespace akan 424 | -------------------------------------------------------------------------------- /parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "lexer.h" 3 | #include 4 | #include 5 | #include 6 | #define PARSER_DEBUG 7 | 8 | namespace skan { 9 | class Parser { 10 | private: 11 | std::shared_ptr lexer_; 12 | std::shared_ptr token_; 13 | std::shared_ptr symbol_table_; 14 | std::shared_ptr ir_generator_; 15 | 16 | void Move() { 17 | token_ = lexer_.Tokenize(); 18 | #ifdef PARSER_DEBUG 19 | std::printf("%s\n", token_->ToString().c_str()); 20 | std::fflush(stdout); 21 | #endif 22 | } 23 | 24 | bool Match(Tag tag) { return token_->GetTag() == tag; } 25 | 26 | bool MatchThenMove(Tag tag) { 27 | if (Match(tag)) { 28 | Move(); 29 | return true; 30 | } else { 31 | return false; 32 | } 33 | } 34 | 35 | bool IsType() { return Token::IsType(token_->GetTag()); } 36 | bool IsTag(std::initializer_list tags) { 37 | for (auto tag : tags) { 38 | if (token_->GetTag() == tag) 39 | return true; 40 | } 41 | return false; 42 | } 43 | 44 | void RecoverFromError(bool condition, SyntaxError lost_error, 45 | SyntaxError wrong_error) { 46 | if (condition) { 47 | PrintSyntaxError(lost_error, token_); 48 | } else { 49 | PrintSyntaxError(wrong_error, token_); 50 | Move(); 51 | } 52 | } 53 | 54 | // -> 55 | void ParseProgram() { 56 | if (Match(END)) { 57 | return; 58 | } else { 59 | ParseSegment(); 60 | ParseProgram(); 61 | } 62 | } 63 | 64 | // ->kw_extern | 65 | void ParseSegment() { 66 | bool has_extern = MatchThenMove(KW_EXTERN); 67 | Tag tag = ParseType(); 68 | ParseDef(has_extern, tag); 69 | } 70 | 71 | // ->kw_int | kw_char | kw_void 72 | Tag ParseType() { 73 | if (IsType()) { 74 | Move(); 75 | return token_->GetTag(); 76 | } else { 77 | RecoverFromError(IsTag({ID,MUL}),TYPE_LOST, TYPE_WRONG); 78 | return KW_INT; 79 | } 80 | } 81 | 82 | public: 83 | Parser(const Parser &) = delete; 84 | Parser &operator=(const Parser &) = delete; 85 | ~Parser = default; 86 | 87 | Parser(std::shared_ptr lexer, 88 | std::shared_ptr symbol_table, 89 | std::shared_ptr ir_generator) 90 | : lexer_(lexer), symbol_table_(symbol_table), 91 | ir_generator_(ir_generator) {} 92 | 93 | void Parse() { 94 | Move(); 95 | ParseProgram(); 96 | } 97 | }; 98 | } // namespace skan 99 | -------------------------------------------------------------------------------- /scanner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "error.h" 3 | #include 4 | #include 5 | 6 | namespace akan { 7 | class Scanner { 8 | 9 | static void CloseFile(std::FILE *fp) { 10 | if (fp) { 11 | std::fclose(fp); 12 | } 13 | } 14 | // File 15 | const char *file_name_ = nullptr; 16 | std::unique_ptr file_; 17 | 18 | // Characters read 19 | static constexpr int buf_len_ = 80; // Length of scan buffer 20 | char line_[buf_len_]; 21 | 22 | // Read status 23 | int line_len_ = 0; // Length of current line 24 | int read_pos_ = -1; // Read position 25 | char last_ch_ = 0; // Last character, used to judge the line break position 26 | int line_num_ = 1; // Row Number 27 | int col_num_ = 0; // Column Number 28 | 29 | // Debug helper 30 | static std::string ShowChar(char ch) { 31 | char s[16]; 32 | switch (ch) { 33 | case -1: 34 | std::sprintf(s, "%s <%d>", "EOF", ch); 35 | break; 36 | case '\n': 37 | std::sprintf(s, "%s <%d>", "\\n", ch); 38 | break; 39 | case '\t': 40 | std::sprintf(s, "%s <%d>", "\\t", ch); 41 | break; 42 | case ' ': 43 | std::sprintf(s, "%s <%d>", "blank", ch); 44 | break; 45 | default: 46 | std::sprintf(s, "%c <%d>", ch, ch); 47 | } 48 | return std::string(s); 49 | } 50 | 51 | public: 52 | Scanner(const char *name) 53 | : file_name_(name), file_(std::fopen(name, "r"), &CloseFile) { 54 | if (!file_) { 55 | PrintCommonError( 56 | FATAL, "Fail to open the file %s! Please check filename and path.\n", 57 | name); 58 | Error::IncrErrorNum(); 59 | } 60 | } 61 | 62 | Scanner(const Scanner &) = delete; 63 | Scanner &operator=(const Scanner &) = delete; 64 | ~Scanner() = default; 65 | 66 | // Scan characters from buffer 67 | int Scan() { 68 | if (!file_) 69 | return -1; 70 | if (read_pos_ == line_len_ - 1) { 71 | line_len_ = fread(line_, 1, buf_len_, file_.get()); // reload buffer data 72 | if (line_len_ == 0) { // no data 73 | // indicate end of file 74 | line_len_ = 1; 75 | line_[0] = -1; 76 | last_ch_ = -1; 77 | return -1; 78 | } 79 | read_pos_ = -1; // restore reading position 80 | } 81 | ++read_pos_; 82 | char ch = line_[read_pos_]; // get the new char 83 | if (last_ch_ == '\n') { // start new line 84 | ++line_num_; 85 | col_num_ = 0; 86 | } else { 87 | ++col_num_; 88 | } 89 | last_ch_ = ch; 90 | return ch; 91 | } 92 | 93 | // Getter 94 | const char *GetFile() const { return file_name_; } 95 | int GetLine() const { return line_num_; } 96 | int GetCol() const { return col_num_; } 97 | 98 | private: 99 | static void TestImpl(const char *file_name) { 100 | Scanner scanner(file_name); 101 | char ch; 102 | do { 103 | ch = scanner.Scan(); 104 | std::printf("%8s\tline: %3d\tcol: %3d\n", ShowChar(ch).c_str(), 105 | scanner.GetLine(), scanner.GetCol()); 106 | } while (ch != -1); 107 | std::printf("Finish the scan for %s\n", file_name); 108 | } 109 | 110 | public: 111 | static void MainTest(int argc = 0, char *argv[] = nullptr) { 112 | TestImpl("file/arithmetic.c"); 113 | } 114 | }; 115 | } // namespace akan 116 | -------------------------------------------------------------------------------- /test_lexer.cpp: -------------------------------------------------------------------------------- 1 | #include "lexer.h" 2 | using namespace akan; 3 | 4 | int main() { 5 | 6 | Lexer::MainTest(); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /test_scanner.cpp: -------------------------------------------------------------------------------- 1 | #include "scanner.h" 2 | using namespace akan; 3 | 4 | int main() { 5 | 6 | Scanner::MainTest(); 7 | return 0; 8 | } 9 | -------------------------------------------------------------------------------- /token.cpp: -------------------------------------------------------------------------------- 1 | #include "token.h" 2 | namespace akan { 3 | std::array Token::tag_name_ = { 4 | "ERROR", "EOF", "ID", "int", "char", "void", 5 | "extern", "Number", "Character", "String", "!", "&", 6 | "+", "-", "*", "/", "%", "++", 7 | "--", ">", ">=", "<", "<=", "==", 8 | "!=", "&&", "||", "(", ")", "[", 9 | "]", "{", "}", ",", ":", ";", 10 | "=", "if", "else", "switch", "else", "default", 11 | "while", "do", "for", "break", "continue", "return"}; 12 | 13 | std::unordered_map Keyword::keywords_ = { 14 | {"int", KW_INT}, {"char", KW_CHAR}, 15 | {"void", KW_VOID}, {"extern", KW_EXTERN}, 16 | {"if", KW_IF}, {"else", KW_ELSE}, 17 | {"switch", KW_SWITCH}, {"case", KW_CASE}, 18 | {"default", KW_DEFAULT}, {"while", KW_WHILE}, 19 | {"do", KW_DO}, {"for", KW_FOR}, 20 | {"break", KW_BREAK}, {"continue", KW_CONTINUE}, 21 | {"return", KW_RETURN}}; 22 | 23 | std::string Token::ToString() const { return tag_name_[tag_]; } 24 | 25 | std::string Keyword::ToString() const { return "[Keyword]: " + name_; } 26 | 27 | std::string Delimiter::ToString() const { return "[Delimiter]: " + name_; } 28 | 29 | std::string Identifier::ToString() const { 30 | return "[" + Token::ToString() + "]: " + name_; 31 | } 32 | 33 | std::string String::ToString() const { 34 | return "[" + Token::ToString() + "]: " + content_; 35 | } 36 | 37 | std::string Number::ToString() const { 38 | return "[" + Token::ToString() + "]: " + std::to_string(value_); 39 | } 40 | 41 | std::string Character::ToString() const { 42 | return "[" + Token::ToString() + "] " + std::string(1, ch_); 43 | } 44 | 45 | bool Keyword::IsKeyword(const std::string &name) { 46 | return keywords_.find(name) != keywords_.end(); 47 | } 48 | } // namespace akan 49 | -------------------------------------------------------------------------------- /token.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | namespace akan { 7 | enum Tag { 8 | ERR, 9 | END, 10 | ID, 11 | KW_INT, 12 | KW_CHAR, 13 | KW_VOID, 14 | KW_EXTERN, 15 | NUM, 16 | CH, 17 | STR, 18 | NOT, 19 | LEA, 20 | ADD, 21 | SUB, 22 | MUL, 23 | DIV, 24 | MOD, 25 | INC, 26 | DEC, 27 | GT, 28 | GE, 29 | LT, 30 | LE, 31 | EQU, 32 | NEQU, 33 | AND, 34 | OR, 35 | LPAREN, 36 | RPAREN, 37 | LBRACK, 38 | RBRACK, 39 | LBRACE, 40 | RBRACE, 41 | COMMA, 42 | COLON, 43 | SEMICON, 44 | ASSIGN, 45 | KW_IF, 46 | KW_ELSE, 47 | KW_SWITCH, 48 | KW_CASE, 49 | KW_DEFAULT, 50 | KW_WHILE, 51 | KW_DO, 52 | KW_FOR, 53 | KW_BREAK, 54 | KW_CONTINUE, 55 | KW_RETURN 56 | }; 57 | class Token { 58 | private: 59 | Tag tag_; 60 | 61 | protected: 62 | static std::array tag_name_; 63 | 64 | public: 65 | Tag GetTag() { return tag_; } 66 | static std::string GetTagName(Tag tag) { return tag_name_[tag]; } 67 | static bool IsType(Tag tag) { 68 | return tag == KW_INT || tag == KW_CHAR || tag == KW_VOID; 69 | } 70 | Token(Tag tag) : tag_(tag) {} 71 | Token(const Token &) = default; 72 | Token &operator=(const Token &) = default; 73 | virtual std::string ToString() const; 74 | virtual ~Token() = default; 75 | }; 76 | 77 | class Keyword : public Token { 78 | std::string name_; 79 | static std::unordered_map keywords_; 80 | 81 | public: 82 | std::string GetName() { return name_; } 83 | Keyword(const std::string &name) : Token(keywords_[name]), name_(name) {} 84 | Keyword(const Keyword &) = default; 85 | Keyword &operator=(const Keyword &) = default; 86 | virtual std::string ToString() const override; 87 | virtual ~Keyword() = default; 88 | static bool IsKeyword(const std::string &name); 89 | }; 90 | 91 | class Delimiter : public Token { 92 | std::string name_; 93 | 94 | public: 95 | std::string GetName() { return name_; } 96 | Delimiter(Tag tag) : Token(tag), name_(tag_name_[tag]) {} 97 | Delimiter(const Delimiter &) = default; 98 | Delimiter &operator=(const Delimiter &) = default; 99 | virtual std::string ToString() const override; 100 | virtual ~Delimiter() = default; 101 | }; 102 | 103 | class Identifier : public Token { 104 | std::string name_; 105 | 106 | public: 107 | std::string GetName() { return name_; } 108 | Identifier(const std::string &name) : Token(ID), name_(name) {} 109 | Identifier(const Identifier &) = default; 110 | Identifier &operator=(const Identifier &) = default; 111 | virtual std::string ToString() const override; 112 | virtual ~Identifier() = default; 113 | }; 114 | 115 | class String : public Token { 116 | std::string content_; 117 | 118 | public: 119 | std::string GetContent() { return content_; } 120 | String(const std::string &content) : Token(STR), content_(content) {} 121 | String(const String &) = default; 122 | String &operator=(const String &) = default; 123 | virtual std::string ToString() const override; 124 | virtual ~String() = default; 125 | }; 126 | 127 | class Number : public Token { 128 | int value_; 129 | 130 | public: 131 | int GetValue() { return value_; } 132 | Number(int value) : Token(NUM), value_(value) {} 133 | Number(const Number &) = default; 134 | Number &operator=(const Number &) = default; 135 | virtual std::string ToString() const override; 136 | virtual ~Number() = default; 137 | }; 138 | 139 | class Character : public Token { 140 | char ch_; 141 | 142 | public: 143 | char GetContent() { return ch_; } 144 | Character(char ch) : Token(CH), ch_(ch) {} 145 | Character(const Character &) = default; 146 | Character &operator=(const Character &) = default; 147 | virtual std::string ToString() const override; 148 | virtual ~Character() = default; 149 | }; 150 | 151 | } // namespace akan 152 | --------------------------------------------------------------------------------