├── src ├── Logger.h ├── Environment.h ├── parser │ ├── EvaGrammar.bnf │ └── EvaParser.h └── EvaLLVM.h ├── compile-run.sh ├── README.md ├── LICENSE ├── eva-llvm.cpp └── test.eva /src/Logger.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Programming Language with LLVM 3 | * 4 | * Course info: 5 | * http://dmitrysoshnikov.com/courses/programming-language-with-llvm/ 6 | * 7 | * (C) 2023-present Dmitry Soshnikov 8 | */ 9 | 10 | /** 11 | * Logger and error reporter. 12 | */ 13 | 14 | #ifndef Logger_h 15 | #define Logger_h 16 | 17 | #include 18 | #include 19 | 20 | class ErrorLogMessage : public std::basic_ostringstream { 21 | public: 22 | ~ErrorLogMessage() { 23 | std::cerr << "Fatal error: " << str().c_str(); 24 | exit(EXIT_FAILURE); 25 | } 26 | }; 27 | 28 | #define DIE ErrorLogMessage() 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /compile-run.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Programming Language with LLVM 3 | # 4 | # Course info: http://dmitrysoshnikov.com/courses/programming-language-with-llvm/ 5 | # 6 | # (C) 2023-present Dmitry Soshnikov 7 | # 8 | 9 | # Compile main: 10 | clang++ -o eva-llvm `llvm-config --cxxflags --ldflags --system-libs --libs core` eva-llvm.cpp -fexceptions 11 | 12 | # Run main: 13 | ./eva-llvm 14 | 15 | # Execute generated IR: 16 | # lli ./out.ll 17 | 18 | # Optimize the output: 19 | opt ./out.ll -O3 -S -o ./out-opt.ll 20 | 21 | # Compile ./out.ll with GC: 22 | # 23 | # Note: to install GC_malloc: 24 | # 25 | # brew install libgc 26 | # 27 | clang++ -O3 -I/usr/local/include/gc/ ./out-opt.ll /usr/local/lib/libgc.a -o ./out 28 | 29 | # Run the compiled program: 30 | ./out 31 | 32 | # Print result: 33 | echo $? 
34 | 35 | printf "\n" 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Programming Language with LLVM 2 | 3 | This is a repository for the [Programming Language with LLVM](http://dmitrysoshnikov.com/courses/programming-language-with-llvm/) course. 4 | 5 | As mentioned in the class description, we want our students to understand and implement every detail of the compiler themselves, instead of copy-pasting from the final solution. 6 | 7 | Therefore, the source code here provides only the overall structure of the project, leaving the missing parts as assignments. The _"Implement here..."_ comments, with references to the appropriate video lectures, mark the specific places that need to be completed in order to produce the fully working compiler. 8 | 9 | Example: 10 | 11 | ```cpp 12 | if (op == "var") { 13 | // Implement here: see Lecture 14 14 | } 15 | ``` 16 | 17 | ## Enroll 18 | 19 | You can enroll in the full course here: 20 | 21 | - On the [dmitrysoshnikov.education](https://www.dmitrysoshnikov.education/p/programming-language-with-llvm) school 22 | 23 | More details, classes, articles, and info are available at [http://dmitrysoshnikov.com](http://dmitrysoshnikov.com). 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Dmitry Soshnikov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /eva-llvm.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Programming Language with LLVM 3 | * 4 | * Course info: 5 | * http://dmitrysoshnikov.com/courses/programming-language-with-llvm/ 6 | * 7 | * (C) 2023-present Dmitry Soshnikov 8 | */ 9 | 10 | /** 11 | * Eva LLVM executable. 
12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "./src/EvaLLVM.h" 19 | 20 | /** 21 | * Prints command-line usage to stdout. 22 | */ 23 | void printHelp() { 24 | std::cout << "\nUsage: eva-llvm [options]\n\n" 25 | << "Options:\n" 26 | << " -e, --expression Expression to parse\n" 27 | << " -f, --file File to parse\n\n"; 28 | } 29 | 30 | /** 31 | * Entry point: passes an inline expression or the contents of an Eva 32 | * file to the compiler. 33 | * 34 | * Returns 0 on success or when usage is printed for a wrong argument 35 | * count; returns 1 for an unknown option or an unreadable file. 36 | */ 37 | int main(int argc, char const *argv[]) { 38 | if (argc != 3) { 39 | printHelp(); 40 | return 0; 41 | } 42 | 43 | /** 44 | * Expression mode. 45 | */ 46 | std::string mode = argv[1]; 47 | 48 | /** 49 | * Program to execute. 50 | */ 51 | std::string program; 52 | 53 | /** 54 | * Simple expression. 55 | */ 56 | if (mode == "-e" || mode == "--expression") { 57 | program = argv[2]; 58 | } 59 | 60 | /** 61 | * Eva file. 62 | */ 63 | else if (mode == "-f" || mode == "--file") { 64 | // Read the file: 65 | std::ifstream programFile(argv[2]); 66 | 67 | // Fail loudly instead of compiling an empty program: 68 | if (!programFile) { 69 | std::cerr << "Unable to open file: " << argv[2] << "\n"; 70 | return 1; 71 | } 72 | 73 | std::stringstream buffer; 74 | buffer << programFile.rdbuf() << "\n"; 75 | 76 | // Program: 77 | program = buffer.str(); 78 | } 79 | 80 | /** 81 | * Unknown option: show usage instead of compiling an empty program. 82 | */ 83 | else { 84 | printHelp(); 85 | return 1; 86 | } 87 | 88 | /** 89 | * Compiler instance. 90 | */ 91 | EvaLLVM vm; 92 | 93 | /** 94 | * Generate LLVM IR. 95 | */ 96 | vm.exec(program); 97 | 98 | return 0; 99 | } -------------------------------------------------------------------------------- /src/Environment.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Programming Language with LLVM 3 | * 4 | * Course info: 5 | * http://dmitrysoshnikov.com/courses/programming-language-with-llvm/ 6 | * 7 | * (C) 2023-present Dmitry Soshnikov 8 | */ 9 | 10 | /** 11 | * Environment class (aka Symbol Table). 12 | */ 13 | 14 | #ifndef Environment_h 15 | #define Environment_h 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include "./Logger.h" 22 | #include "llvm/IR/Value.h" 23 | 24 | /** 25 | * Environment: names storage. 26 | */ 27 | class Environment : public std::enable_shared_from_this { 28 | public: 29 | /** 30 | * Creates an environment with the given record. 
31 | */ 32 | Environment(std::map record, 33 | std::shared_ptr parent) 34 | : record_(record), parent_(parent) {} 35 | 36 | /** 37 | * Creates a variable with the given name and value. 38 | */ 39 | llvm::Value* define(const std::string& name, llvm::Value* value) { 40 | // Implement here... 41 | } 42 | 43 | /** 44 | * Returns the value of a defined variable, or throws 45 | * if the variable is not defined. 46 | */ 47 | llvm::Value* lookup(const std::string& name) { 48 | return resolve(name)->record_[name]; 49 | } 50 | 51 | private: 52 | /** 53 | * Returns specific environment in which a variable is defined, or 54 | * throws if a variable is not defined. 55 | */ 56 | std::shared_ptr resolve(const std::string& name) { 57 | // Implement here... 58 | } 59 | 60 | /** 61 | * Bindings storage 62 | */ 63 | std::map record_; 64 | 65 | /** 66 | * Parent link 67 | */ 68 | std::shared_ptr parent_; 69 | }; 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /test.eva: -------------------------------------------------------------------------------- 1 | /** 2 | * Parent class. 3 | */ 4 | (class Point null 5 | (begin 6 | 7 | (var x 0) 8 | (var y 0) 9 | 10 | (def constructor (self x y) 11 | (begin 12 | (set (prop self x) x) 13 | (set (prop self y) y))) 14 | 15 | (def calc (self) 16 | (+ (prop self x) (prop self y))) 17 | 18 | ) 19 | ) 20 | 21 | /** 22 | * Child class. 
23 | */ 24 | (class Point3D Point 25 | (begin 26 | 27 | (var z 100) 28 | 29 | (def constructor (self x y z) 30 | (begin 31 | ((method (super Point3D) constructor) self x y) 32 | (set (prop self z) z))) 33 | 34 | (def calc (self) 35 | (+ ((method (super Point3D) calc) self) (prop self z))) 36 | 37 | 38 | )) 39 | 40 | (var p1 (new Point 10 20)) 41 | (var p2 (new Point3D 100 200 300)) 42 | 43 | (printf "p2.x = %d\n" (prop p2 x)) 44 | (printf "p2.y = %d\n" (prop p2 y)) 45 | (printf "p2.z = %d\n" (prop p2 z)) 46 | 47 | (printf "Point3D.calc result = %d\n" ((method p2 calc) p2)) 48 | 49 | (def check ((obj Point)) 50 | (begin 51 | ((method obj calc) obj))) 52 | 53 | (check p1) // Point.calc 54 | (check p2) // Point3D.calc 55 | 56 | /** 57 | 58 | What's next? 59 | 60 | 1. Optimizing compiler: 61 | 62 | opt [...] 63 | 64 | 2. Arrays / lists 65 | 66 | (list 1 2 3) -> llvm::ArrayType 67 | 68 | 3. Custom Garbage Collector hooks -> https://llvm.org/docs/GarbageCollection.html + "Essentials of Garbage Collectors" 69 | 70 | 4. Interfaces: 71 | 72 | (interface Callable ... (def __call__ (self) throw) ) 73 | 74 | (class Transformer Callable ...) 75 | 76 | 5. Rest arguments: 77 | 78 | (interface Callable ... (def __call__ (self ...) throw) ) 79 | 80 | 6. Opaque pointers: i32* -> ptr, i8* -> ptr, etc 81 | 82 | 7. LLVM IR & MLIR 83 | 84 | 8. (async def fetch (...) ...) -> (await fetch ...) 85 | 86 | 87 | */ -------------------------------------------------------------------------------- /src/parser/EvaGrammar.bnf: -------------------------------------------------------------------------------- 1 | /** 2 | * Eva grammar (S-expression). 
3 | * 4 | * syntax-cli -g src/parser/EvaGrammar.bnf -m LALR1 -o src/parser/EvaParser.h 5 | * 6 | * Examples: 7 | * 8 | * Atom: 42, foo, bar, "Hello World" 9 | * 10 | * List: (), (+ 5 x), (print "hello") 11 | */ 12 | 13 | // ----------------------------------------------- 14 | // Lexical grammar (tokens): 15 | 16 | %lex 17 | 18 | %% 19 | 20 | \/\/.* %empty 21 | \/\*[\s\S]*?\*\/ %empty 22 | 23 | \s+ %empty 24 | 25 | \"[^\"]*\" STRING 26 | 27 | \d+ NUMBER 28 | 29 | [\w\-+*=!<>/]+ SYMBOL 30 | 31 | /lex 32 | 33 | // ----------------------------------------------- 34 | // Syntactic grammar (BNF): 35 | 36 | %{ 37 | 38 | #include 39 | #include 40 | 41 | /** 42 | * Expression type. 43 | */ 44 | enum class ExpType { 45 | NUMBER, 46 | STRING, 47 | SYMBOL, 48 | LIST, 49 | }; 50 | 51 | /** 52 | * Expression. 53 | */ 54 | struct Exp { 55 | ExpType type; 56 | 57 | int number; 58 | std::string string; 59 | std::vector list; 60 | 61 | // Numbers: 62 | Exp(int number) : type(ExpType::NUMBER), number(number) {} 63 | 64 | // Strings, Symbols: 65 | Exp(std::string& strVal) { 66 | if (strVal[0] == '"') { 67 | type = ExpType::STRING; 68 | string = strVal.substr(1, strVal.size() - 2); 69 | } else { 70 | type = ExpType::SYMBOL; 71 | string = strVal; 72 | } 73 | } 74 | 75 | // Lists: 76 | Exp(std::vector list) : type(ExpType::LIST), list(list) {} 77 | 78 | }; 79 | 80 | using Value = Exp; 81 | 82 | %} 83 | 84 | %% 85 | 86 | Exp 87 | : Atom 88 | | List 89 | ; 90 | 91 | Atom 92 | : NUMBER { $$ = Exp(std::stoi($1)) } 93 | | STRING { $$ = Exp($1) } 94 | | SYMBOL { $$ = Exp($1) } 95 | ; 96 | 97 | List 98 | : '(' ListEntries ')' { $$ = $2 } 99 | ; 100 | 101 | ListEntries 102 | : %empty { $$ = Exp(std::vector{}) } 103 | | ListEntries Exp { $1.list.push_back($2); $$ = $1 } 104 | ; 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /src/EvaLLVM.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Programming Language with LLVM 3 | * 4 | * Course info: 5 | * http://dmitrysoshnikov.com/courses/programming-language-with-llvm/ 6 | * 7 | * (C) 2023-present Dmitry Soshnikov 8 | */ 9 | 10 | /** 11 | * Eva to LLVM IR compiler. 12 | */ 13 | 14 | #ifndef EvaLLVM_h 15 | #define EvaLLVM_h 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "llvm/IR/IRBuilder.h" 23 | #include "llvm/IR/LLVMContext.h" 24 | #include "llvm/IR/Module.h" 25 | #include "llvm/IR/Verifier.h" 26 | 27 | #include "./Environment.h" 28 | #include "./Logger.h" 29 | #include "./parser/EvaParser.h" 30 | 31 | using syntax::EvaParser; 32 | 33 | /** 34 | * Environment type. 35 | */ 36 | using Env = std::shared_ptr; 37 | 38 | /** 39 | * Class info. Contains struct type and field names. 40 | */ 41 | struct ClassInfo { 42 | llvm::StructType* cls; 43 | llvm::StructType* parent; 44 | std::map fieldsMap; 45 | std::map methodsMap; 46 | }; 47 | 48 | /** 49 | * Index of the vTable in the class fields. 50 | */ 51 | static const size_t VTABLE_INDEX = 0; 52 | 53 | /** 54 | * Each class has a set of reserved fields at the 55 | * beginning of its layout. Currently it's only 56 | * the vTable used to resolve methods. 57 | */ 58 | static const size_t RESERVED_FIELDS_COUNT = 1; 59 | 60 | // Generic binary operator: 61 | #define GEN_BINARY_OP(Op, varName) \ 62 | do { \ 63 | auto op1 = gen(exp.list[1], env); \ 64 | auto op2 = gen(exp.list[2], env); \ 65 | return builder->Op(op1, op2, varName); \ 66 | } while (false) 67 | 68 | class EvaLLVM { 69 | public: 70 | EvaLLVM() : parser(std::make_unique()) { 71 | moduleInit(); 72 | setupExternFunctions(); 73 | setupGlobalEnvironment(); 74 | setupTargetTriple(); 75 | } 76 | 77 | /** 78 | * Executes a program. 79 | */ 80 | void exec(const std::string& program) { 81 | // 1. Parse the program 82 | auto ast = parser->parse("(begin " + program + ")"); 83 | 84 | // 2. 
Compile to LLVM IR: 85 | compile(ast); 86 | 87 | // Print generated code. 88 | module->print(llvm::outs(), nullptr); 89 | 90 | std::cout << "\n"; 91 | 92 | // 3. Save module IR to file: 93 | saveModuleToFile("./out.ll"); 94 | } 95 | 96 | private: 97 | /** 98 | * Compiles an expression. 99 | */ 100 | void compile(const Exp& ast) { 101 | // 1. Create main function: 102 | fn = createFunction( 103 | "main", 104 | llvm::FunctionType::get(/* return type */ builder->getInt32Ty(), 105 | /* vararg */ false), 106 | GlobalEnv); 107 | 108 | createGlobalVar("VERSION", builder->getInt32(42)); 109 | 110 | // 2. Compile main body: 111 | gen(ast, GlobalEnv); 112 | 113 | builder->CreateRet(builder->getInt32(0)); 114 | } 115 | 116 | /** 117 | * Main compile loop. 118 | */ 119 | llvm::Value* gen(const Exp& exp, Env env) { 120 | switch (exp.type) { 121 | /** 122 | * ---------------------------------------------- 123 | * Numbers. 124 | */ 125 | case ExpType::NUMBER: 126 | // Implement here... 127 | 128 | /** 129 | * ---------------------------------------------- 130 | * Strings. 131 | */ 132 | case ExpType::STRING: { 133 | // Implement here... 134 | } 135 | 136 | /** 137 | * ---------------------------------------------- 138 | * Symbols (variables, operators). 139 | */ 140 | case ExpType::SYMBOL: 141 | /** 142 | * Boolean. 143 | */ 144 | if (exp.string == "true" || exp.string == "false") { 145 | // Implement here... 146 | } else { 147 | // Variables and functions: 148 | 149 | // Implement here... 150 | } 151 | 152 | /** 153 | * ---------------------------------------------- 154 | * Lists. 155 | */ 156 | case ExpType::LIST: 157 | auto tag = exp.list[0]; 158 | 159 | /** 160 | * ---------------------------------------------- 161 | * Special cases. 162 | */ 163 | if (tag.type == ExpType::SYMBOL) { 164 | auto op = tag.string; 165 | 166 | // -------------------------------------------- 167 | // Binary math operations: 168 | 169 | if (op == "+") { 170 | // Implement here... 
171 | } 172 | 173 | else if (op == "-") { 174 | // Implement here... 175 | } 176 | 177 | else if (op == "*") { 178 | // Implement here... 179 | } 180 | 181 | else if (op == "/") { 182 | // Implement here... 183 | } 184 | 185 | // -------------------------------------------- 186 | // Compare operations: (> 5 10) 187 | 188 | // UGT - unsigned, greater than 189 | else if (op == ">") { 190 | // Implement here... 191 | } 192 | 193 | // ULT - unsigned, less than 194 | else if (op == "<") { 195 | // Implement here... 196 | } 197 | 198 | // EQ - equal 199 | else if (op == "==") { 200 | // Implement here... 201 | } 202 | 203 | // NE - not equal 204 | else if (op == "!=") { 205 | // Implement here... 206 | } 207 | 208 | // UGE - greater or equal 209 | else if (op == ">=") { 210 | // Implement here... 211 | } 212 | 213 | // ULE - less or equal 214 | else if (op == "<=") { 215 | // Implement here... 216 | } 217 | 218 | // -------------------------------------------- 219 | // Branch instruction: 220 | 221 | /** 222 | * (if ) 223 | */ 224 | else if (op == "if") { 225 | // Compile : 226 | auto cond = gen(exp.list[1], env); 227 | 228 | // Implement here... 229 | } 230 | 231 | // -------------------------------------------- 232 | // While loop: 233 | 234 | /** 235 | * (while ) 236 | * 237 | */ 238 | else if (op == "while") { 239 | // Implement here... 240 | } 241 | 242 | // -------------------------------------------- 243 | // Function declaration: (def ) 244 | // 245 | 246 | else if (op == "def") { 247 | return compileFunction(exp, /* name */ exp.list[1].string, env); 248 | } 249 | 250 | // -------------------------------------------- 251 | // Variable declaration: (var x (+ y 10)) 252 | // 253 | // Typed: (var (x number) 42) 254 | // 255 | // Note: locals are allocated on the stack. 256 | 257 | if (op == "var") { 258 | // Implement here... 
259 | } 260 | 261 | // -------------------------------------------- 262 | // Variable update: (set x 100) 263 | // Property update (set (prop self x) 100) 264 | 265 | else if (op == "set") { 266 | // Implement here... 267 | } 268 | 269 | // -------------------------------------------- 270 | // Blocks: (begin ) 271 | 272 | else if (op == "begin") { 273 | // Block scope: 274 | auto blockEnv = std::make_shared( 275 | std::map{}, env); 276 | 277 | // Compile each expression within the block. Result is the last 278 | // evaluated expression, or constant 0 when the block is empty. 279 | llvm::Value* blockRes = builder->getInt32(0); 280 | 281 | for (size_t i = 1; i < exp.list.size(); i++) { 282 | // Generate expression code. 283 | blockRes = gen(exp.list[i], blockEnv); 284 | } 285 | 286 | return blockRes; 287 | } 288 | 289 | // -------------------------------------------- 290 | // printf extern function: 291 | // 292 | // (printf "Value: %d" 42) 293 | // 294 | 295 | else if (op == "printf") { 296 | // Implement here... 297 | } 298 | 299 | // -------------------------------------------- 300 | // Class declaration: 301 | // 302 | // (class A ) 303 | // 304 | 305 | else if (op == "class") { 306 | // Implement here... 307 | } 308 | 309 | // -------------------------------------------- 310 | // New operator: 311 | // 312 | // (new ) 313 | // 314 | 315 | else if (op == "new") { 316 | return createInstance(exp, env, ""); 317 | } 318 | 319 | // -------------------------------------------- 320 | // Prop access: 321 | // 322 | // (prop ) 323 | // 324 | 325 | else if (op == "prop") { 326 | // Implement here... 327 | } 328 | 329 | // -------------------------------------------- 330 | // Method access: 331 | // 332 | // (method ) 333 | // 334 | // (method (super ) ) 335 | // 336 | 337 | else if (op == "method") { 338 | // Implement here... 
339 | } 340 | 341 | // -------------------------------------------- 342 | // Function calls: 343 | // 344 | // (square 2) 345 | 346 | else { 347 | auto callable = gen(exp.list[0], env); 348 | 349 | // Implement here... 350 | } 351 | } 352 | 353 | // -------------------------------------------- 354 | // Method calls: 355 | // 356 | // ((method p getX) p 2) 357 | 358 | else { 359 | auto loadedMethod = (llvm::LoadInst*)gen(exp.list[0], env); 360 | 361 | auto fnTy = (llvm::FunctionType*)(loadedMethod->getPointerOperand() 362 | ->getType() 363 | ->getContainedType(0) 364 | ->getContainedType(0)); 365 | 366 | // Implement here... 367 | } 368 | 369 | break; 370 | } 371 | 372 | // Unreachable: 373 | return builder->getInt32(0); 374 | } 375 | 376 | /** 377 | * Returns field index. 378 | */ 379 | size_t getFieldIndex(llvm::StructType* cls, const std::string& fieldName) { 380 | auto fields = &classMap_[cls->getName().data()].fieldsMap; 381 | auto it = fields->find(fieldName); 382 | return std::distance(fields->begin(), it) + RESERVED_FIELDS_COUNT; 383 | } 384 | 385 | /** 386 | * Returns method index. 387 | */ 388 | size_t getMethodIndex(llvm::StructType* cls, const std::string& methodName) { 389 | // Implement here... 390 | } 391 | 392 | /** 393 | * Creates an instance of a class. 394 | */ 395 | llvm::Value* createInstance(const Exp& exp, Env env, 396 | const std::string& name) { 397 | // Implement here... 398 | } 399 | 400 | /** 401 | * Allocates an object of a given class on the heap. 402 | */ 403 | llvm::Value* mallocInstance(llvm::StructType* cls, const std::string& name) { 404 | // Implement here... 405 | } 406 | 407 | /** 408 | * Returns size of a type in bytes. 409 | */ 410 | size_t getTypeSize(llvm::Type* type_) { 411 | return module->getDataLayout().getTypeAllocSize(type_); 412 | } 413 | 414 | /** 415 | * Inherits parent class fields. 416 | */ 417 | void inheritClass(llvm::StructType* cls, llvm::StructType* parent) { 418 | // Implement here... 
419 | } 420 | 421 | /** 422 | * Extracts fields and methods from a class expression. 423 | */ 424 | void buildClassInfo(llvm::StructType* cls, const Exp& clsExp, Env env) { 425 | // Implement here... 426 | } 427 | 428 | /** 429 | * Builds class body from class info. 430 | */ 431 | void buildClassBody(llvm::StructType* cls) { 432 | // Implement here... 433 | } 434 | 435 | /** 436 | * Creates a vtable per class. 437 | * 438 | * vTable stores method references to support 439 | * inheritance and method overriding. 440 | */ 441 | void buildVTable(llvm::StructType* cls) { 442 | // Implement here... 443 | } 444 | 445 | /** 446 | * Whether `exp` is a non-empty list headed by the symbol `tag`. 447 | */ 448 | bool isTaggedList(const Exp& exp, const std::string& tag) { 449 | return exp.type == ExpType::LIST && !exp.list.empty() && 450 | exp.list[0].type == ExpType::SYMBOL && exp.list[0].string == tag; 451 | } 452 | 453 | /** 454 | * (var ...) 455 | */ 456 | bool isVar(const Exp& exp) { return isTaggedList(exp, "var"); } 457 | 458 | /** 459 | * (def ...) 460 | */ 461 | bool isDef(const Exp& exp) { return isTaggedList(exp, "def"); } 462 | 463 | /** 464 | * (new ...) 465 | */ 466 | bool isNew(const Exp& exp) { return isTaggedList(exp, "new"); } 467 | 468 | /** 469 | * (prop ...) 470 | */ 471 | bool isProp(const Exp& exp) { return isTaggedList(exp, "prop"); } 472 | 473 | /** 474 | * (super ...) 475 | */ 476 | bool isSuper(const Exp& exp) { return isTaggedList(exp, "super"); } 477 | 478 | /** 479 | * Returns a type by name. 480 | */ 481 | llvm::StructType* getClassByName(const std::string& name) { 482 | return llvm::StructType::getTypeByName(*ctx, name); 483 | } 484 | 485 | /** 486 | * Extracts var or parameter name considering type. 487 | * 488 | * x -> x 489 | * (x number) -> x 490 | */ 491 | std::string extractVarName(const Exp& exp) { 492 | return exp.type == ExpType::LIST ? exp.list[0].string : exp.string; 493 | } 494 | 495 | /** 496 | * Extracts var or parameter type with i32 as default. 
497 | * 498 | * x -> i32 499 | * (x number) -> number 500 | */ 501 | llvm::Type* extractVarType(const Exp& exp) { 502 | return exp.type == ExpType::LIST && exp.list.size() > 1 503 | ? getTypeFromString(exp.list[1].string) : builder->getInt32Ty(); 504 | } 505 | 506 | /** 507 | * Returns LLVM type from string representation. 508 | */ 509 | llvm::Type* getTypeFromString(const std::string& type_) { 510 | // number -> i32 511 | if (type_ == "number") { 512 | return builder->getInt32Ty(); 513 | } 514 | 515 | // string -> i8* (aka char*) 516 | if (type_ == "string") { 517 | return builder->getInt8Ty()->getPointerTo(); 518 | } 519 | 520 | // Classes (at() throws std::out_of_range for an unknown name instead of inserting a null entry): 521 | return classMap_.at(type_).cls->getPointerTo(); 522 | } 523 | 524 | /** 525 | * Whether function has return type defined: (def name params -> type body). 526 | */ 527 | bool hasReturnType(const Exp& fnExp) { 528 | return fnExp.list.size() > 4 && fnExp.list[3].type == ExpType::SYMBOL && 529 | fnExp.list[3].string == "->"; 530 | } 531 | 532 | /** 533 | * Exp function to LLVM function params. 534 | * 535 | * (def square ((x number)) -> number ...) 536 | * 537 | * llvm::FunctionType::get(returnType, paramTypes, false); 538 | */ 539 | llvm::FunctionType* extractFunctionType(const Exp& fnExp) { 540 | auto params = fnExp.list[2]; 541 | 542 | // Return type: 543 | auto returnType = hasReturnType(fnExp) 544 | ? getTypeFromString(fnExp.list[4].string) 545 | : builder->getInt32Ty(); 546 | 547 | // Parameter types: 548 | std::vector paramTypes{}; 549 | 550 | for (auto& param : params.list) { 551 | auto paramName = extractVarName(param); 552 | auto paramTy = extractVarType(param); 553 | 554 | // The `self` name is special, meaning instance of a class: 555 | paramTypes.push_back( 556 | paramName == "self" ? (llvm::Type*)cls->getPointerTo() : paramTy); 557 | } 558 | 559 | return llvm::FunctionType::get(returnType, paramTypes, /* varargs */ false); 560 | } 561 | 562 | /** 563 | * Compiles a function. 
564 | * 565 | * Untyped: (def square (x) (* x x)) - i32 by default 566 | * 567 | * Typed: (def square ((x number)) -> number (* x x)) 568 | */ 569 | llvm::Value* compileFunction(const Exp& fnExp, std::string fnName, Env env) { 570 | // Implement here... 571 | } 572 | 573 | /** 574 | * Allocates a local variable at the top of the current function's entry block and defines it in `env`. Result is the alloca instruction. 575 | */ 576 | llvm::Value* allocVar(const std::string& name, llvm::Type* type_, Env env) { 577 | // Insert before any existing instruction so the alloca never lands after the block terminator: 578 | varsBuilder->SetInsertPoint(&fn->getEntryBlock(), fn->getEntryBlock().begin()); 579 | auto varAlloc = varsBuilder->CreateAlloca(type_, nullptr, name.c_str()); 580 | 581 | // Add to the environment: 582 | env->define(name, varAlloc); 583 | 584 | return varAlloc; 585 | } 586 | 587 | /** 588 | * Creates a global variable. 589 | */ 590 | llvm::GlobalVariable* createGlobalVar(const std::string& name, 591 | llvm::Constant* init) { 592 | // Implement here... 593 | } 594 | 595 | /** 596 | * Define external functions (e.g. printf from libc) 597 | */ 598 | void setupExternFunctions() { 599 | // Implement here... 600 | } 601 | 602 | /** 603 | * Creates a function. 604 | */ 605 | llvm::Function* createFunction(const std::string& fnName, 606 | llvm::FunctionType* fnType, Env env) { 607 | // Implement here... 608 | } 609 | 610 | /** 611 | * Creates function prototype (defines the function, but not the body) 612 | */ 613 | llvm::Function* createFunctionProto(const std::string& fnName, 614 | llvm::FunctionType* fnType, Env env) { 615 | auto fn = llvm::Function::Create(fnType, llvm::Function::ExternalLinkage, 616 | fnName, *module); 617 | verifyFunction(*fn); 618 | 619 | // Install in the environment: 620 | env->define(fnName, fn); 621 | 622 | return fn; 623 | } 624 | 625 | /** 626 | * Creates function block. 627 | */ 628 | void createFunctionBlock(llvm::Function* fn) { 629 | // Implement here... 630 | } 631 | 632 | /** 633 | * Creates a basic block. If the `fn` is passed, the block is 634 | * automatically appended to the parent function. 
Otherwise, 635 | * the block should later be appended manually via 636 | * fn->getBasicBlockList().push_back(block); 637 | */ 638 | llvm::BasicBlock* createBB(std::string name, llvm::Function* fn = nullptr) { 639 | return llvm::BasicBlock::Create(*ctx, name, fn); 640 | } 641 | 642 | /** 643 | * Saves IR to file. 644 | */ 645 | void saveModuleToFile(const std::string& fileName) { 646 | std::error_code errorCode; 647 | llvm::raw_fd_ostream outLL(fileName, errorCode); 648 | module->print(outLL, nullptr); 649 | } 650 | 651 | /** 652 | * Initialize the module. 653 | */ 654 | void moduleInit() { 655 | // Open a new context and module. 656 | ctx = std::make_unique(); 657 | module = std::make_unique("EvaLLVM", *ctx); 658 | 659 | // Create a new builder for the module. 660 | builder = std::make_unique>(*ctx); 661 | 662 | // Vars builder: 663 | varsBuilder = std::make_unique>(*ctx); 664 | } 665 | 666 | /** 667 | * Sets up The Global Environment. 668 | */ 669 | void setupGlobalEnvironment() { 670 | // Implement here... 671 | } 672 | 673 | /** 674 | * Sets up target triple. 675 | */ 676 | void setupTargetTriple() { 677 | // llvm::sys::getDefaultTargetTriple() 678 | // Implement here... 679 | } 680 | 681 | /** 682 | * Parser. 683 | */ 684 | std::unique_ptr parser; 685 | 686 | /** 687 | * Global Environment (symbol table). 688 | */ 689 | std::shared_ptr GlobalEnv; 690 | 691 | /** 692 | * Currently compiling class. 693 | */ 694 | llvm::StructType* cls = nullptr; 695 | 696 | /** 697 | * Class info. 698 | */ 699 | std::map classMap_; 700 | 701 | /** 702 | * Currently compiling function. 703 | */ 704 | llvm::Function* fn; 705 | 706 | /** 707 | * Global LLVM context. 708 | * It owns and manages the core "global" data of LLVM's core 709 | * infrastructure, including the type and constant unique tables. 710 | */ 711 | std::unique_ptr ctx; 712 | 713 | /** 714 | * A Module instance is used to store all the information related to an 715 | * LLVM module. 
Modules are the top level container of all other LLVM 716 | * Intermediate Representation (IR) objects. Each module directly contains a 717 | * list of globals variables, a list of functions, a list of libraries (or 718 | * other modules) this module depends on, a symbol table, and various data 719 | * about the target's characteristics. 720 | * 721 | * A module maintains a GlobalList object that is used to hold all 722 | * constant references to global variables in the module. When a global 723 | * variable is destroyed, it should have no entries in the GlobalList. 724 | * The main container class for the LLVM Intermediate Representation. 725 | */ 726 | std::unique_ptr module; 727 | 728 | /** 729 | * Extra builder for variables declaration. 730 | * This builder always prepends to the beginning of the 731 | * function entry block. 732 | */ 733 | std::unique_ptr> varsBuilder; 734 | 735 | /** 736 | * IR Builder. 737 | * 738 | * This provides a uniform API for creating instructions and inserting 739 | * them into a basic block: either at the end of a BasicBlock, or at a 740 | * specific iterator location in a block. 741 | */ 742 | std::unique_ptr> builder; 743 | }; 744 | 745 | #endif -------------------------------------------------------------------------------- /src/parser/EvaParser.h: -------------------------------------------------------------------------------- 1 | /** 2 | * LR parser for C++ generated by the Syntax tool. 
3 | * 4 | * https://www.npmjs.com/package/syntax-cli 5 | * 6 | * npm install -g syntax-cli 7 | * 8 | * syntax-cli --help 9 | * 10 | * To regenerate run: 11 | * 12 | * syntax-cli \ 13 | * --grammar ~/path-to-grammar-file \ 14 | * --mode \ 15 | * --output ~/ParserClassName.h 16 | */ 17 | #ifndef __Syntax_LR_Parser_h 18 | #define __Syntax_LR_Parser_h 19 | 20 | #pragma clang diagnostic push 21 | #pragma clang diagnostic ignored "-Wunused-private-field" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | // ------------------------------------ 34 | // Module include prologue. 35 | // 36 | // Should include at least value/result type: 37 | // 38 | // type Value = <...>; 39 | // 40 | // Or struct Value { ... }; 41 | // 42 | // Can also include parsing hooks: 43 | // 44 | // void onParseBegin(const Parser& parser, const std::string& str) { 45 | // ... 46 | // } 47 | // 48 | // void onParseBegin(const Parser& parser, const Value& result) { 49 | // ... 50 | // } 51 | // 52 | // clang-format off 53 | #include 54 | #include 55 | 56 | /** 57 | * Expression type. 58 | */ 59 | enum class ExpType { 60 | NUMBER, 61 | STRING, 62 | SYMBOL, 63 | LIST, 64 | }; 65 | 66 | /** 67 | * Expression. 68 | */ 69 | struct Exp { 70 | ExpType type; 71 | 72 | int number; 73 | std::string string; 74 | std::vector list; 75 | 76 | // Numbers: 77 | Exp(int number) : type(ExpType::NUMBER), number(number) {} 78 | 79 | // Strings, Symbols: 80 | Exp(std::string& strVal) { 81 | if (strVal[0] == '"') { 82 | type = ExpType::STRING; 83 | string = strVal.substr(1, strVal.size() - 2); 84 | } else { 85 | type = ExpType::SYMBOL; 86 | string = strVal; 87 | } 88 | } 89 | 90 | // Lists: 91 | Exp(std::vector list) : type(ExpType::LIST), list(list) {} 92 | 93 | }; 94 | 95 | using Value = Exp; // clang-format on 96 | 97 | namespace syntax { 98 | 99 | /** 100 | * Tokenizer class. 
101 | */ 102 | // clang-format off 103 | /** 104 | * Generic tokenizer used by the parser in the Syntax tool. 105 | * 106 | * https://www.npmjs.com/package/syntax-cli 107 | */ 108 | 109 | #ifndef __Syntax_Tokenizer_h 110 | #define __Syntax_Tokenizer_h 111 | 112 | class Tokenizer; 113 | 114 | // ------------------------------------------------------------------ 115 | // TokenType. 116 | 117 | enum class TokenType { 118 | __EMPTY = -1, 119 | // clang-format off 120 | NUMBER = 4, 121 | STRING = 5, 122 | SYMBOL = 6, 123 | TOKEN_TYPE_7 = 7, 124 | TOKEN_TYPE_8 = 8, 125 | __EOF = 9 126 | // clang-format on 127 | }; 128 | 129 | // ------------------------------------------------------------------ 130 | // Token. 131 | 132 | struct Token { 133 | TokenType type; 134 | std::string value; 135 | 136 | int startOffset; 137 | int endOffset; 138 | int startLine; 139 | int endLine; 140 | int startColumn; 141 | int endColumn; 142 | }; 143 | 144 | using SharedToken = std::shared_ptr; 145 | 146 | typedef TokenType (*LexRuleHandler)(const Tokenizer&, const std::string&); 147 | 148 | // ------------------------------------------------------------------ 149 | // Lex rule: [regex, handler] 150 | 151 | struct LexRule { 152 | std::regex regex; 153 | LexRuleHandler handler; 154 | }; 155 | 156 | // ------------------------------------------------------------------ 157 | // Token. 158 | 159 | enum TokenizerState { 160 | // clang-format off 161 | INITIAL 162 | // clang-format on 163 | }; 164 | 165 | // ------------------------------------------------------------------ 166 | // Tokenizer. 167 | 168 | class Tokenizer { 169 | public: 170 | /** 171 | * Initializes a parsing string. 172 | */ 173 | void initString(const std::string& str) { 174 | str_ = str; 175 | 176 | // Initialize states. 
177 | states_.clear(); 178 | states_.push_back(TokenizerState::INITIAL); 179 | 180 | cursor_ = 0; 181 | currentLine_ = 1; 182 | currentColumn_ = 0; 183 | currentLineBeginOffset_ = 0; 184 | 185 | tokenStartOffset_ = 0; 186 | tokenEndOffset_ = 0; 187 | tokenStartLine_ = 0; 188 | tokenEndLine_ = 0; 189 | tokenStartColumn_ = 0; 190 | tokenEndColumn_ = 0; 191 | } 192 | 193 | /** 194 | * Whether there are still tokens in the stream. 195 | */ 196 | inline bool hasMoreTokens() { return cursor_ <= str_.length(); } 197 | 198 | /** 199 | * Returns current tokenizing state. 200 | */ 201 | TokenizerState getCurrentState() { return states_.back(); } 202 | 203 | /** 204 | * Enters a new state pushing it on the states stack. 205 | */ 206 | void pushState(TokenizerState state) { states_.push_back(state); } 207 | 208 | /** 209 | * Alias for `push_state`. 210 | */ 211 | void begin(TokenizerState state) { states_.push_back(state); } 212 | 213 | /** 214 | * Exits a current state popping it from the states stack. 215 | */ 216 | TokenizerState popState() { 217 | auto state = states_.back(); 218 | states_.pop_back(); 219 | return state; 220 | } 221 | 222 | /** 223 | * Returns next token. 224 | */ 225 | SharedToken getNextToken() { 226 | if (!hasMoreTokens()) { 227 | yytext = __EOF; 228 | return toToken(TokenType::__EOF); 229 | } 230 | 231 | auto strSlice = str_.substr(cursor_); 232 | 233 | auto lexRulesForState = lexRulesByStartConditions_.at(getCurrentState()); 234 | 235 | for (const auto& ruleIndex : lexRulesForState) { 236 | auto rule = lexRules_[ruleIndex]; 237 | std::smatch sm; 238 | 239 | if (std::regex_search(strSlice, sm, rule.regex)) { 240 | yytext = sm[0]; 241 | 242 | captureLocations_(yytext); 243 | cursor_ += yytext.length(); 244 | 245 | // Manual handling of EOF token (the end of string). Return it 246 | // as `EOF` symbol. 
247 | if (yytext.length() == 0) { 248 | cursor_++; 249 | } 250 | 251 | auto tokenType = rule.handler(*this, yytext); 252 | 253 | if (tokenType == TokenType::__EMPTY) { 254 | return getNextToken(); 255 | } 256 | 257 | return toToken(tokenType); 258 | } 259 | } 260 | 261 | if (isEOF()) { 262 | cursor_++; 263 | yytext = __EOF; 264 | return toToken(TokenType::__EOF); 265 | } 266 | 267 | throwUnexpectedToken(std::string(1, strSlice[0]), currentLine_, 268 | currentColumn_); 269 | } 270 | 271 | /** 272 | * Whether the cursor is at the EOF. 273 | */ 274 | inline bool isEOF() { return cursor_ == str_.length(); } 275 | 276 | SharedToken toToken(TokenType tokenType) { 277 | return std::shared_ptr(new Token{ 278 | .type = tokenType, 279 | .value = yytext, 280 | .startOffset = tokenStartOffset_, 281 | .endOffset = tokenEndOffset_, 282 | .startLine = tokenStartLine_, 283 | .endLine = tokenEndLine_, 284 | .startColumn = tokenStartColumn_, 285 | .endColumn = tokenEndColumn_, 286 | }); 287 | } 288 | 289 | /** 290 | * Throws default "Unexpected token" exception, showing the actual 291 | * line from the source, pointing with the ^ marker to the bad token. 292 | * In addition, shows `line:column` location. 293 | */ 294 | [[noreturn]] void throwUnexpectedToken(const std::string& symbol, int line, 295 | int column) { 296 | std::stringstream ss{str_}; 297 | std::string lineStr; 298 | int currentLine = 1; 299 | 300 | while (currentLine++ <= line) { 301 | std::getline(ss, lineStr, '\n'); 302 | } 303 | 304 | auto pad = std::string(column, ' '); 305 | 306 | std::stringstream errMsg; 307 | 308 | errMsg << "Syntax Error:\n\n" 309 | << lineStr << "\n" 310 | << pad << "^\nUnexpected token \"" << symbol << "\" at " << line 311 | << ":" << column << "\n\n"; 312 | 313 | std::cerr << errMsg.str(); 314 | throw new std::runtime_error(errMsg.str().c_str()); 315 | } 316 | 317 | /** 318 | * Matched text. 319 | */ 320 | std::string yytext; 321 | 322 | private: 323 | /** 324 | * Captures token locations. 
325 | */ 326 | void captureLocations_(const std::string& matched) { 327 | auto len = matched.length(); 328 | 329 | // Absolute offsets. 330 | tokenStartOffset_ = cursor_; 331 | 332 | // Line-based locations, start. 333 | tokenStartLine_ = currentLine_; 334 | tokenStartColumn_ = tokenStartOffset_ - currentLineBeginOffset_; 335 | 336 | // Extract `\n` in the matched token. 337 | std::stringstream ss{matched}; 338 | std::string lineStr; 339 | std::getline(ss, lineStr, '\n'); 340 | while (ss.tellg() > 0 && ss.tellg() <= len) { 341 | currentLine_++; 342 | currentLineBeginOffset_ = tokenStartOffset_ + ss.tellg(); 343 | std::getline(ss, lineStr, '\n'); 344 | } 345 | 346 | tokenEndOffset_ = cursor_ + len; 347 | 348 | // Line-based locations, end. 349 | tokenEndLine_ = currentLine_; 350 | tokenEndColumn_ = tokenEndOffset_ - currentLineBeginOffset_; 351 | currentColumn_ = tokenEndColumn_; 352 | } 353 | 354 | /** 355 | * Lexical rules. 356 | */ 357 | // clang-format off 358 | static constexpr size_t LEX_RULES_COUNT = 8; 359 | static std::array lexRules_; 360 | static std::map> lexRulesByStartConditions_; 361 | // clang-format on 362 | 363 | /** 364 | * Special EOF token. 365 | */ 366 | static std::string __EOF; 367 | 368 | /** 369 | * Tokenizing string. 370 | */ 371 | std::string str_; 372 | 373 | /** 374 | * Cursor for current symbol. 375 | */ 376 | int cursor_; 377 | 378 | /** 379 | * States. 380 | */ 381 | std::vector states_; 382 | 383 | /** 384 | * Line-based location tracking. 385 | */ 386 | int currentLine_; 387 | int currentColumn_; 388 | int currentLineBeginOffset_; 389 | 390 | /** 391 | * Location data of a matched token. 392 | */ 393 | int tokenStartOffset_; 394 | int tokenEndOffset_; 395 | int tokenStartLine_; 396 | int tokenEndLine_; 397 | int tokenStartColumn_; 398 | int tokenEndColumn_; 399 | }; 400 | 401 | // ------------------------------------------------------------------ 402 | // Lexical rule handlers. 
403 | 404 | std::string Tokenizer::__EOF("$"); 405 | 406 | // clang-format off 407 | inline TokenType _lexRule1(const Tokenizer& tokenizer, const std::string& yytext) { 408 | return TokenType::TOKEN_TYPE_7; 409 | } 410 | 411 | inline TokenType _lexRule2(const Tokenizer& tokenizer, const std::string& yytext) { 412 | return TokenType::TOKEN_TYPE_8; 413 | } 414 | 415 | inline TokenType _lexRule3(const Tokenizer& tokenizer, const std::string& yytext) { 416 | return TokenType::__EMPTY; 417 | } 418 | 419 | inline TokenType _lexRule4(const Tokenizer& tokenizer, const std::string& yytext) { 420 | return TokenType::__EMPTY; 421 | } 422 | 423 | inline TokenType _lexRule5(const Tokenizer& tokenizer, const std::string& yytext) { 424 | return TokenType::__EMPTY; 425 | } 426 | 427 | inline TokenType _lexRule6(const Tokenizer& tokenizer, const std::string& yytext) { 428 | return TokenType::STRING; 429 | } 430 | 431 | inline TokenType _lexRule7(const Tokenizer& tokenizer, const std::string& yytext) { 432 | return TokenType::NUMBER; 433 | } 434 | 435 | inline TokenType _lexRule8(const Tokenizer& tokenizer, const std::string& yytext) { 436 | return TokenType::SYMBOL; 437 | } 438 | // clang-format on 439 | 440 | // ------------------------------------------------------------------ 441 | // Lexical rules. 
442 | 443 | // clang-format off 444 | std::array Tokenizer::lexRules_ = {{ 445 | {std::regex(R"(^\()"), &_lexRule1}, 446 | {std::regex(R"(^\))"), &_lexRule2}, 447 | {std::regex(R"(^\/\/.*)"), &_lexRule3}, 448 | {std::regex(R"(^\/\*[\s\S]*?\*\/)"), &_lexRule4}, 449 | {std::regex(R"(^\s+)"), &_lexRule5}, 450 | {std::regex(R"(^"[^\"]*")"), &_lexRule6}, 451 | {std::regex(R"(^\d+)"), &_lexRule7}, 452 | {std::regex(R"(^[\w\-+*=!<>/]+)"), &_lexRule8} 453 | }}; 454 | std::map> Tokenizer::lexRulesByStartConditions_ = {{TokenizerState::INITIAL, {0, 1, 2, 3, 4, 5, 6, 7}}}; 455 | // clang-format on 456 | 457 | #endif 458 | // clang-format on 459 | 460 | #define POP_V() \ 461 | parser.valuesStack.back(); \ 462 | parser.valuesStack.pop_back() 463 | 464 | #define POP_T() \ 465 | parser.tokensStack.back(); \ 466 | parser.tokensStack.pop_back() 467 | 468 | #define PUSH_VR() parser.valuesStack.push_back(__) 469 | #define PUSH_TR() parser.tokensStack.push_back(__) 470 | 471 | /** 472 | * Parsing table type. 473 | */ 474 | enum class TE { 475 | Accept, 476 | Shift, 477 | Reduce, 478 | Transit, 479 | }; 480 | 481 | /** 482 | * Parsing table entry. 483 | */ 484 | struct TableEntry { 485 | TE type; 486 | int value; 487 | }; 488 | 489 | // clang-format off 490 | class EvaParser; 491 | // clang-format on 492 | 493 | using yyparse = EvaParser; 494 | 495 | typedef void (*ProductionHandler)(yyparse&); 496 | 497 | /** 498 | * Encoded production. 499 | * 500 | * opcode - encoded index 501 | * rhsLength - length of the RHS to pop. 502 | */ 503 | struct Production { 504 | int opcode; 505 | int rhsLength; 506 | ProductionHandler handler; 507 | }; 508 | 509 | // Key: Encoded symbol (terminal or non-terminal) index 510 | // Value: TableEntry 511 | using Row = std::map; 512 | 513 | /** 514 | * Parser class. 515 | */ 516 | // clang-format off 517 | class EvaParser { 518 | // clang-format on 519 | public: 520 | /** 521 | * Parsing values stack. 
522 | */ 523 | std::vector valuesStack; 524 | 525 | /** 526 | * Token values stack. 527 | */ 528 | std::vector tokensStack; 529 | 530 | /** 531 | * Parsing states stack. 532 | */ 533 | std::vector statesStack; 534 | 535 | /** 536 | * Tokenizer. 537 | */ 538 | Tokenizer tokenizer; 539 | 540 | /** 541 | * Previous state to calculate the next one. 542 | */ 543 | int previousState; 544 | 545 | /** 546 | * Parses a string. 547 | */ 548 | Value parse(const std::string& str) { 549 | // clang-format off 550 | 551 | // clang-format on 552 | 553 | // Initialize the tokenizer and the string. 554 | tokenizer.initString(str); 555 | 556 | // Initialize the stacks. 557 | valuesStack.clear(); 558 | tokensStack.clear(); 559 | statesStack.clear(); 560 | 561 | // Initial 0 state. 562 | statesStack.push_back(0); 563 | 564 | auto token = tokenizer.getNextToken(); 565 | auto shiftedToken = token; 566 | 567 | // Main parsing loop. 568 | for (;;) { 569 | auto state = statesStack.back(); 570 | auto column = (int)token->type; 571 | 572 | if (table_[state].count(column) == 0) { 573 | throwUnexpectedToken(token); 574 | } 575 | 576 | auto entry = table_[state].at(column); 577 | 578 | // Shift a token, go to state. 579 | if (entry.type == TE::Shift) { 580 | // Push token. 581 | tokensStack.push_back(token->value); 582 | 583 | // Push next state number: "s5" -> 5 584 | statesStack.push_back(entry.value); 585 | 586 | shiftedToken = token; 587 | token = tokenizer.getNextToken(); 588 | } 589 | 590 | // Reduce by production. 591 | else if (entry.type == TE::Reduce) { 592 | auto productionNumber = entry.value; 593 | auto production = productions_[productionNumber]; 594 | 595 | tokenizer.yytext = shiftedToken->value; 596 | 597 | auto rhsLength = production.rhsLength; 598 | while (rhsLength > 0) { 599 | statesStack.pop_back(); 600 | rhsLength--; 601 | } 602 | 603 | // Call the handler. 
604 | production.handler(*this); 605 | 606 | auto previousState = statesStack.back(); 607 | 608 | auto symbolToReduceWith = production.opcode; 609 | auto nextStateEntry = table_[previousState].at(symbolToReduceWith); 610 | assert(nextStateEntry.type == TE::Transit); 611 | 612 | statesStack.push_back(nextStateEntry.value); 613 | } 614 | 615 | // Accept the string. 616 | else if (entry.type == TE::Accept) { 617 | // Pop state number. 618 | statesStack.pop_back(); 619 | 620 | // Pop the parsed value. 621 | // clang-format off 622 | auto result = valuesStack.back(); valuesStack.pop_back(); 623 | // clang-format on 624 | 625 | if (statesStack.size() != 1 || statesStack.back() != 0 || 626 | tokenizer.hasMoreTokens()) { 627 | throwUnexpectedToken(token); 628 | } 629 | 630 | statesStack.pop_back(); 631 | 632 | // clang-format off 633 | 634 | // clang-format on 635 | 636 | return result; 637 | } 638 | } 639 | } 640 | 641 | private: 642 | /** 643 | * Throws parser error on unexpected token. 644 | */ 645 | [[noreturn]] void throwUnexpectedToken(SharedToken token) { 646 | if (token->type == TokenType::__EOF && !tokenizer.hasMoreTokens()) { 647 | std::string errMsg = "Unexpected end of input.\n"; 648 | std::cerr << errMsg; 649 | throw std::runtime_error(errMsg.c_str()); 650 | } 651 | tokenizer.throwUnexpectedToken(token->value, token->startLine, 652 | token->startColumn); 653 | } 654 | 655 | // clang-format off 656 | static constexpr size_t PRODUCTIONS_COUNT = 9; 657 | static std::array productions_; 658 | 659 | static constexpr size_t ROWS_COUNT = 11; 660 | static std::array table_; 661 | // clang-format on 662 | }; 663 | 664 | // ------------------------------------------------------------------ 665 | // Productions. 666 | 667 | // clang-format off 668 | void _handler1(yyparse& parser) { 669 | // Semantic action prologue. 670 | auto _1 = POP_V(); 671 | 672 | auto __ = _1; 673 | 674 | // Semantic action epilogue. 
675 | PUSH_VR(); 676 | 677 | } 678 | 679 | void _handler2(yyparse& parser) { 680 | // Semantic action prologue. 681 | auto _1 = POP_V(); 682 | 683 | auto __ = _1; 684 | 685 | // Semantic action epilogue. 686 | PUSH_VR(); 687 | 688 | } 689 | 690 | void _handler3(yyparse& parser) { 691 | // Semantic action prologue. 692 | auto _1 = POP_V(); 693 | 694 | auto __ = _1; 695 | 696 | // Semantic action epilogue. 697 | PUSH_VR(); 698 | 699 | } 700 | 701 | void _handler4(yyparse& parser) { 702 | // Semantic action prologue. 703 | auto _1 = POP_T(); 704 | 705 | auto __ = Exp(std::stoi(_1)) ; 706 | 707 | // Semantic action epilogue. 708 | PUSH_VR(); 709 | 710 | } 711 | 712 | void _handler5(yyparse& parser) { 713 | // Semantic action prologue. 714 | auto _1 = POP_T(); 715 | 716 | auto __ = Exp(_1) ; 717 | 718 | // Semantic action epilogue. 719 | PUSH_VR(); 720 | 721 | } 722 | 723 | void _handler6(yyparse& parser) { 724 | // Semantic action prologue. 725 | auto _1 = POP_T(); 726 | 727 | auto __ = Exp(_1) ; 728 | 729 | // Semantic action epilogue. 730 | PUSH_VR(); 731 | 732 | } 733 | 734 | void _handler7(yyparse& parser) { 735 | // Semantic action prologue. 736 | parser.tokensStack.pop_back(); 737 | auto _2 = POP_V(); 738 | parser.tokensStack.pop_back(); 739 | 740 | auto __ = _2 ; 741 | 742 | // Semantic action epilogue. 743 | PUSH_VR(); 744 | 745 | } 746 | 747 | void _handler8(yyparse& parser) { 748 | // Semantic action prologue. 749 | 750 | 751 | auto __ = Exp(std::vector{}) ; 752 | 753 | // Semantic action epilogue. 754 | PUSH_VR(); 755 | 756 | } 757 | 758 | void _handler9(yyparse& parser) { 759 | // Semantic action prologue. 760 | auto _2 = POP_V(); 761 | auto _1 = POP_V(); 762 | 763 | _1.list.push_back(_2); auto __ = _1 ; 764 | 765 | // Semantic action epilogue. 
766 | PUSH_VR(); 767 | 768 | } 769 | // clang-format on 770 | 771 | // clang-format off 772 | std::array yyparse::productions_ = {{{-1, 1, &_handler1}, 773 | {0, 1, &_handler2}, 774 | {0, 1, &_handler3}, 775 | {1, 1, &_handler4}, 776 | {1, 1, &_handler5}, 777 | {1, 1, &_handler6}, 778 | {2, 3, &_handler7}, 779 | {3, 0, &_handler8}, 780 | {3, 2, &_handler9}}}; 781 | // clang-format on 782 | 783 | // ------------------------------------------------------------------ 784 | // Parsing table. 785 | 786 | // clang-format off 787 | std::array yyparse::table_ = { 788 | Row {{0, {TE::Transit, 1}}, {1, {TE::Transit, 2}}, {2, {TE::Transit, 3}}, {4, {TE::Shift, 4}}, {5, {TE::Shift, 5}}, {6, {TE::Shift, 6}}, {7, {TE::Shift, 7}}}, 789 | Row {{9, {TE::Accept, 0}}}, 790 | Row {{4, {TE::Reduce, 1}}, {5, {TE::Reduce, 1}}, {6, {TE::Reduce, 1}}, {7, {TE::Reduce, 1}}, {8, {TE::Reduce, 1}}, {9, {TE::Reduce, 1}}}, 791 | Row {{4, {TE::Reduce, 2}}, {5, {TE::Reduce, 2}}, {6, {TE::Reduce, 2}}, {7, {TE::Reduce, 2}}, {8, {TE::Reduce, 2}}, {9, {TE::Reduce, 2}}}, 792 | Row {{4, {TE::Reduce, 3}}, {5, {TE::Reduce, 3}}, {6, {TE::Reduce, 3}}, {7, {TE::Reduce, 3}}, {8, {TE::Reduce, 3}}, {9, {TE::Reduce, 3}}}, 793 | Row {{4, {TE::Reduce, 4}}, {5, {TE::Reduce, 4}}, {6, {TE::Reduce, 4}}, {7, {TE::Reduce, 4}}, {8, {TE::Reduce, 4}}, {9, {TE::Reduce, 4}}}, 794 | Row {{4, {TE::Reduce, 5}}, {5, {TE::Reduce, 5}}, {6, {TE::Reduce, 5}}, {7, {TE::Reduce, 5}}, {8, {TE::Reduce, 5}}, {9, {TE::Reduce, 5}}}, 795 | Row {{3, {TE::Transit, 8}}, {4, {TE::Reduce, 7}}, {5, {TE::Reduce, 7}}, {6, {TE::Reduce, 7}}, {7, {TE::Reduce, 7}}, {8, {TE::Reduce, 7}}}, 796 | Row {{0, {TE::Transit, 10}}, {1, {TE::Transit, 2}}, {2, {TE::Transit, 3}}, {4, {TE::Shift, 4}}, {5, {TE::Shift, 5}}, {6, {TE::Shift, 6}}, {7, {TE::Shift, 7}}, {8, {TE::Shift, 9}}}, 797 | Row {{4, {TE::Reduce, 6}}, {5, {TE::Reduce, 6}}, {6, {TE::Reduce, 6}}, {7, {TE::Reduce, 6}}, {8, {TE::Reduce, 6}}, {9, {TE::Reduce, 6}}}, 798 | Row {{4, {TE::Reduce, 8}}, {5, 
{TE::Reduce, 8}}, {6, {TE::Reduce, 8}}, {7, {TE::Reduce, 8}}, {8, {TE::Reduce, 8}}} 799 | }; 800 | // clang-format on 801 | 802 | } // namespace syntax 803 | 804 | #endif --------------------------------------------------------------------------------