├── .gitignore ├── Compiler ├── .clang-format ├── AST │ ├── ast_nodes.cpp │ └── ast_nodes.h ├── helpers │ ├── char_source.cpp │ ├── char_source.h │ ├── command_pipe.h │ ├── compile_exception.h │ ├── lib.h │ ├── mixed.h │ └── token.h └── src │ ├── compiler.cpp │ ├── compiler.h │ ├── lexer.cpp │ ├── lexer.h │ ├── main.cpp │ ├── parser.cpp │ ├── parser.h │ ├── translator.cpp │ └── translator.h ├── README.md ├── initializer.ps1 └── initializer.sh /.gitignore: -------------------------------------------------------------------------------- 1 | /Compiler/cmake-build-debug/ 2 | .init_compiler.swp 3 | /Compiler/cmake-build-debug 4 | /Compiler/.idea 5 | /cpm/* 6 | /.idea* 7 | -------------------------------------------------------------------------------- /Compiler/.clang-format: -------------------------------------------------------------------------------- 1 | # Generated from CLion C/C++ Code Style settings 2 | BasedOnStyle: LLVM 3 | AccessModifierOffset: -4 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveAssignments: None 6 | AlignOperands: Align 7 | AllowAllArgumentsOnNextLine: false 8 | AllowAllConstructorInitializersOnNextLine: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: Always 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: All 13 | AllowShortIfStatementsOnASingleLine: Always 14 | AllowShortLambdasOnASingleLine: All 15 | AllowShortLoopsOnASingleLine: true 16 | AlwaysBreakAfterReturnType: None 17 | AlwaysBreakTemplateDeclarations: Yes 18 | BreakBeforeBraces: Custom 19 | BraceWrapping: 20 | AfterCaseLabel: false 21 | AfterClass: false 22 | AfterControlStatement: Never 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterUnion: false 27 | BeforeCatch: false 28 | BeforeElse: false 29 | IndentBraces: false 30 | SplitEmptyFunction: false 31 | SplitEmptyRecord: true 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeTernaryOperators: true 34 | BreakConstructorInitializers: BeforeColon 35 | BreakInheritanceList: BeforeColon 36 | ColumnLimit: 0 37 | CompactNamespaces: false 38 | ContinuationIndentWidth: 8 39 | IndentCaseLabels: true 40 | IndentPPDirectives: None 41 | IndentWidth: 4 42 | KeepEmptyLinesAtTheStartOfBlocks: true 43 | MaxEmptyLinesToKeep: 2 44 | NamespaceIndentation: All 45 | ObjCSpaceAfterProperty: false 46 | ObjCSpaceBeforeProtocolList: true 47 | PointerAlignment: Right 48 | ReflowComments: false 49 | SpaceAfterCStyleCast: true 50 | SpaceAfterLogicalNot: false 51 | SpaceAfterTemplateKeyword: false 52 | SpaceBeforeAssignmentOperators: true 53 | SpaceBeforeCpp11BracedList: false 54 | SpaceBeforeCtorInitializerColon: true 55 | SpaceBeforeInheritanceColon: true 56 | SpaceBeforeParens: ControlStatements 57 | SpaceBeforeRangeBasedForLoopColon: true 58 | SpaceInEmptyParentheses: false 59 | SpacesBeforeTrailingComments: 0 60 | SpacesInAngles: false 61 | SpacesInCStyleCastParentheses: false 62 | SpacesInContainerLiterals: false 63 | SpacesInParentheses: false 64 | SpacesInSquareBrackets: false 65 | TabWidth: 4 66 | UseTab: Never 67 | -------------------------------------------------------------------------------- /Compiler/AST/ast_nodes.cpp: -------------------------------------------------------------------------------- 1 | #include "ast_nodes.h" 2 | 3 | void statement_node::add_node(const std::shared_ptr &node) { 4 | expressions.push_back(node); 5 | } 6 | 7 | variable_node::variable_node(token _name) : variable(std::move(_name)) {} 8 | 9 | number_node::number_node(token _num) : number(std::move(_num)) {} 10 | 11 | string_node::string_node(token str) : string(std::move(str)) {} 12 | 13 | binary_operation_node::binary_operation_node(token op, std::shared_ptr lft, std::shared_ptr rht) 14 | : op(std::move(op)), left(std::move(lft)), right(std::move(rht)) {} 15 | 16 | function_call::function_call(token func, std::vector> _args) 17 | : function(std::move(func)), args(std::move(_args)) {} 18 | 19 | variable_declaration::variable_declaration(token mod, std::shared_ptr var, std::shared_ptr val) 20 | : modifer(std::move(mod)), variable(std::move(var)), value(std::move(val)) {} -------------------------------------------------------------------------------- /Compiler/AST/ast_nodes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "../helpers/token.h" 7 | 8 | struct expression_node { 9 | virtual ~expression_node() = default; 10 | }; 11 | 12 | struct statement_node { 13 | std::vector> expressions; 14 | 15 | void add_node(const std::shared_ptr &node); 16 | }; 17 | 18 | struct variable_node : expression_node { 19 | token variable; 20 | 21 | variable_node(token _name); 22 | }; 23 | 24 | struct number_node : expression_node { 25 | token number; 26 | 27 | number_node(token _num); 28 | }; 29 | 30 | struct string_node : expression_node { 31 | token string; 32 | 33 | string_node(token str); 34 | }; 35 | 36 | struct binary_operation_node : expression_node { 37 | token op; 38 | std::shared_ptr left; 39 | std::shared_ptr right; 40 | 41 | binary_operation_node(token op, std::shared_ptr lft, std::shared_ptr rht); 42 | }; 43 | 44 | struct function_call : expression_node { 45 | token function; 46 | std::vector> args; 47 | 48 | function_call(token func, std::vector> _args); 49 | }; 50 | 51 | struct variable_declaration : expression_node { 52 | token modifer; 53 | std::shared_ptr variable; 54 | std::shared_ptr value; 55 | 56 | variable_declaration(token mod, std::shared_ptr var, std::shared_ptr val); 57 | }; 58 | -------------------------------------------------------------------------------- /Compiler/helpers/char_source.cpp: -------------------------------------------------------------------------------- 1 | #include "char_source.h" 2 | 3 | char_source::char_source(const std::string &file_name) : _ifstream(file_name) {} 4 | 5 | char char_source::next() { 6 | char ch; 7 | _ifstream.get(ch); 8 | if (ch == '\n') { 9 | _number_of_lines++; 10 | } 11 | return ch; 12 | } 13 | 14 | void char_source::open(const std::string &file_name) { 15 | _ifstream.open(file_name); 16 | if (!_ifstream.is_open()) { 17 | throw compile_exception("Cannot open input file or does not have permission"); 18 | } 19 | } 20 | 21 | void char_source::back() { 22 | _ifstream.unget(); 23 | } 24 | 25 | bool char_source::has_next() { 26 | return _ifstream && _ifstream.peek() != EOF; 27 | } 28 | 29 | uint32_t char_source::char_pos() { 30 | return _ifstream.tellg(); 31 | } 32 | 33 | uint32_t char_source::lines_pos() { 34 | return _number_of_lines; 35 | } 36 | -------------------------------------------------------------------------------- /Compiler/helpers/char_source.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "compile_exception.h" 6 | 7 | struct char_source { 8 | char_source() = default; 9 | char_source(const std::string &file_name); 10 | 11 | char next(); 12 | 13 | void open(const std::string &file_name); 14 | 15 | void back(); 16 | 17 | bool has_next(); 18 | 19 | uint32_t char_pos(); 20 | 21 | uint32_t lines_pos(); 22 | 23 | private: 24 | uint32_t _number_of_lines = 1; 25 | std::ifstream _ifstream; 26 | }; 27 | -------------------------------------------------------------------------------- /Compiler/helpers/command_pipe.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "token.h" 8 | 9 | template 10 | struct command_pipe { 11 | std::queue queue; 12 | std::mutex m; 13 | std::condition_variable cv; 14 | 15 | void push(T tokens) { 16 | std::unique_lock lock(m); 17 | queue.push(std::move(tokens)); 18 | lock.unlock(); 19 | cv.notify_one(); 20 | } 21 | 22 | T pop() { 23 | std::unique_lock lock(m); 24 | while (queue.empty()) { 25 | cv.wait(lock); 26 | } 27 | auto res = queue.front(); 28 | queue.pop(); 29 | lock.unlock(); 30 | return res; 31 | } 32 | }; -------------------------------------------------------------------------------- /Compiler/helpers/compile_exception.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct compile_exception : std::exception { 10 | explicit compile_exception(std::string str) : msg(std::move(str)) {} 11 | 12 | 13 | const char *what() const noexcept override { 14 | return msg.c_str(); 15 | } 16 | 17 | private: 18 | std::string msg; 19 | }; -------------------------------------------------------------------------------- /Compiler/helpers/lib.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | constexpr char path_separator = std::filesystem::path::preferred_separator; -------------------------------------------------------------------------------- /Compiler/helpers/mixed.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | class mixed { 7 | public: 8 | std::variant storage; 9 | 10 | mixed() = default; 11 | 12 | mixed(std::string str) : storage(str) {} 13 | 14 | mixed(int num) : storage(num) {} 15 | 16 | mixed(double num) : storage(num) {} 17 | 18 | mixed &operator=(std::string str) { 19 | storage = str; 20 | return *this; 21 | } 22 | 23 | mixed &operator=(float num) { 24 | storage = num; 25 | return *this; 26 | } 27 | 28 | mixed &operator=(int num) { 29 | storage = num; 30 | return *this; 31 | } 32 | 33 | mixed &operator=(const mixed &other) = default; 34 | 35 | friend mixed operator*(int lhs, const mixed &rhs); 36 | 37 | friend mixed operator+(const mixed &lhs, const mixed &rhs); 38 | 39 | friend mixed operator-(const mixed &lhs, const mixed &rhs); 40 | }; 41 | 42 | bool is_integer(const std::string &str) { 43 | size_t pos = 0; 44 | try { 45 | std::stoi(str, &pos); 46 | return pos == str.size(); 47 | } catch (std::invalid_argument &e) { 48 | return false; 49 | } 50 | } 51 | 52 | bool is_double(const std::string &str) { 53 | size_t pos = 0; 54 | try { 55 | std::stod(str, &pos); 56 | return pos == str.size(); 57 | } catch (std::invalid_argument &e) { 58 | return false; 59 | } 60 | } 61 | 62 | mixed operator+(const std::string &str, int num) { 63 | return str + std::to_string(num); 64 | } 65 | 66 | mixed operator+(int num, const std::string &str) { 67 | return std::to_string(num) + str; 68 | } 69 | 70 | mixed operator+(const std::string &str, double num) { 71 | return str + std::to_string(num); 72 | } 73 | 74 | mixed operator+(double num, const std::string &str) { 75 | return std::to_string(num) + str; 76 | } 77 | 78 | mixed operator-(const std::string &str, int num) { 79 | if (is_integer(str)) { 80 | return std::stoi(str) - num; 81 | } else if (is_double(str)) { 82 | return std::stod(str) - num; 83 | } else { 84 | return -num; 85 | } 86 | } 87 | 88 | mixed operator-(const std::string &str, double num) { 89 | if (is_integer(str)) { 90 | return std::stoi(str) - num; 91 | } else if (is_double(str)) { 92 | return std::stod(str) - num; 93 | } else { 94 | return -num; 95 | } 96 | } 97 | 98 | mixed operator-(int num, const std::string &str) { 99 | return -1 * (str - num); 100 | } 101 | 102 | mixed operator-(double num, const std::string &str) { 103 | return -1 * (str - num); 104 | } 105 | 106 | std::string operator-(const std::string &lhs, const std::string &rhs) { 107 | return ""; 108 | } 109 | 110 | std::string operator*(int lhs, const std::string &rhs) { 111 | return rhs; 112 | } 113 | 114 | mixed operator*(int lhs, const mixed &rhs) { 115 | return std::visit([&lhs](auto &&arg){ 116 | return mixed(lhs * arg); 117 | }, rhs.storage); 118 | } 119 | 120 | mixed operator+(const mixed &lhs, const mixed &rhs) { 121 | return std::visit([](auto && lhs, auto &&rhs) { 122 | return mixed(lhs + rhs); 123 | }, lhs.storage, rhs.storage); 124 | } 125 | 126 | mixed operator-(const mixed &lhs, const mixed &rhs) { 127 | return std::visit([](auto &&lhs, auto &&rhs) { 128 | return mixed(lhs - rhs); 129 | }, lhs.storage, rhs.storage); 130 | } 131 | 132 | void print(const mixed &var) { 133 | std::visit([](auto &&arg){ 134 | std::cout << arg << "\n"; 135 | }, var.storage); 136 | } 137 | 138 | template 139 | void print(T t) { 140 | std::cout << t << "\n"; 141 | } 142 | 143 | mixed input() { 144 | std::string str; 145 | std::cin >> str; 146 | if (is_integer(str)) { 147 | return std::stoi(str); 148 | } else if (is_double(str)) { 149 | return std::stod(str); 150 | } else { 151 | return str; 152 | } 153 | } 154 | 155 | mixed input(const std::string &output) { 156 | std::cout << output; 157 | return input(); 158 | } 159 | 160 | #define var mixed 161 | #define val const mixed 162 | -------------------------------------------------------------------------------- /Compiler/helpers/token.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | struct token_type { 9 | std::string name; 10 | std::regex regex; 11 | 12 | token_type(const std::string & _name, const std::string &_regex) 13 | : name(_name), regex(_regex) {} 14 | 15 | friend bool operator==(const token_type &lhs, const token_type &rhs) { 16 | return lhs.name == rhs.name; 17 | } 18 | 19 | friend bool operator!=(const token_type &lhs, const token_type &rhs) { 20 | return lhs.name != rhs.name; 21 | } 22 | 23 | friend std::ostream &operator<<(std::ostream &ostream, const token_type &rhs) { 24 | return ostream << rhs.name << " "; 25 | } 26 | }; 27 | 28 | struct token { 29 | std::string value; 30 | token_type type; 31 | uint32_t pos; 32 | 33 | token(std::string _value, token_type _type, uint32_t p) 34 | : value(std::move(_value)), type(std::move(_type)), pos(p) {} 35 | }; 36 | -------------------------------------------------------------------------------- /Compiler/src/compiler.cpp: -------------------------------------------------------------------------------- 1 | #include "compiler.h" 2 | 3 | #include 4 | 5 | compiler::compiler(const std::map &token_type_list, 6 | const std::set &special_symbols, const std::map &functions_arity, 7 | const std::map & function_value) 8 | : _lxr(token_type_list, special_symbols), _prs(token_type_list, functions_arity, function_value) {} 9 | 10 | void compiler::compile(const std::string &input, const std::string &output, const std::string &cpp_compiler) { 11 | _lxr.open(input); 12 | std::thread lexer_worker(&compiler::lexer_thread, this); 13 | std::shared_ptr root = std::make_shared(); 14 | std::thread parser_worker(&compiler::parser_thread, this, std::ref(root)); 15 | lexer_worker.join(); 16 | parser_worker.join(); 17 | check_exception(); 18 | translator::generate_cpp_code(root, "#include \"" + std::filesystem::current_path().parent_path().string() + 19 | path_separator + "Compiler" + path_separator + "helpers" + path_separator + "mixed.h\""); 20 | run(output, cpp_compiler); 21 | } 22 | 23 | void compiler::run(const std::string &output, const std::string &cpp_compiler) { 24 | #ifdef _WIN32 25 | STARTUPINFO si; 26 | PROCESS_INFORMATION pi; 27 | ZeroMemory(&si, sizeof(si)); 28 | si.cb = sizeof(si); 29 | ZeroMemory(&pi, sizeof(pi)); 30 | std::string cmdArgs = cpp_compiler + std::string(" temporary_cpp_code.cpp -std=c++17 -o ") + output; 31 | if (!CreateProcess(cpp_compiler.c_str(), cmdArgs.data(), 0, 0, 0, 0, 0, 0, &si, &pi)) { 32 | DeleteFile("temporary_cpp_code.cpp"); 33 | throw compile_exception("Failed to start c++ compiler"); 34 | } 35 | WaitForSingleObject(pi.hProcess, INFINITE); 36 | CloseHandle(pi.hProcess); 37 | CloseHandle(pi.hThread); 38 | if (!DeleteFile("temporary_cpp_code.cpp")) { 39 | std::cout << "Failed to delete temporary file\n"; 40 | } 41 | #elif __unix__ 42 | pid_t pid; 43 | int status; 44 | fflush(NULL); 45 | pid = fork(); 46 | switch (pid) { 47 | case -1: 48 | throw compile_exception("Failed to start c++ compiler"); 49 | break; 50 | case 0: 51 | execl(cpp_compiler.c_str(), cpp_compiler.c_str(), "temporary_cpp_code.cpp", "-o", output.c_str(), "-std=c++17", NULL); 52 | break; 53 | default: 54 | if (waitpid(pid, &status, 0) != -1) { 55 | if (status != 0) { 56 | std::cout << "C++ exited with status " << status << "\n"; 57 | } 58 | } 59 | if (std::remove("temporary_cpp_code.cpp")) { 60 | std::cout << "Failed to delete temporary file\n"; 61 | } 62 | break; 63 | } 64 | #else 65 | throw compile_exception("Your platform is unsupported. Now available Unix and Windows"); 66 | #endif 67 | } 68 | 69 | void compiler::lexer_thread() { 70 | try { 71 | auto tokens = _lxr.next_command(); 72 | while (!tokens.empty()) { 73 | _pipe.push(tokens); 74 | tokens = _lxr.next_command(); 75 | } 76 | _pipe.push({}); 77 | } catch (compile_exception &e) { 78 | _errors.push(std::current_exception()); 79 | _pipe.push({}); 80 | } 81 | } 82 | 83 | void compiler::parser_thread(std::shared_ptr &root) { 84 | try { 85 | auto tokens = _pipe.pop(); 86 | while (!tokens.empty()) { 87 | root->add_node(_prs.parse_statement(tokens)); 88 | tokens = _pipe.pop(); 89 | } 90 | } catch (compile_exception &e) { 91 | _errors.push(std::current_exception()); 92 | } 93 | } 94 | 95 | void compiler::check_exception() { 96 | if (!_errors.queue.empty()) { 97 | auto e = _errors.queue.front(); 98 | _errors.queue.pop(); 99 | std::rethrow_exception(e); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /Compiler/src/compiler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #ifdef _WIN32 7 | #include 8 | #elif __unix__ 9 | #include 10 | #include 11 | #include 12 | #endif 13 | 14 | 15 | #include "../helpers/command_pipe.h" 16 | #include "../helpers/lib.h" 17 | #include "parser.h" 18 | #include "translator.h" 19 | 20 | class compiler { 21 | lexer _lxr; 22 | parser _prs; 23 | command_pipe> _pipe; 24 | command_pipe _errors; 25 | 26 | static void run(const std::string &output, const std::string &cpp_compiler); 27 | 28 | void lexer_thread(); 29 | 30 | void parser_thread(std::shared_ptr &root); 31 | 32 | void check_exception(); 33 | 34 | public: 35 | compiler(const std::map &token_type_list,const std::set &special_symbols, 36 | const std::map &functions_arity, const std::map &function_value); 37 | 38 | void compile(const std::string &input, const std::string &output, const std::string &cpp_compiler); 39 | }; 40 | -------------------------------------------------------------------------------- /Compiler/src/lexer.cpp: -------------------------------------------------------------------------------- 1 | #include "lexer.h" 2 | 3 | lexer::lexer(std::map tokens, std::set symbols) 4 | : _token_type_list(std::move(tokens)), _special_symbols(std::move(symbols)) {} 5 | 6 | void lexer::open(const std::string &file_name) { 7 | _source.open(file_name); 8 | } 9 | 10 | std::vector lexer::next_command() { 11 | std::vector tokens; 12 | for (auto item = next_token(); item.has_value(); item = next_token()) { 13 | tokens.push_back(item.value()); 14 | if (item->type == _token_type_list.at("semicolon")) { 15 | break; 16 | } 17 | } 18 | return tokens; 19 | } 20 | 21 | void lexer::skip_white_space() { 22 | while (_source.has_next()) { 23 | char ch = _source.next(); 24 | if (ch != ' ' && ch != '\n' && ch != '\t') { 25 | _source.back(); 26 | break; 27 | } 28 | } 29 | } 30 | 31 | void lexer::skip_comment() { 32 | if (_source.next() != '/') { 33 | _source.back(); 34 | return; 35 | } 36 | char ch = _source.next(); 37 | if (ch == '/') { 38 | go_to_enter(); 39 | } else { 40 | size_t pos = _source.lines_pos(); 41 | if (ch == '\n') { 42 | pos--; 43 | } 44 | throw compile_exception("\n" + std::to_string(pos) + " | One slash instead of two expected\n"); 45 | } 46 | } 47 | 48 | void lexer::go_to_enter() { 49 | while (_source.has_next() && _source.next() != '\n') {} 50 | } 51 | 52 | std::optional lexer::next_token() { 53 | skip_white_space(); 54 | skip_comment(); 55 | skip_white_space(); 56 | if (!_source.has_next()) { 57 | return std::nullopt; 58 | } 59 | char ch = _source.next(); 60 | bool is_string = ch == '\"'; 61 | std::string current; 62 | if (_special_symbols.find(ch) == _special_symbols.end()) { 63 | while (_source.has_next()) { 64 | current += ch; 65 | ch = _source.next(); 66 | if (!is_string && _special_symbols.find(ch) != _special_symbols.end()) { 67 | _source.back(); 68 | break; 69 | } 70 | if (is_string && ch == '\"') { 71 | current += ch; 72 | break; 73 | } 74 | } 75 | } else { 76 | if (ch != ' ' && ch != '\n' && ch != '\t') { 77 | current += ch; 78 | } 79 | } 80 | auto it = std::find_if(_token_type_list.begin(), _token_type_list.end(), [¤t](const auto &type) { 81 | return std::regex_match(current, type.second.regex); 82 | }); 83 | if (it == _token_type_list.end()) { 84 | throw compile_exception("\n" + std::to_string(_source.lines_pos()) + " | Invalid language token '" + current + "'\n"); 85 | } 86 | return {token(current, it->second, _source.char_pos() - current.size() + 1)}; 87 | } 88 | -------------------------------------------------------------------------------- /Compiler/src/lexer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../helpers/char_source.h" 9 | #include "../helpers/compile_exception.h" 10 | #include "../helpers/token.h" 11 | 12 | class lexer { 13 | void skip_white_space(); 14 | 15 | void skip_comment(); 16 | 17 | void go_to_enter(); 18 | 19 | std::optional next_token(); 20 | 21 | std::map _token_type_list; 22 | std::set _special_symbols; 23 | char_source _source; 24 | 25 | public: 26 | lexer(std::map tokens, std::set symbols); 27 | 28 | void open(const std::string &file_name); 29 | 30 | std::vector next_command(); 31 | }; 32 | -------------------------------------------------------------------------------- /Compiler/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "compiler.h" 6 | 7 | int main(int argc, char *argv[]) { 8 | std::string cpp_compiler; 9 | std::string output_file; 10 | if (argc < 2) { 11 | std::cerr << "no input file\n" 12 | "use --help for help manual\n"; 13 | return 0; 14 | } else if (std::strcmp(argv[1], "--help") == 0) { 15 | std::cout << "help manual\n" 16 | "Usage: " + std::string(argv[0]) + " file [options]...\n" 17 | "Options:\n" 18 | "\t--help Display this information.\n" 19 | "\t-cmp= Use your . Compiler must support version from c++17. Default value is stored in details/cpp_compiler_path\n" 20 | "\t-o Place the output into \n"; 21 | return 0; 22 | } else { 23 | for (int i = 2; i < argc; ++i) { 24 | if (argv[i][0] != '-') { 25 | std::cerr << std::string("unexpected name ") + argv[i]; 26 | return 0; 27 | } else if (strlen(argv[i]) > 5 && argv[i][1] == 'c' && argv[i][2] == 'm' && argv[i][3] == 'p' && argv[i][4] == '=') { 28 | cpp_compiler = std::string((argv[i] + 5)); 29 | } else if (strlen(argv[i]) > 1 && argv[i][1] == 'o') { 30 | output_file = std::string(argv[++i]); 31 | } else { 32 | std::cerr << std::string("unexpected option ") + argv[i]; 33 | return 0; 34 | } 35 | } 36 | } 37 | if (cpp_compiler.empty()) { 38 | std::string app = argv[0]; 39 | std::ifstream ifstream(app.substr(0, app.find_last_of(path_separator)) + path_separator + "details" + path_separator + "cpp_compiler_path"); 40 | std::getline(ifstream, cpp_compiler); 41 | } 42 | if (output_file.empty()) { 43 | output_file = "output"; 44 | } 45 | try { 46 | std::map token_type_list = {{"number", token_type("number", "[+-]?([0-9]*[.]?[0-9]+|[0-9]+[.]?[0-9]*)")}, 47 | {"string", token_type("string", R"("[^"]*")")}, 48 | {"variable", token_type("variable", "^[a-zA-Z_$][a-zA-Z_$0-9]*$")}, 49 | {"function", token_type("function", "print|input")}, 50 | {"assign", token_type("assign", "[=]")}, 51 | {"operator", token_type("operator", "[+-]")}, 52 | {"lbracket", token_type("lbracket", "[(]")}, 53 | {"rbracket", token_type("rbracket", "[)]")}, 54 | {"modifier", token_type("modifier", "var|val")}, 55 | {"semicolon", token_type("semicolon", "[;]")}}; 56 | std::set special_symbols = {'=', '+', '-', ';', '(', ')', ' ', '\n', '\t'}; 57 | std::map functions_arity = {{"print", 1}, {"input", 0}}; 58 | std::map function_value = {{"print", 0}, {"input", 1}}; 59 | compiler cmp(token_type_list, special_symbols, functions_arity, function_value); 60 | cmp.compile(argv[1], output_file, cpp_compiler); 61 | } catch (compile_exception &e) { 62 | std::cerr << e.what(); 63 | } 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /Compiler/src/parser.cpp: -------------------------------------------------------------------------------- 1 | #include "parser.h" 2 | 3 | parser::parser(std::map types, std::map arity, std::map function_value) 4 | : _token_type_list(std::move(types)), _operators_arity(std::move(arity)), _function_value(std::move(function_value)) {} 5 | 6 | std::shared_ptr parser::parse_statement(std::vector tokens) { 7 | _src.open(std::move(tokens)); 8 | std::shared_ptr command = parse_expression(); 9 | require({_token_type_list.at("semicolon")}); 10 | return command; 11 | } 12 | 13 | std::shared_ptr parser::parse_expression() { 14 | std::vector expected = {_token_type_list.at("modifier"), _token_type_list.at("variable"), _token_type_list.at("function")}; 15 | auto current = require(expected); 16 | if (current.type.name == "modifier") { 17 | token variable = require({_token_type_list.at("variable")}); 18 | if (_defined_variables.find(variable.value) != _defined_variables.end()) { 19 | throw compile_exception("Multiply declaration of variable \"" + variable.value + "\""); 20 | } 21 | _defined_variables.insert({variable.value, current.value == "val"}); 22 | token assignment = require({_token_type_list.at("assign")}); 23 | return std::shared_ptr(new variable_declaration(current, std::make_shared(variable), parse_formula())); 24 | } else if (current.type.name == "variable") { 25 | if (_defined_variables.find(current.value) != _defined_variables.end() && 26 | !_defined_variables.at(current.value)) { 27 | _src.dec(); 28 | return parse_var_assign(parse_factor()); 29 | } else if (!_defined_variables.at(current.value)) { 30 | throw compile_exception("You cannot modify variable \"" + current.value + "\" because its was declared as const"); 31 | } else { 32 | throw compile_exception("You cannot use variable \"" + current.value + "\" before its declaration"); 33 | } 34 | } else { 35 | _src.dec(); 36 | auto function_node = parse_function(); 37 | return function_node; 38 | } 39 | } 40 | 41 | std::shared_ptr parser::parse_var_assign(const std::shared_ptr &variable_node) { 42 | auto var_name = dynamic_cast(variable_node.get())->variable.value; 43 | token assignment = require({_token_type_list.at("assign")}); 44 | auto right_formula = parse_formula(); 45 | return std::shared_ptr(new binary_operation_node(assignment, variable_node, right_formula)); 46 | } 47 | 48 | std::shared_ptr parser::parse_function() { 49 | auto current = require({_token_type_list.at("function")}); 50 | return std::shared_ptr(new function_call(current, parse_function_args(current))); 51 | } 52 | 53 | std::vector> parser::parse_function_args(const token & function) { 54 | require({_token_type_list.at("lbracket")}); 55 | std::vector> args; 56 | while (!match({_token_type_list.at("rbracket")})) { 57 | args.push_back(parse_formula()); 58 | } 59 | if (_operators_arity.at(function.value) > args.size()) { 60 | throw compile_exception("Wrong count of arguments for function '" + function.value + "'"); 61 | } 62 | return args; 63 | } 64 | 65 | std::shared_ptr parser::parse_brackets() { 66 | if (match({_token_type_list.at("lbracket")})) { 67 | std::shared_ptr expr = parse_formula(); 68 | require({_token_type_list.at("rbracket")}); 69 | return expr; 70 | } else if (match({_token_type_list.at("function")})) { 71 | _src.dec(); 72 | auto func = parse_function(); 73 | std::string func_name = dynamic_cast(func.get())->function.value; 74 | if (!_function_value.at(func_name)) { 75 | throw compile_exception(func_name + " hasn't return type"); 76 | } 77 | return func; 78 | } else { 79 | return parse_factor(); 80 | } 81 | } 82 | 83 | std::shared_ptr parser::parse_formula() { 84 | auto left = parse_brackets(); 85 | auto op = match({_token_type_list.at("operator")}); 86 | while (op) { 87 | auto right = parse_brackets(); 88 | left = std::shared_ptr(new binary_operation_node(op.value(), left, right)); 89 | op = match({_token_type_list.at("operator")}); 90 | } 91 | return left; 92 | } 93 | 94 | std::shared_ptr parser::parse_factor() { 95 | token current = _src.next(); 96 | if (current.type.name == "number") { 97 | return std::shared_ptr(new number_node(current)); 98 | } else if (current.type.name == "variable") { 99 | if (_defined_variables.find(current.value) == _defined_variables.end()) { 100 | throw compile_exception("You cannot use variable \"" + current.value + "\" before its declaration"); 101 | } 102 | return std::shared_ptr(new variable_node(current)); 103 | } else if (current.type.name == "string") { 104 | return std::shared_ptr(new string_node(current)); 105 | } else { 106 | generate_exception({_token_type_list.at("number"), _token_type_list.at("string"), _token_type_list.at("variable")}); 107 | return {}; 108 | } 109 | } 110 | 111 | std::optional parser::match(const std::vector &expected) { 112 | if (_src.has_next()) { 113 | token current = _src.next(); 114 | if (std::find(expected.begin(), expected.end(), current.type) != expected.end()) { 115 | return {current}; 116 | } else { 117 | _src.dec(); 118 | } 119 | } 120 | return {}; 121 | } 122 | 123 | token parser::require(const std::vector &expected) { 124 | auto current = match(expected); 125 | if (!current) { 126 | generate_exception(expected); 127 | } 128 | return current.value(); 129 | } 130 | 131 | void parser::generate_exception(const std::vector &expected) { 132 | std::string error_command; 133 | uint32_t token_pos = _src.pos(); 134 | while (_src.has_prev()) { 135 | _src.dec(); 136 | } 137 | uint32_t char_pos = 1; 138 | for (uint32_t i = 0; i < token_pos; ++i) { 139 | auto t = _src.next(); 140 | error_command += std::string(t.pos - char_pos, ' ') + t.value; 141 | char_pos = t.pos + t.value.size(); 142 | } 143 | std::ostringstream oss; 144 | std::copy(expected.begin(), expected.end(), std::ostream_iterator(oss, "or ")); 145 | throw compile_exception("In command\n" + error_command + " expected " + oss.str().substr(0, oss.str().size() - 3)); 146 | } 147 | -------------------------------------------------------------------------------- /Compiler/src/parser.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "../AST/ast_nodes.h" 6 | #include "lexer.h" 7 | 8 | class parser { 9 | class token_buffer { 10 | std::vector _buffer; 11 | uint32_t _pos; 12 | 13 | public: 14 | token_buffer() = default; 15 | 16 | token_buffer(std::vector buf) : _buffer(std::move(buf)), _pos(0) {} 17 | 18 | void open(std::vector buf) { 19 | _buffer = std::move(buf); 20 | _pos = 0; 21 | } 22 | 23 | bool has_next() const { 24 | return _pos < _buffer.size(); 25 | } 26 | 27 | bool has_prev() const { 28 | return _pos > 0; 29 | } 30 | 31 | token next() { 32 | return _buffer[_pos++]; 33 | } 34 | 35 | token look_back() { 36 | return _buffer[_pos - 1]; 37 | } 38 | 39 | void dec() { 40 | _pos--; 41 | } 42 | 43 | uint32_t pos() { 44 | return _pos; 45 | } 46 | }; 47 | 48 | std::map _token_type_list; 49 | std::map _operators_arity; 50 | std::map _function_value; 51 | std::map _defined_variables; 52 | token_buffer _src; 53 | 54 | std::optional match(const std::vector &expected); 55 | 56 | token require(const std::vector &expected); 57 | 58 | void generate_exception(const std::vector &expected); 59 | 60 | std::shared_ptr parse_expression(); 61 | 62 | std::shared_ptr parse_var_assign(const std::shared_ptr &variable_node); 63 | 64 | std::shared_ptr parse_function(); 65 | 66 | std::vector> parse_function_args(const token & function); 67 | 68 | std::shared_ptr parse_brackets(); 69 | 70 | std::shared_ptr parse_formula(); 71 | 72 | std::shared_ptr parse_factor(); 73 | 74 | public: 75 | parser(std::map types, std::map arity, std::map function_value); 76 | 77 | std::shared_ptr parse_statement(std::vector tokens); 78 | }; 79 | -------------------------------------------------------------------------------- /Compiler/src/translator.cpp: -------------------------------------------------------------------------------- 1 | #include "translator.h" 2 | 3 | 4 | std::string translator::command_translate(expression_node *node) { 5 | if (auto *n = dynamic_cast(node)) { 6 | return n->number.value; 7 | } else if (auto *v = dynamic_cast(node)) { 8 | return v->variable.value; 9 | } else if (auto *s = dynamic_cast(node)) { 10 | return "std::string(" + s->string.value + ")"; 11 | } else if (auto *b = dynamic_cast(node)) { 12 | return command_translate(b->left.get()) + " " + b->op.value + " " + command_translate(b->right.get()); 13 | } else if (auto *f = dynamic_cast(node)) { 14 | std::string command = f->function.value + "("; 15 | for (const auto &arg : f->args) { 16 | command += command_translate(arg.get()); 17 | } 18 | command += ")"; 19 | return command; 20 | } else if (auto *d = dynamic_cast(node)) { 21 | return d->modifer.value + " " + d->variable->variable.value + " = " + command_translate(d->value.get()) + ";"; 22 | } 23 | throw compile_exception("Undefined pointer"); 24 | } 25 | 26 | void translator::write_header(std::ofstream &ofstream, const std::string &include) { 27 | ofstream << include << "\n"; 28 | ofstream << "int main() {\n"; 29 | } 30 | 31 | void translator::write_source(std::ofstream &ofstream, const std::shared_ptr &root) { 32 | auto code = dynamic_cast(root.get()); 33 | for (const auto &command : code->expressions) { 34 | auto cmd = '\t' + command_translate(command.get()) + ";\n"; 35 | ofstream << cmd; 36 | } 37 | } 38 | 39 | void translator::write_tail(std::ofstream &ofstream) { 40 | ofstream << "\n\treturn 0;\n}"; 41 | } 42 | 43 | void translator::generate_cpp_code(const std::shared_ptr &root, const std::string &include) { 44 | std::ofstream temp_file("temporary_cpp_code.cpp"); 45 | if (!temp_file.is_open()) { 46 | throw compile_exception("Cannot create temporary file"); 47 | } 48 | write_header(temp_file, include); 49 | write_source(temp_file, root); 50 | write_tail(temp_file); 51 | } 52 | -------------------------------------------------------------------------------- /Compiler/src/translator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "../AST/ast_nodes.h" 6 | #include "../helpers/compile_exception.h" 7 | #include "../helpers/lib.h" 8 | 9 | class translator { 10 | static std::string command_translate(expression_node *node); 11 | 12 | static void write_header(std::ofstream &ofstream, const std::string &include); 13 | 14 | static void write_source(std::ofstream &ofstream, const std::shared_ptr &root); 15 | 16 | static void write_tail(std::ofstream &ofstream); 17 | 18 | public: 19 | static void generate_cpp_code(const std::shared_ptr &root, const std::string &include); 20 | }; 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Compiler Cpm 2 | 3 | description 4 | ----------- 5 | 6 | Let me introduce you new language called C+-. It consists of variables, constants, literals (string and numeric), print output function and input function. 7 | The language uses implicit dynamic typing: variables can store values of different types. There are 3 main types: 8 | * Int (8 bytes) 9 | * String 10 | * Float (8 bytes) 11 | 12 | The language also supports arithmetic operations "+" and "-" and assignment operator "=". All available types can stand on either side of arithmetic operations. In this case, the conversion rules are as follows (the order of the arguments is not important): 13 | * String + Int/Float --> number is converted to string 14 | * String - Int/Float --> The string is converted to an integer or fractional number, and if the string is not numeric, then its value is 0 15 | * Float (+, -) Int --> Integer converted to fractional 16 | 17 | Also the language has support for comments. A comment is a line starting with "//" and ending with a newline 18 | 19 | Code example 20 | ``` 21 | var str = "Hello"; // variable is set via var 22 | str = str + "!"; // variable can be changed 23 | print(str); // the print function takes one parameter 24 | ``` 25 | ``` 26 | str = 21 + 2 + 18; // can be written to the same variable number 27 | print(str); // -> 41 28 | ``` 29 | ``` 30 | val worldString = "World"; // the constant is set via val 31 | val answerNumber = 42; 32 | ``` 33 | ``` 34 | print(worldString + answerNumber); // -> World42 35 | print(worldString - answerNumber); // -> -42 36 | 37 | val pi = 3.14; 38 | print(pi + answerNumber); // 45.14 39 | ``` 40 | ``` 41 | val numberString = "134"; 42 | print(numberString - answerNumber); // -> 92 43 | print(numberString + answerNumber); // -> 13442 44 | ``` 45 | ``` 46 | val name = input("Name: "); // outputs "Name: " and waits for input 47 | print("Your name is " + name); 48 | ``` 49 | Implementation 50 | -------------- 51 | The work of the compiler is divided into 4 stages: 52 | * *lexer* is responsible for splitting the text into correct language tokens. 53 | * *Parser* builds AST based on generated tokens from lexer and is responsible for its correctness. 54 | * *Translator* is responsible for converting AST to C++ code. 55 | * *Compiler* runs lexer, parser and translator. 56 | 57 | *Lexer* uses class ***CharSource*** as a wrapper over the input file. It works in ***parallel*** with *Parser*. *Lexer* put the received tokens of one command into a ***CommandPipe***. *Parser* takes ready-made tokens from there and adds in *AST*. The finished *AST* is converted into c++ code using a *Translator*. In order for the translated code to compile correctly, the ***mixed*** class is added to it. The description of which in the string type lies in the ***LIB*** constant. In case of an error in the code, the ***CompileException*** error is thrown. 58 | 59 | Approximate scheme of work 60 | ```mermaid 61 | flowchart LR 62 | direction TB 63 | subgraph LexerWorker 64 | direction LR 65 | CharSource-->Lexer 66 | end 67 | subgraph ParserWorker 68 | Parser 69 | end 70 | InputCode-->LexerWorker 71 | LexerWorker-->CommandPipe-->ParserWorker 72 | ParserWorker-->CommandPipe 73 | CommandPipe-->LexerWorker 74 | ParserWorker-->Translator 75 | Translator-->TranslatedCppCode 76 | CppCompiler-->OutputBinary 77 | TranslatedCppCode-->OutputBinary 78 | ``` 79 | 80 | launch 81 | ------ 82 | Installation instructions: 83 | ``` 84 | git clone https://github.com/Ramzeeset/Compiler-Cpm 85 | cd Compiler-Cpm/ 86 | ``` 87 | For ***Windows*** 88 | ``` 89 | initializer.ps1 90 | ``` 91 | For ***Linux*** 92 | ``` 93 | bash initializer.sh 94 | ``` 95 | 96 | After that in the folder *cpm/* there will be an executable. The *initializer* script will try to find popular compilers on your device and ask for confirmation of use or ask you to specify the path to the compiler
97 | 98 | If you have problems with powershell, then take a look [how to run powershell script](https://www.howto-outlook.com/howto/powershell-scripts-faq-tips-and-tricks.htm) 99 | 100 | Usage 101 | ----- 102 | cpm used similarly to gcc and others: *cpm.exe file [options]...* 103 | ``` 104 | help manual 105 | Usage: cpm.exe file [options]... 106 | Options: 107 | --help Display this information. 108 | -cmp= Use your . Compiler must support version from c++17. Default value is stored in details/cpp_compiler_path 109 | -o Place the output into 110 | ``` 111 | 112 | -------------------------------------------------------------------------------- /initializer.ps1: -------------------------------------------------------------------------------- 1 | $cpp_path = "" 2 | 3 | $oldPreference = $ErrorActionPreference 4 | $ErrorActionPreference = 'stop' 5 | try{if(Get-Command g++){$cpp_path = (Get-Command g++).Path}} 6 | Catch {} 7 | Finally {$ErrorActionPreference=$oldPreference} 8 | 9 | $oldPreference = $ErrorActionPreference 10 | $ErrorActionPreference = 'stop' 11 | try{if(Get-Command clang++){$cpp_path = (Get-Command clang++).Path}} 12 | Catch {} 13 | Finally {$ErrorActionPreference=$oldPreference} 14 | 15 | if ( $cpp_path -eq "" ) { 16 | $cpp_path= Read-Host -Prompt "Compiler could not be found. Please specify the path : " 17 | } else { 18 | $confirmation = Read-Host "Compiler that was found $cpp_path. Use it? [y/n]" 19 | if ($confirmation -ne 'y') { 20 | $cpp_path = Read-Host "Please specify the path : " 21 | } 22 | } 23 | 24 | if (Test-Path -Path "cpm") { 25 | Write-Output "./cpm already exist. Compilation..." 26 | } else { 27 | mkdir cpm > $null 28 | } 29 | 30 | & $cpp_path ./Compiler/src/main.cpp ./Compiler/AST/ast_nodes.cpp ./Compiler/helpers/char_source.cpp ./Compiler/src/lexer.cpp ./Compiler/src/parser.cpp ./Compiler/src/translator.cpp ./Compiler/src/compiler.cpp -lpthread -std=c++17 -o cpm/cpm > $null 31 | 32 | if (Test-Path -Path "cpm/details") { 33 | } else { 34 | mkdir cpm/details > $null 35 | } 36 | 37 | Set-Content -Path cpm/details/cpp_compiler_path -Value $cpp_path -------------------------------------------------------------------------------- /initializer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cmp_path="" 4 | 5 | flag=$(type -p "gcc") 6 | 7 | if [ "$flag" != "" ]; 8 | then 9 | cmp_path=$flag; 10 | fi 11 | 12 | flag=$(type -p "g++") 13 | 14 | if [ "$flag" != "" ]; 15 | then 16 | cmp_path=$flag; 17 | fi 18 | 19 | flag=$(type -p "clang++") 20 | 21 | if [ "$flag" != "" ]; 22 | then 23 | cmp_path=$flag; 24 | fi 25 | 26 | cmp="" 27 | 28 | if [ "$cmp_path" == "" ]; 29 | then 30 | read -p "Cannot find cpp compiler. Please specify the path to the executable compiler: " cmp 31 | else 32 | confirm="" 33 | cmp=$cmp_path 34 | read -p "Compiler that was found '${cmp}' Use it? [y/n]: " confirm && [[ $confirm == [yY] || $confirm == [yY][eE][sS] ]] 35 | if [[ $confirm != [yY] && $confirm != [yY][eE][sS] ]]; 36 | then 37 | read -p "Specify the compiler's path: " cmp 38 | 39 | fi 40 | fi 41 | 42 | 43 | if [ -d ./cpm ]; 44 | 45 | then 46 | echo './cpm already exist. Compilation...' 47 | else 48 | mkdir cpm 49 | fi 50 | 51 | main_comp="${cmp} ./Compiler/src/main.cpp ./Compiler/AST/ast_nodes.cpp ./Compiler/helpers/char_source.cpp ./Compiler/src/lexer.cpp ./Compiler/src/parser.cpp ./Compiler/src/translator.cpp ./Compiler/src/compiler.cpp -lpthread -std=c++17 -o cpm/cpm" 52 | 53 | $main_comp 54 | 55 | if [ ! -d ./cpm/details ]; 56 | then 57 | mkdir cpm/details 58 | fi 59 | 60 | echo "${cmp}" > cpm/details/cpp_compiler_path 61 | 62 | --------------------------------------------------------------------------------