├── .gitattributes ├── .gitignore ├── CMakeLists.txt ├── CMakeSettings.json ├── include ├── compiler │ ├── CodeGenerator.h │ ├── SourceFile.h │ └── SourceParser.h └── runtime │ └── VirtualMachine.h ├── src ├── compiler │ ├── CodeGenerator.cpp │ ├── SourceFile.cpp │ ├── SourceParser.cpp │ ├── SymbolTable.cpp │ └── TreeNode.cpp ├── cvm.cpp └── runtime │ ├── ExecutableImage.cpp │ └── VirtualMachine.cpp └── test ├── combinatorics.cvm ├── factorial.cvm ├── primenumber.cvm └── scope.cvm /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. 
To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 
118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger 
configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd 364 | /build/ 365 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt: проект CMake для cvm; включите исходный код и определения, 2 | # укажите здесь логику для конкретного проекта. 3 | # 4 | cmake_minimum_required (VERSION 3.8) 5 | 6 | project ("cvm") 7 | 8 | include_directories("include" "include/runtime" "include/compiler") 9 | 10 | # Добавьте источник в исполняемый файл этого проекта. 11 | add_executable (cvm 12 | "include/runtime/VirtualMachine.h" 13 | "include/compiler/CodeGenerator.h" 14 | "include/compiler/SourceParser.h" 15 | "include/compiler/SourceFile.h" 16 | 17 | "src/cvm.cpp" 18 | "src/runtime/VirtualMachine.cpp" 19 | "src/runtime/ExecutableImage.cpp" 20 | "src/compiler/SourceParser.cpp" 21 | "src/compiler/SourceFile.cpp" 22 | "src/compiler/TreeNode.cpp" 23 | "src/compiler/SymbolTable.cpp" 24 | "src/compiler/CodeGenerator.cpp") 25 | 26 | # TODO: Добавьте тесты и целевые объекты, если это необходимо. 
27 | target_compile_features(cvm PUBLIC cxx_std_17) -------------------------------------------------------------------------------- /CMakeSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "x64-Debug", 5 | "generator": "Ninja", 6 | "configurationType": "Debug", 7 | "inheritEnvironments": [ "gcc-arm" ], 8 | "buildRoot": "${projectDir}\\out\\build\\${name}", 9 | "installRoot": "${projectDir}\\out\\install\\${name}", 10 | "cmakeCommandArgs": "", 11 | "buildCommandArgs": "", 12 | "ctestCommandArgs": "" 13 | }, 14 | { 15 | "name": "x64-Release", 16 | "generator": "Ninja", 17 | "configurationType": "RelWithDebInfo", 18 | "buildRoot": "${projectDir}\\out\\build\\${name}", 19 | "installRoot": "${projectDir}\\out\\install\\${name}", 20 | "cmakeCommandArgs": "", 21 | "buildCommandArgs": "", 22 | "ctestCommandArgs": "", 23 | "inheritEnvironments": [ "msvc_x64_x64" ] 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /include/compiler/CodeGenerator.h: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler code generator header 4 | * 5 | * (C) Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | #pragma once 9 | 10 | #include "runtime/VirtualMachine.h" 11 | #include "compiler/SourceParser.h" 12 | 13 | 14 | namespace vm { 15 | 16 | typedef struct { 17 | char* error; 18 | } CodeGeneratorException; 19 | 20 | class CodeGenerator { 21 | public: 22 | CodeGenerator(); 23 | ~CodeGenerator(); 24 | bool generateCode(ExecutableImage* img, TreeNode* rootNode); 25 | void emitModule(ExecutableImage* img, TreeNode* rootNode); 26 | void emitFunction(ExecutableImage* img, TreeNode* node); 27 | void emitStatement(ExecutableImage* img, TreeNode* body); 
28 | void emitBlock(ExecutableImage* img, TreeNode* body); 29 | void emitDeclaration(ExecutableImage* img, TreeNode* node); 30 | void emitCall(ExecutableImage* img, TreeNode* node); 31 | void emitIfElse(ExecutableImage* img, TreeNode* node); 32 | void emitWhile(ExecutableImage* img, TreeNode* node); 33 | void emitReturn(ExecutableImage* img, TreeNode* node); 34 | void emitBreak(ExecutableImage* img, TreeNode* node); 35 | void emitAssignment(ExecutableImage* img, TreeNode* assignment); 36 | void emitExpression(ExecutableImage* img, TreeNode* expression); 37 | void emitSymbol(ExecutableImage* img, TreeNode* node); 38 | WORD emitOpcode(ExecutableImage* img, Token& token); 39 | 40 | private: 41 | inline void raiseError(char* msg) { throw CodeGeneratorException{msg }; } 42 | }; 43 | 44 | } -------------------------------------------------------------------------------- /include/compiler/SourceFile.h: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler source code loader header 4 | * 5 | * (C) Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | #pragma once 9 | 10 | 11 | 12 | namespace vm { 13 | 14 | class SourceFile { 15 | public: 16 | SourceFile(const char* filename); 17 | ~SourceFile(); 18 | char* getData(); 19 | private: 20 | char* data; 21 | }; 22 | 23 | } -------------------------------------------------------------------------------- /include/compiler/SourceParser.h: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler Source Code Parser header 4 | * 5 | * Basic C like language grammar: 6 | * 7 | * ::= {}* 8 | * ::= 'int' 9 | * ::= '(' {, }* ')' 10 | * ::= 11 | * ::= | | | | | | 12 | * ::= {','}* ';' 13 | * ::= 
'{' {}* '}' 14 | * ::= '(' {} {, expression}* ')' 15 | * ::= 'if' '(' ')' { 'else' } 16 | * ::= 'while' '(' ')' 17 | * ::= 'return' ';' | 'break' ';' 18 | * ::= = ';' 19 | * ::= {( && | '||') } 20 | * ::= {( == | != | > | >= | < | <= ) } 21 | * ::= {(+|-) } 22 | * ::= {(*|/) } 23 | * ::= {( & | '|' | ^ | << | >> ) } 24 | * ::= ({~|!|-|+} ) | | 25 | * 26 | * 27 | * (C) Bolat Basheyev 2021 28 | * 29 | ============================================================================*/ 30 | #pragma once 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | #include "runtime/VirtualMachine.h" 37 | 38 | using namespace std; 39 | 40 | namespace vm { 41 | 42 | constexpr char* BLANKS = "\x20\n\r\t"; 43 | constexpr char* DELIMETERS = ",;{}[]()=><+-*/&|~^!"; 44 | 45 | //------------------------------------------------------------------------ 46 | // Tokens 47 | //------------------------------------------------------------------------ 48 | enum class TokenType { 49 | EMPTY = 0, NONE, UNKNOWN, IDENTIFIER, CONST_INTEGER, CONST_STRING, COMMA, EOS, 50 | OP_BRACES, CL_BRACES, OP_BRACKETS, CL_BRACKETS, OP_PARENTHESES, CL_PARENTHESES, 51 | INT, IF, ELSE, WHILE, RETURN, BREAK, ASSIGN, PLUS, MINUS, MULTIPLY, DIVIDE, NOT, AND, OR, XOR, SHL, SHR, 52 | EQUAL, NOT_EQUAL, GREATER, GR_EQUAL, LESS, LS_EQUAL, LOGIC_AND, LOGIC_OR, LOGIC_NOT 53 | }; 54 | 55 | constexpr char* const TOKEN_TYPE_MNEMONIC[] = { 56 | "", "", "", "", "", "", ",", ";", 57 | "{", "}", "[", "]", "(", ")", 58 | "int", "if", "else", "while", "return", "break", 59 | "=", "+", "-", "*", "/", "~", "&", "|", "^", "<<", ">>", 60 | "==", "!=", ">", ">=", "<", "<=", "&&", "||", "!" 
61 | }; 62 | 63 | constexpr int TOKEN_TYPE_COUNT = sizeof(TOKEN_TYPE_MNEMONIC) / sizeof(char*); 64 | 65 | 66 | class Token { 67 | public: 68 | TokenType type; 69 | char* text; 70 | int length; 71 | int row, col; 72 | }; 73 | 74 | constexpr Token EMPTY_TOKEN = { TokenType::EMPTY, "", 0, 0, 0 }; 75 | constexpr Token TKN_ARGUMENTS = { TokenType::NONE, "ARGUMENTS", 9, 0, 0 }; 76 | constexpr Token TKN_BLOCK = { TokenType::NONE, "BLOCK", 5, 0, 0 }; 77 | constexpr Token TKN_ZERO = { TokenType::CONST_INTEGER, "0", 1, 0, 0 }; 78 | constexpr Token TKN_MINUS = { TokenType::MINUS, "-", 1, 0, 0 }; 79 | constexpr Token TKN_BITWISE_NOT = { TokenType::NOT, "~", 1, 0, 0 }; 80 | constexpr Token TKN_LOGICAL_NOT = { TokenType::LOGIC_NOT, "!", 1, 0, 0 }; 81 | 82 | //------------------------------------------------------------------------ 83 | // Symbol table 84 | //------------------------------------------------------------------------ 85 | enum class SymbolType { 86 | UNKNOWN, CONSTANT, FUNCTION, ARGUMENT, VARIABLE 87 | }; 88 | 89 | constexpr char* const SYMBOL_TYPE_MNEMONIC[] = { 90 | "UNKNOWN", "CONSTANT", "FUNCTION", "ARGUMENT", "VARIABLE" 91 | }; 92 | 93 | class Symbol { 94 | public: 95 | string name = ""; 96 | SymbolType type = SymbolType::UNKNOWN; 97 | WORD localIndex = -1; 98 | WORD address = -1; 99 | WORD argCount = 0; 100 | 101 | }; 102 | 103 | class SymbolTable { 104 | public: 105 | 106 | SymbolTable(string name = "GLOBAL"); 107 | ~SymbolTable(); 108 | 109 | bool addChild(SymbolTable* child); 110 | void removeChild(SymbolTable* child); 111 | SymbolTable* getChildAt(size_t index); 112 | size_t getChildCount(); 113 | 114 | inline const char* getName() { return name.c_str(); }; 115 | void clearSymbols(); 116 | size_t getSymbolsCount(); 117 | bool addSymbol(Token& token, SymbolType type); 118 | Symbol* lookupSymbol(Token& token); 119 | Symbol* lookupSymbol(char* name, SymbolType type); 120 | Symbol* getSymbolAt(size_t index); 121 | void printSymbols(); 122 | 123 | private: 124 
| string name; 125 | vector symbols; 126 | vector childs; 127 | SymbolTable* parent; 128 | int getNextIndex(SymbolType type); 129 | void printRecursive(int depth); 130 | }; 131 | 132 | //------------------------------------------------------------------------ 133 | // Abstract Syntax Tree Node 134 | //------------------------------------------------------------------------ 135 | 136 | enum class TreeNodeType { 137 | UNKNOWN = 0, MODULE, CONSTANT, TYPE, SYMBOL, UNARY_OP, BINARY_OP, CALL, 138 | FUNCTION, BLOCK, ASSIGNMENT, IF_ELSE, WHILE, RETURN, BREAK 139 | }; 140 | 141 | constexpr char* const TREE_NODE_TYPE_MNEMONIC[] = { 142 | "UNKNOWN", "MODULE", "CONSTANT", "TYPE", "SYMBOL", "UNARY_OP", "BINARY_OP", "CALL", 143 | "FUNCTION", "BLOCK", "ASSIGNMENT", "IF_ELSE", "WHILE", "RETURN", "BREAK" 144 | }; 145 | 146 | class TreeNode { 147 | public: 148 | TreeNode(Token token, TreeNodeType type, SymbolTable* scope); 149 | ~TreeNode(); 150 | TreeNode* addChild(TreeNode* node); 151 | bool removeChild(TreeNode* node); 152 | void removeAll(); 153 | TreeNodeType getType(); 154 | Token &getToken(); 155 | TreeNode* getParent(); 156 | TreeNode* getChild(size_t index); 157 | size_t getChildCount(); 158 | size_t getDepth(); 159 | void print(); 160 | inline void setSymbolTable(SymbolTable* scope) { symbols = scope; } 161 | inline SymbolTable* getSymbolTable() { return symbols; } 162 | private: 163 | Token token; 164 | SymbolTable* symbols = NULL; 165 | vector childs; 166 | TreeNode* parent; 167 | TreeNodeType type; 168 | void print(int tab); 169 | }; 170 | 171 | //------------------------------------------------------------------------ 172 | // Parser exception 173 | //------------------------------------------------------------------------ 174 | class ParserException { 175 | public: 176 | Token& token; 177 | const char* msg; 178 | }; 179 | 180 | //------------------------------------------------------------------------ 181 | // Syntax Parser (Abstract Syntax Tree Builder) 182 | 
//------------------------------------------------------------------------ 183 | class SourceParser { 184 | public: 185 | SourceParser(const char* sourceCode); 186 | ~SourceParser(); 187 | inline size_t getTokenCount() { return tokens.size(); } 188 | Token& getToken(size_t index) { return tokens[index]; } // FIXME: not sure 189 | SymbolTable& getSymbolTable() { return rootSymbolTable; } 190 | TreeNode* getSyntaxTree() { return root; } 191 | private: 192 | vector tokens; 193 | TreeNode* root = NULL; 194 | SymbolTable rootSymbolTable; 195 | size_t currentToken = 0; 196 | int blockCounter = 0; 197 | 198 | void parseToTokens(const char* sourceCode); 199 | bool isBlank(char value) { return strchr(BLANKS, value) != NULL; }; 200 | bool isDelimeter(char value) { return strchr(DELIMETERS, value) != NULL; }; 201 | bool pushToken(char* text, int length, int row, int col); 202 | TokenType getTokenType(char* text, int length); 203 | TokenType validateNumber(char* text, int length); 204 | TokenType validateIdentifier(char* text, int length); 205 | TokenType validateString(char* text, int length); 206 | 207 | void buildSyntaxTree(); 208 | TreeNode* parseModule(SymbolTable* scope); 209 | TreeNode* parseDeclaration(SymbolTable* scope); 210 | TreeNode* parseFunction(SymbolTable* scope); 211 | TreeNode* parseArgument(SymbolTable* scope); 212 | TreeNode* parseBlock(SymbolTable* scope, bool isFunction, bool whileBlock); 213 | TreeNode* parseStatement(SymbolTable* scope, bool whileBlock); 214 | TreeNode* parseCall(SymbolTable* scope); 215 | TreeNode* parseIfElse(SymbolTable* scope, bool whileBlock); 216 | TreeNode* parseWhile(SymbolTable* scope); 217 | TreeNode* parseAssignment(SymbolTable* scope); 218 | TreeNode* parseLogical(SymbolTable* scope); 219 | TreeNode* parseComparison(SymbolTable* scope); 220 | TreeNode* parseExpression(SymbolTable* scope); 221 | TreeNode* parseTerm(SymbolTable* scope); 222 | TreeNode* parseBitwise(SymbolTable* scope); 223 | TreeNode* parseFactor(SymbolTable* 
scope); 224 | 225 | inline bool next() { currentToken++; return currentToken < getTokenCount(); } 226 | inline Token& getToken() { return getToken(currentToken); } 227 | inline Token& getNextToken() { return getToken(currentToken + 1); } 228 | inline bool isTokenType(TokenType type) { return getToken().type == type; } 229 | 230 | inline bool isComparison(TokenType type) { return type >= TokenType::EQUAL && type <= TokenType::LS_EQUAL; } 231 | inline bool isLogical(TokenType type) { return type >= TokenType::LOGIC_AND && type <= TokenType::LOGIC_OR; } 232 | inline bool isBitwise(TokenType type) { return type >= TokenType::AND && type <= TokenType::SHR; } 233 | inline bool isDataType(TokenType type) { return type == TokenType::INT; } 234 | inline void checkToken(TokenType type, const char* msg) { if (!isTokenType(type)) raiseError(msg); } 235 | inline void raiseError(Token& tkn, const char* msg) { throw ParserException{ tkn, msg }; } 236 | inline void raiseError(const char* msg) { throw ParserException{ getToken(), msg }; } 237 | }; 238 | 239 | }; 240 | -------------------------------------------------------------------------------- /include/runtime/VirtualMachine.h: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine class header 4 | * 5 | * Lightweight 32-bit stack virtual machine 6 | * 7 | * 8 | * (C) Bolat Basheyev 2021 9 | * 10 | ============================================================================*/ 11 | 12 | #pragma once 13 | 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | 19 | namespace vm { 20 | 21 | typedef int32_t WORD; 22 | 23 | constexpr WORD OP_CODE_MASK = 0b00000000000000000000000011111111; 24 | constexpr WORD OP_TYPE_MASK = 0b00000000000000000000111000000000; 25 | 26 | constexpr WORD OP_HALT = 0b00000000000000000000000000000000; 27 | constexpr WORD OP_CONST = 
0b00000000000000000000000000000001; 28 | constexpr WORD OP_PUSH = 0b00000000000000000000000000000010; 29 | constexpr WORD OP_POP = 0b00000000000000000000000000000011; 30 | 31 | constexpr WORD OP_ADD = 0b00000000000000000000000000000100; 32 | constexpr WORD OP_SUB = 0b00000000000000000000000000000101; 33 | constexpr WORD OP_MUL = 0b00000000000000000000000000000110; 34 | constexpr WORD OP_DIV = 0b00000000000000000000000000000111; 35 | 36 | constexpr WORD OP_AND = 0b00000000000000000000000000001000; 37 | constexpr WORD OP_OR = 0b00000000000000000000000000001001; 38 | constexpr WORD OP_XOR = 0b00000000000000000000000000001010; 39 | constexpr WORD OP_NOT = 0b00000000000000000000000000001011; 40 | constexpr WORD OP_SHL = 0b00000000000000000000000000001100; 41 | constexpr WORD OP_SHR = 0b00000000000000000000000000001101; 42 | 43 | constexpr WORD OP_JMP = 0b00000000000000000000000000001110; 44 | constexpr WORD OP_IFZERO = 0b00000000000000000000000000001111; 45 | 46 | constexpr WORD OP_EQUAL = 0b00000000000000000000000000010000; 47 | constexpr WORD OP_NEQUAL = 0b00000000000000000000000000010001; 48 | constexpr WORD OP_GREATER = 0b00000000000000000000000000010010; 49 | constexpr WORD OP_GREQUAL = 0b00000000000000000000000000010011; 50 | constexpr WORD OP_LESS = 0b00000000000000000000000000010100; 51 | constexpr WORD OP_LSEQUAL = 0b00000000000000000000000000010101; 52 | constexpr WORD OP_LAND = 0b00000000000000000000000000010110; 53 | constexpr WORD OP_LOR = 0b00000000000000000000000000010111; 54 | constexpr WORD OP_LNOT = 0b00000000000000000000000000011000; 55 | 56 | constexpr WORD OP_CALL = 0b00000000000000000000000000011001; 57 | constexpr WORD OP_RET = 0b00000000000000000000000000011010; 58 | constexpr WORD OP_SYSCALL = 0b00000000000000000000000000011011; 59 | constexpr WORD OP_RESERVED = 0b00000000000000000000000000011100; 60 | 61 | constexpr WORD OP_LOAD = 0b00000000000000000000000000011101; 62 | constexpr WORD OP_STORE = 0b00000000000000000000000000011110; 63 | 
constexpr WORD OP_ARG = 0b00000000000000000000000000011111; 64 | 65 | /* Buffer of WORD cells plus an emit cursor (emitAddress) used to assemble code. */ 66 | class ExecutableImage { 67 | public: 68 | ExecutableImage(); 69 | ~ExecutableImage(); 70 | void clear(); 71 | WORD setEmitAddress(WORD address); 72 | WORD getEmitAddress(); 73 | WORD emit(WORD opcode); 74 | WORD emit(WORD opcode, WORD operand); 75 | WORD emit(WORD opcode, WORD operand1, WORD operand2); 76 | WORD emit(ExecutableImage& img); 77 | void writeWord(WORD address, WORD value); 78 | void writeData(WORD address, void* data, WORD bytesCount); 79 | WORD readWord(WORD address); 80 | WORD* getImage(); 81 | WORD getSize(); 82 | void disassemble(); 83 | 84 | private: 85 | vector image; 86 | WORD emitAddress = 0; 87 | void prepareSpace(WORD wordsCount); 88 | void prepareSpace(WORD address, WORD wordsCount); 89 | WORD printMnemomic(WORD address); 90 | }; 91 | 92 | /* Stack virtual machine: WORD memory plus the IP, SP, FP and LP registers. */ 93 | class VirtualMachine { 94 | public: 95 | VirtualMachine(WORD memorySize = 0xFFFF); // Allocates VM memory in bytes 96 | ~VirtualMachine(); // Destructor 97 | bool loadImage(ExecutableImage& image); // Load executable image 98 | void execute(); // Runs image from address 0 99 | void printState(); // Print current VM state 100 | inline WORD getMaxAddress() { return maxAddress; }; // Get max address in WORDS 101 | inline WORD* getMemory() { return memory; }; // Returns pointer to VM RAM 102 | inline WORD getIP() { return ip; }; // Get Instruction Pointer address 103 | inline WORD getSP() { return sp; }; // Get Stack Pointer address 104 | inline WORD getFP() { return fp; }; // Get Frame Pointer address 105 | inline WORD getLP() { return lp; }; // Get Locals Pointer address 106 | private: 107 | WORD* memory; // Random access memory array 108 | WORD ip; // Instruction pointer 109 | WORD sp; // Stack pointer 110 | WORD fp; // Frame pointer 111 | WORD lp; // Local variables pointer 112 | WORD maxAddress; // Highest address in words 113 | void sysCall(WORD n); // System call 114 | }; 115 | 116 | 117 | 118 | 119 | 120 | }; 121 |
-------------------------------------------------------------------------------- /src/compiler/CodeGenerator.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler code generator implementation 4 | * 5 | * (C)Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | 9 | #include "compiler/CodeGenerator.h" 10 | 11 | #include 12 | #include 13 | 14 | using namespace vm; 15 | using namespace std; 16 | 17 | // todo error info and handling 18 | 19 | constexpr WORD MAGIC_BREAK = 0xFFFFFFFF; 20 | constexpr WORD JUMP_OFFSET = 2; 21 | 22 | 23 | CodeGenerator::CodeGenerator() { 24 | 25 | } 26 | 27 | CodeGenerator::~CodeGenerator() { 28 | 29 | 30 | } 31 | 32 | // Builds the whole image from the syntax tree; prints the error and returns false on failure. 33 | bool CodeGenerator::generateCode(ExecutableImage* img, TreeNode* rootNode) { 34 | try { 35 | img->clear(); // clear executable image 36 | img->setEmitAddress(4); // reserve 4 memory cells to call main() entry point 37 | emitModule(img, rootNode); // emit module code starting from address [4] 38 | // Lookup entry point address 39 | SymbolTable* global = rootNode->getSymbolTable(); 40 | Symbol* main = global->lookupSymbol("main", SymbolType::FUNCTION); 41 | if (main == NULL || main->argCount != 0) { 42 | raiseError("No entry point found - int main() function missing."); 43 | } else { 44 | // emit the entry stub at address 0: call main(), then halt 45 | img->setEmitAddress(0); 46 | img->emit(OP_CALL, main->address, 0); 47 | img->emit(OP_HALT); 48 | } 49 | } catch (CodeGeneratorException& e) { 50 | cout << "CODE GENERATION ERROR: "; 51 | cout << e.error << endl; 52 | return false; 53 | } 54 | return true; 55 | } 56 | 57 | // Emits code for every FUNCTION child of the module root. 58 | void CodeGenerator::emitModule(ExecutableImage* img, TreeNode* rootNode) { 59 | for (int i = 0; i < rootNode->getChildCount(); i++) { 60 | TreeNode* node = rootNode->getChild(i); 61 | if (node->getType() == 
TreeNodeType::FUNCTION) { 62 | // emit function code 63 | emitFunction(img, node); 64 | } 65 | } 66 | } 67 | 68 | 69 | 70 | void CodeGenerator::emitFunction(ExecutableImage* img, TreeNode* node) { 71 | // set function address in symbols table 72 | Token tkn = node->getToken(); 73 | Symbol* symbol = node->getSymbolTable()->lookupSymbol(tkn); 74 | symbol->address = img->getEmitAddress(); 75 | // Child nodes: #0 - return type, #1 - arguments, #2 - function body 76 | TreeNode* returnType = node->getChild(0); 77 | TreeNode* arguments = node->getChild(1); 78 | TreeNode* body = node->getChild(2); 79 | ExecutableImage funCode; 80 | 81 | // elevate all variable declaration to the function beginning 82 | emitDeclaration(&funCode, body); 83 | 84 | emitBlock(&funCode, body); 85 | // if no return statment then add return instruction; 86 | if (funCode.getSize() > 0) { 87 | WORD lastInstruction = funCode.readWord(funCode.getSize() - 1); 88 | if (lastInstruction != OP_RET) funCode.emit(OP_RET); 89 | } else funCode.emit(OP_RET); 90 | img->emit(funCode); 91 | 92 | } 93 | 94 | 95 | 96 | void CodeGenerator::emitStatement(ExecutableImage* img, TreeNode* statement) { 97 | switch (statement->getType()) { 98 | case TreeNodeType::TYPE: break; // skip because already emitted 99 | case TreeNodeType::ASSIGNMENT: emitAssignment(img, statement); break; 100 | case TreeNodeType::IF_ELSE: emitIfElse(img, statement); break; 101 | case TreeNodeType::WHILE: emitWhile(img, statement); break; 102 | case TreeNodeType::CALL: emitCall(img, statement); break; 103 | case TreeNodeType::BLOCK: emitBlock(img, statement); break; 104 | case TreeNodeType::RETURN: emitReturn(img, statement); break; 105 | case TreeNodeType::BREAK: emitBreak(img, statement); break; 106 | default: raiseError("Unknown structure in syntax tree."); 107 | } 108 | } 109 | 110 | 111 | void CodeGenerator::emitBlock(ExecutableImage* img, TreeNode* body) { 112 | size_t count = body->getChildCount(); 113 | TreeNode* statement; 114 | for (int j 
= 0; j < count; j++) { 115 | statement = body->getChild(j); 116 | emitStatement(img, statement); 117 | } 118 | } 119 | 120 | 121 | void CodeGenerator::emitDeclaration(ExecutableImage* img, TreeNode* node) { 122 | Token token; 123 | TreeNode* statement; 124 | // scan all child blocks and emit variable declaration 125 | for (int j = 0; j < node->getChildCount(); j++) { 126 | statement = node->getChild(j); 127 | // if it's variable declaratiob 128 | if (statement->getType() == TreeNodeType::TYPE) { 129 | for (int i = 0; i < statement->getChildCount(); i++) { 130 | token = statement->getChild(i)->getToken(); 131 | img->emit(OP_CONST, 0); 132 | } 133 | } else if (node->getChildCount() > 0) { 134 | // if there are childs - scan recursively 135 | emitDeclaration(img, statement); 136 | } 137 | } 138 | } 139 | 140 | 141 | void CodeGenerator::emitCall(ExecutableImage* img, TreeNode* node) { 142 | 143 | // look up function name in symbols table 144 | Token funcToken = node->getToken(); 145 | Symbol* func = node->getSymbolTable()->lookupSymbol(funcToken); 146 | if (func == NULL || func->type != SymbolType::FUNCTION) raiseError("Function not found."); 147 | 148 | // emit arguments expressions 149 | for (int i = 0; i < node->getChildCount(); i++) { 150 | emitExpression(img, node->getChild(i)); 151 | } 152 | 153 | // todo make proper system call labeling in syntax tree 154 | // system function 155 | if (funcToken.length == 4 && strncmp(funcToken.text, "iput", 4) == 0) img->emit(OP_SYSCALL, 0x21); else 156 | if (funcToken.length == 4 && strncmp(funcToken.text, "iget", 4) == 0) img->emit(OP_SYSCALL, 0x22); 157 | else { 158 | // user function 159 | WORD funcAddress = func->address; 160 | img->emit(OP_CALL, funcAddress, (WORD) node->getChildCount()); 161 | } 162 | } 163 | 164 | 165 | void CodeGenerator::emitIfElse(ExecutableImage* img, TreeNode* node) { 166 | TreeNode* condition = node->getChild(0); 167 | TreeNode* thenBlock = node->getChild(1); 168 | TreeNode* elseBlock = 
node->getChild(2); 169 | ExecutableImage conditionCode, thenCode, elseCode; 170 | WORD offset; 171 | 172 | emitExpression(&conditionCode, condition); // generate condition code 173 | emitStatement(&thenCode, thenBlock); // generate then block code 174 | if (elseBlock) { // if there is else block 175 | emitStatement(&elseCode, elseBlock); // generate else block code 176 | } 177 | 178 | // IF: emit conditional jump code 179 | offset = thenCode.getSize() + 1; // calculate next address after then block 180 | if (elseBlock) offset += JUMP_OFFSET; // if there is an else block add JMP offset 181 | img->emit(conditionCode); 182 | img->emit(OP_IFZERO, offset); 183 | 184 | // THEN: emit then block code 185 | img->emit(thenCode); 186 | if (elseBlock) { // if there is an else block 187 | offset = elseCode.getSize() + 1; // calculate next address after else block 188 | img->emit(OP_JMP, offset); // emit unconditional jump over else block 189 | } 190 | 191 | // ELSE: emit else block code 192 | if (elseBlock) img->emit(elseCode); 193 | 194 | } 195 | 196 | 197 | void CodeGenerator::emitWhile(ExecutableImage* img, TreeNode* node) { 198 | TreeNode* condition = node->getChild(0); 199 | TreeNode* whileBlock = node->getChild(1); 200 | ExecutableImage conditionCode, whileCode; 201 | // Generate while block code 202 | emitStatement(&whileCode, whileBlock); 203 | // Generate condition expression code 204 | emitExpression(&conditionCode, condition); 205 | // Calculate next address after while block 206 | WORD jumpOut = whileCode.getSize() + JUMP_OFFSET + 1; 207 | 208 | // Emit coniditional jump 209 | img->emit(conditionCode); 210 | img->emit(OP_IFZERO, jumpOut); 211 | 212 | // Replace in while block code MAGIC_BREAK with relative jump out 213 | WORD whileCodeSize = whileCode.getSize(); 214 | WORD w1, w2, offset; 215 | for (int i = 0; i < whileCodeSize - 1; i++) { 216 | w1 = whileCode.readWord(i); 217 | w2 = whileCode.readWord(i + 1); 218 | if (w1 == MAGIC_BREAK && w2 == MAGIC_BREAK) { 219 
| offset = jumpOut - i - JUMP_OFFSET; 220 | whileCode.writeWord(i, OP_JMP); 221 | whileCode.writeWord(i + 1, offset); 222 | } 223 | } 224 | 225 | // Emit while block code to image 226 | img->emit(whileCode); 227 | // calculate relative offset to beginning of condition expression 228 | WORD jumpBackOffset = -(whileCode.getSize() + conditionCode.getSize() + JUMP_OFFSET + 1); 229 | // Emit unconditional jump to 230 | img->emit(OP_JMP, jumpBackOffset); 231 | } 232 | 233 | 234 | void CodeGenerator::emitReturn(ExecutableImage* img, TreeNode* node) { 235 | emitExpression(img, node->getChild(0)); 236 | img->emit(OP_RET); 237 | } 238 | 239 | 240 | void CodeGenerator::emitBreak(ExecutableImage* img, TreeNode* node) { 241 | // reserve space for jump out of While cycle and 242 | // mark BREAK as two MAGIC_BREAK, MAGIC_BREAK values 243 | // for later replacement 244 | img->emit(MAGIC_BREAK, MAGIC_BREAK); 245 | } 246 | 247 | 248 | void CodeGenerator::emitAssignment(ExecutableImage* img, TreeNode* assignment) { 249 | Token asgn = assignment->getChild(0)->getToken(); 250 | emitExpression(img, assignment->getChild(1)); 251 | Symbol* entry = assignment->getSymbolTable()->lookupSymbol(asgn); 252 | if (entry != NULL && entry->type==SymbolType::VARIABLE) { 253 | img->emit(OP_STORE, entry->localIndex); 254 | } else { 255 | raiseError("Can not assign if its not variable."); 256 | } 257 | } 258 | 259 | 260 | void CodeGenerator::emitExpression(ExecutableImage* img, TreeNode* node) { 261 | size_t childCount = node->getChildCount(); 262 | TreeNodeType type = node->getType(); 263 | Token token; 264 | string integerString; 265 | WORD value; 266 | 267 | switch (type) { 268 | case TreeNodeType::BINARY_OP: 269 | emitExpression(img, node->getChild(0)); 270 | emitExpression(img, node->getChild(1)); 271 | emitOpcode(img, node->getToken()); 272 | break; 273 | case TreeNodeType::UNARY_OP: 274 | emitExpression(img, node->getChild(0)); 275 | emitOpcode(img, node->getToken()); 276 | break; 277 | case 
TreeNodeType::SYMBOL: 278 | emitSymbol(img, node); 279 | break; 280 | case TreeNodeType::CALL: 281 | emitCall(img, node); 282 | break; 283 | case TreeNodeType::CONSTANT: 284 | token = node->getToken(); 285 | integerString.append(token.text, token.length); 286 | value = stoi(integerString); 287 | img->emit(OP_CONST, value); 288 | break; 289 | default: 290 | raiseError ("Error unknown abstract syntax tree node"); 291 | break; 292 | } 293 | 294 | } 295 | 296 | 297 | void CodeGenerator::emitSymbol(ExecutableImage* img, TreeNode* node) { 298 | Token token = node->getToken(); 299 | TreeNodeType type = node->getType(); 300 | Symbol* entry; 301 | entry = node->getSymbolTable()->lookupSymbol(token); 302 | if (entry != NULL) { 303 | if (entry->type == SymbolType::ARGUMENT) img->emit(OP_ARG, entry->localIndex); 304 | else if (entry->type == SymbolType::VARIABLE) img->emit(OP_LOAD, entry->localIndex); 305 | else raiseError("Variable or argument expected."); 306 | } else raiseError("Symbol not declared."); 307 | } 308 | 309 | 310 | WORD CodeGenerator::emitOpcode(ExecutableImage* img, Token& token) { 311 | switch (token.type) { 312 | case TokenType::PLUS: img->emit(OP_ADD); break; 313 | case TokenType::MINUS: img->emit(OP_SUB); break; 314 | case TokenType::MULTIPLY: img->emit(OP_MUL); break; 315 | case TokenType::DIVIDE: img->emit(OP_DIV); break; 316 | case TokenType::EQUAL: img->emit(OP_EQUAL); break; 317 | case TokenType::NOT_EQUAL: img->emit(OP_NEQUAL); break; 318 | case TokenType::GREATER: img->emit(OP_GREATER); break; 319 | case TokenType::GR_EQUAL: img->emit(OP_GREQUAL); break; 320 | case TokenType::LESS: img->emit(OP_LESS); break; 321 | case TokenType::LS_EQUAL: img->emit(OP_LSEQUAL); break; 322 | case TokenType::LOGIC_AND: img->emit(OP_LAND); break; 323 | case TokenType::LOGIC_OR: img->emit(OP_LOR); break; 324 | case TokenType::LOGIC_NOT: img->emit(OP_LNOT); break; 325 | case TokenType::NOT: img->emit(OP_NOT); break; 326 | case TokenType::AND: img->emit(OP_AND); break; 
327 | case TokenType::OR: img->emit(OP_OR); break; 328 | case TokenType::XOR: img->emit(OP_XOR); break; 329 | case TokenType::SHL: img->emit(OP_SHL); break; 330 | case TokenType::SHR: img->emit(OP_SHR); break; 331 | default: 332 | cout << "UNKNOWN OPERATION: "; 333 | cout.write(token.text, token.length); 334 | cout << endl; 335 | break; 336 | } 337 | return 0; 338 | } 339 | 340 | -------------------------------------------------------------------------------- /src/compiler/SourceFile.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler source code loader implementation 4 | * 5 | * (C) Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | #include 9 | #include 10 | 11 | #include "compiler/SourceFile.h" 12 | 13 | using namespace std; 14 | using namespace vm; 15 | 16 | SourceFile::SourceFile(const char* filename) { 17 | data = NULL; 18 | ios_base::openmode openmode = ios::ate | ios::in | ios::binary; 19 | ifstream file(filename, openmode); 20 | if (file.is_open()) { 21 | streampos pos = file.tellg(); 22 | size_t size = pos; 23 | data = new char[size + 1]; 24 | file.seekg(0, ios::beg); 25 | file.read(data, pos); 26 | file.close(); 27 | // Terminate C string at the end 28 | data[size] = 0; 29 | } 30 | } 31 | 32 | SourceFile::~SourceFile() { 33 | if (data != NULL) { 34 | delete[] data; 35 | data = NULL; 36 | } 37 | } 38 | 39 | char* SourceFile::getData() { 40 | return data; 41 | } 42 | -------------------------------------------------------------------------------- /src/compiler/SourceParser.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler Source Code Parser implementation 4 | * 5 | * Basic C like language 
grammar: 6 | * 7 | * ::= {}* 8 | * ::= 'int' 9 | * ::= '(' {, }* ')' 10 | * ::= 11 | * ::= | | | | | | 12 | * ::= {','}* ';' 13 | * ::= '{' {}* '}' 14 | * ::= '(' {} {, expression}* ')' 15 | * ::= 'if' '(' ')' { 'else' } 16 | * ::= 'while' '(' ')' 17 | * ::= 'return' ';' | 'break' ';' 18 | * ::= = ';' 19 | * ::= {( && | '||') } 20 | * ::= {( == | != | > | >= | < | <= ) } 21 | * ::= {(+|-) } 22 | * ::= {(*|/) } 23 | * ::= {( & | '|' | ^ | << | >> ) } 24 | * ::= ({~|!|-|+} ) | | 25 | * 26 | * 27 | * (C) Bolat Basheyev 2021 28 | * 29 | ============================================================================*/ 30 | #include 31 | #include "compiler/SourceParser.h" 32 | 33 | using namespace vm; 34 | 35 | //----------------------------------------------------------------------------- 36 | // Constructor - builds source code abstract syntax tree 37 | //----------------------------------------------------------------------------- 38 | SourceParser::SourceParser(const char* sourceCode) { 39 | try { 40 | parseToTokens(sourceCode); 41 | buildSyntaxTree(); 42 | } 43 | catch (ParserException e) { 44 | TokenType type = e.token.type; 45 | cout << "PARSER EXCEPTION: " << e.msg << endl; 46 | cout << "Token at row=" << e.token.row << " col=" << e.token.col; 47 | if (type == TokenType::IDENTIFIER || 48 | type == TokenType::CONST_INTEGER || 49 | type == TokenType::CONST_STRING) { 50 | cout << " '"; 51 | cout.write(e.token.text, e.token.length); 52 | cout << "'" << endl; 53 | } 54 | else cout << " '" << TOKEN_TYPE_MNEMONIC[(int)type] << "'" << endl; 55 | } 56 | } 57 | 58 | 59 | //----------------------------------------------------------------------------- 60 | // Destructor 61 | //----------------------------------------------------------------------------- 62 | SourceParser::~SourceParser() { 63 | if (root != NULL) delete root; 64 | } 65 | 66 | 67 | //----------------------------------------------------------------------------- 68 | // Parses source code to tokens 69 | 
//----------------------------------------------------------------------------- 70 | void SourceParser::parseToTokens(const char* sourceCode) { 71 | 72 | tokens.clear(); // clear tokens vector 73 | int length; // token length variable 74 | int row = 1, col = 1; // reset current row, col counter 75 | char* cursor = (char*)sourceCode; // set cursor to source beginning 76 | char* newLine = cursor; // new line pointer 77 | char* start = cursor; // start new token from cursor 78 | char value = *cursor; // read first char from cursor 79 | char nextChar; // next char variable 80 | bool insideString = false; // inside string flag 81 | bool insideComment = false; // inside comment flag 82 | 83 | while (value != NULL) { // while not end of string (NULL) 84 | length = (int) (cursor - start); // measure token length 85 | if (!insideComment) { // if we are not inside comment 86 | if ((isBlank(value) || isDelimeter(value)) && !insideString) { // if there is token separator 87 | if (value == '\n') { // if new line '\n' found 88 | row++; col = 1; // increment row, reset col 89 | newLine = cursor + 1; // set new line pointer 90 | insideComment = false; // reset insideComment flag 91 | } 92 | if (length > 0) pushToken(start, length, row, col); // if length > 0 push token to vector 93 | nextChar = cursor[1]; // get next char after cursor 94 | if (isDelimeter(value) && isDelimeter(nextChar)) { // if next char is also delimeter 95 | if (value != '/' && nextChar != '/') { // if it's not '//' comment 96 | if (!pushToken(cursor, 2, row, col)) // try to push double char delimeter token 97 | pushToken(cursor, 1, row, col); // if not pushed - its single char delimeter 98 | else cursor++; // if double delimeter, increment cursor 99 | } else insideComment = true; // set insideComment flag 100 | } else pushToken(cursor, 1, row, col); // else push single char delimeter 101 | start = cursor + 1; // calculate next token start pointer 102 | col = (int)(start - newLine + 1); // calculate token start 
column 103 | } else if (value == '"') insideString = !insideString; // if '"' char - flip insideString flag 104 | else if (insideString && value == '\n') { // if '\n' found inside string 105 | Token tkn{ TokenType::UNKNOWN,start,length,row,col }; // use token information 106 | raiseError(tkn, "Can't use '\\n' in string constant."); // and throw exception 107 | } 108 | } else if (insideComment && value == '\n') { // if comment terminated '\n' character 109 | insideComment = false; // reset insideComment flag 110 | row++; col = 1; // increment row, reset col 111 | newLine = cursor + 1; // set new line pointer 112 | start = cursor + 1; // calculate next token start pointer 113 | col = (int)(start - newLine + 1); // calculate token start column 114 | } 115 | cursor++; // increment cursor pointer 116 | value = *cursor; // read next char 117 | } 118 | 119 | if (insideString) { 120 | length = (int)(cursor - start); // if there is a last token 121 | Token tkn{ TokenType::UNKNOWN,start,length,row,col }; // use token information 122 | raiseError(tkn, "String constant not closed by '\"' character."); 123 | } 124 | 125 | if (!insideComment) { 126 | length = (int)(cursor - start); // if there is a last token 127 | if (length > 0) pushToken(start, length, row, col); // push last token to vector 128 | } 129 | } 130 | 131 | 132 | //----------------------------------------------------------------------------- 133 | // Pushes parsed token to tokens vector 134 | //----------------------------------------------------------------------------- 135 | bool SourceParser::pushToken(char* text, int length, int row, int col) { 136 | TokenType type = getTokenType(text, length); 137 | if (type != TokenType::UNKNOWN) { 138 | tokens.push_back({ type, text, length, row, col }); 139 | return true; 140 | } 141 | return false; 142 | } 143 | 144 | 145 | //----------------------------------------------------------------------------- 146 | // Identifies token type by comparing to keywords, operators and 
rules 147 | //----------------------------------------------------------------------------- 148 | TokenType SourceParser::getTokenType(char* text, int length) { 149 | if (text == NULL || length < 1) return TokenType::UNKNOWN; 150 | for (int i = 0; i < TOKEN_TYPE_COUNT; i++) { 151 | const char* mnemonic = TOKEN_TYPE_MNEMONIC[i]; 152 | int mnemonicLength = (int) strlen(mnemonic); 153 | if (length == mnemonicLength) { 154 | if (strncmp(text, mnemonic, mnemonicLength) == 0) { 155 | return (TokenType) i; 156 | } 157 | } 158 | } 159 | char firstChar = text[0]; 160 | if (isdigit(firstChar)) return validateNumber(text, length); 161 | if (isalpha(firstChar)) return validateIdentifier(text, length); 162 | if (firstChar == '"') return validateString(text, length); 163 | return TokenType::UNKNOWN; 164 | } 165 | 166 | 167 | //----------------------------------------------------------------------------- 168 | // Validates constant integer number 169 | //----------------------------------------------------------------------------- 170 | TokenType SourceParser::validateNumber(char* text, int length) { 171 | for (size_t i = 1; i < length; i++) 172 | if (!isdigit(text[i])) return TokenType::UNKNOWN; 173 | return TokenType::CONST_INTEGER; 174 | } 175 | 176 | 177 | //----------------------------------------------------------------------------- 178 | // Validates identifier 179 | //----------------------------------------------------------------------------- 180 | TokenType SourceParser::validateIdentifier(char* text, int length) { 181 | for (size_t i = 1; i < length; i++) 182 | if (!isalnum(text[i])) return TokenType::UNKNOWN; 183 | return TokenType::IDENTIFIER; 184 | } 185 | 186 | 187 | //----------------------------------------------------------------------------- 188 | // Validates constant string 189 | //----------------------------------------------------------------------------- 190 | TokenType SourceParser::validateString(char* text, int length) { 191 | if (length < 2) return 
TokenType::UNKNOWN; 192 | if (text[length - 1] != '"') return TokenType::UNKNOWN; 193 | return TokenType::CONST_STRING; 194 | } 195 | 196 | 197 | //--------------------------------------------------------------------------- 198 | // Builds abstract syntax tree 199 | //--------------------------------------------------------------------------- 200 | void SourceParser::buildSyntaxTree() { 201 | currentToken = 0; 202 | 203 | // add iput system function to symbols table (write int to std out) 204 | Token iput = { TokenType::IDENTIFIER, "iput", 4, 0,0 }; 205 | rootSymbolTable.addSymbol(iput, SymbolType::FUNCTION); 206 | rootSymbolTable.lookupSymbol(iput)->argCount = 1; 207 | 208 | // add iget system function to symbols table (read int from std in) 209 | Token iget = { TokenType::IDENTIFIER, "iget", 4, 0,0 }; 210 | rootSymbolTable.addSymbol(iget, SymbolType::FUNCTION); 211 | rootSymbolTable.lookupSymbol(iget)->argCount = 1; 212 | 213 | root = parseModule(&rootSymbolTable); 214 | } 215 | 216 | 217 | //--------------------------------------------------------------------------- 218 | // ::= { | }* 219 | //--------------------------------------------------------------------------- 220 | TreeNode* SourceParser::parseModule(SymbolTable* scope) { 221 | TreeNode* program = new TreeNode(EMPTY_TOKEN, TreeNodeType::MODULE, scope); 222 | Token functionCheck; 223 | do { 224 | // Check: there must be parentheses after 2 tokens (for functions) 225 | functionCheck = getToken(currentToken + 2); 226 | if (functionCheck.type == TokenType::OP_PARENTHESES) { 227 | program->addChild(parseFunction(scope)); 228 | } else raiseError("Function declaration expected."); 229 | } while (next()); 230 | return program; 231 | } 232 | 233 | 234 | //--------------------------------------------------------------------------- 235 | // ::= {','}* ';' 236 | //--------------------------------------------------------------------------- 237 | TreeNode* SourceParser::parseDeclaration(SymbolTable* scope) { 238 | 
Token dataType = getToken(); 239 | if (!isDataType(dataType.type)) raiseError("Data type expected"); 240 | TreeNode* variableDeclaration = new TreeNode(dataType, TreeNodeType::TYPE, scope); 241 | while (next()) { 242 | if (isTokenType(TokenType::COMMA)) next(); else 243 | if (isTokenType(TokenType::EOS)) break; 244 | checkToken(TokenType::IDENTIFIER, "Variable name expected"); 245 | TreeNode* variableName = new TreeNode(getToken(), TreeNodeType::SYMBOL, scope); 246 | if (!scope->addSymbol(variableName->getToken(), SymbolType::VARIABLE)) { 247 | raiseError("Variable already defined."); 248 | } 249 | variableDeclaration->addChild(variableName); 250 | } 251 | return variableDeclaration; 252 | } 253 | 254 | 255 | //--------------------------------------------------------------------------- 256 | // ::= '(' {, }* ')' 257 | //--------------------------------------------------------------------------- 258 | TreeNode* SourceParser::parseFunction(SymbolTable* scope) { 259 | Token dataType = getToken(); 260 | if (!isDataType(dataType.type)) raiseError("Function return data type expected"); 261 | TreeNode* returnType = new TreeNode(dataType, TreeNodeType::TYPE, scope); next(); 262 | checkToken(TokenType::IDENTIFIER, "Function name expected"); 263 | TreeNode* function = new TreeNode(getToken(), TreeNodeType::FUNCTION, scope); 264 | if (!scope->addSymbol(function->getToken(), SymbolType::FUNCTION)) { 265 | raiseError("Function already defined."); 266 | } else next(); 267 | 268 | string functionName; 269 | functionName.append(function->getToken().text, function->getToken().length); 270 | SymbolTable* blockSymbols = new SymbolTable(functionName); 271 | scope->addChild(blockSymbols); 272 | TreeNode* arguments = new TreeNode(EMPTY_TOKEN, TreeNodeType::SYMBOL, blockSymbols); 273 | while (next()) { 274 | Token tkn = getToken(); 275 | if (isTokenType(TokenType::COMMA)) next(); else 276 | if (isTokenType(TokenType::CL_PARENTHESES)) break; 277 | 
arguments->addChild(parseArgument(blockSymbols)); 278 | } 279 | next(); 280 | 281 | // save function params count 282 | Symbol* func = scope->lookupSymbol(function->getToken()); 283 | func->argCount = (WORD) arguments->getChildCount(); 284 | 285 | TreeNode* functionBody = parseBlock(blockSymbols, true, false); 286 | 287 | function->addChild(returnType); 288 | function->addChild(arguments); 289 | function->addChild(functionBody); 290 | return function; 291 | } 292 | 293 | 294 | //--------------------------------------------------------------------------- 295 | // :: = 296 | //--------------------------------------------------------------------------- 297 | TreeNode* SourceParser::parseArgument(SymbolTable* scope) { 298 | Token dataType = getToken(); 299 | if (!isDataType(dataType.type)) raiseError("Function argument type expected"); 300 | TreeNode* argument = new TreeNode(dataType, TreeNodeType::TYPE, scope); next(); 301 | checkToken(TokenType::IDENTIFIER, "Function argument name expected"); 302 | TreeNode* variableName = new TreeNode(getToken(), TreeNodeType::SYMBOL, scope); 303 | if (!scope->addSymbol(variableName->getToken(), SymbolType::ARGUMENT)) { 304 | raiseError("Argument already defined."); 305 | } 306 | argument->addChild(variableName); 307 | return argument; 308 | } 309 | 310 | 311 | 312 | //--------------------------------------------------------------------------- 313 | // ::= '{' {}* '}' 314 | //--------------------------------------------------------------------------- 315 | TreeNode* SourceParser::parseBlock(SymbolTable* scope, bool isFunction, bool whileBlock) { 316 | TreeNode* block = new TreeNode(TKN_BLOCK, TreeNodeType::BLOCK, scope); 317 | SymbolTable* blockSymbols; 318 | if (isFunction) blockSymbols = scope; else { 319 | string name = "block"; 320 | name.append(to_string(blockCounter++)); 321 | blockSymbols = new SymbolTable(name); 322 | scope->addChild(blockSymbols); 323 | block->setSymbolTable(blockSymbols); 324 | } 325 | while (next()) { 326 
| if (isTokenType(TokenType::CL_BRACES)) break; 327 | if (isTokenType(TokenType::EOS)) continue; 328 | block->addChild(parseStatement(blockSymbols, whileBlock)); 329 | } 330 | return block; 331 | } 332 | 333 | 334 | 335 | //--------------------------------------------------------------------------- 336 | // ::= | | | | | | 337 | //--------------------------------------------------------------------------- 338 | TreeNode* SourceParser::parseStatement(SymbolTable* scope, bool whileBlock) { 339 | Token token = getToken(); 340 | if (isDataType(token.type)) return parseDeclaration(scope); else 341 | if (token.type == TokenType::OP_BRACES) return parseBlock(scope, false, whileBlock); else 342 | if (token.type == TokenType::IDENTIFIER) { 343 | Token nextToken = getNextToken(); 344 | if (nextToken.type == TokenType::ASSIGN) return parseAssignment(scope); 345 | if (nextToken.type == TokenType::OP_PARENTHESES) { 346 | TreeNode* callNode = parseCall(scope); next(); 347 | if (!isTokenType(TokenType::EOS)) raiseError("';' expected"); 348 | return callNode; 349 | } else raiseError("Unexpected token, assignment '=' or function call expecated."); 350 | } else 351 | if (token.type == TokenType::IF) return parseIfElse(scope, whileBlock); else 352 | if (token.type == TokenType::WHILE) return parseWhile(scope); else 353 | if (token.type == TokenType::RETURN) { 354 | TreeNode* returnStmt = new TreeNode(token, TreeNodeType::RETURN, scope); next(); 355 | TreeNode* expr = parseExpression(scope); 356 | returnStmt->addChild(expr); 357 | return returnStmt; 358 | } if (token.type == TokenType::BREAK) { 359 | if (!whileBlock) raiseError("Can't use 'break' statement outside 'while' cycle."); 360 | TreeNode* breakStmt = new TreeNode(token, TreeNodeType::BREAK, scope); next(); 361 | return breakStmt; 362 | } 363 | else raiseError("Unexpected token, statement expected"); 364 | return NULL; 365 | } 366 | 367 | 368 | //--------------------------------------------------------------------------- 369 | 
// ::= '(' {} {, expression}* ')' 370 | //--------------------------------------------------------------------------- 371 | TreeNode* SourceParser::parseCall(SymbolTable* scope) { 372 | Token identifier = getToken(); 373 | 374 | Symbol* func = scope->lookupSymbol(identifier); 375 | if (func == NULL || func->type != SymbolType::FUNCTION) { 376 | raiseError("Function not defined."); 377 | return NULL; 378 | } 379 | 380 | TreeNode* callNode = new TreeNode(identifier, TreeNodeType::CALL, scope); next(); 381 | if (!isTokenType(TokenType::OP_PARENTHESES)) raiseError("Opening parentheses '(' expected."); 382 | while (next()) { 383 | Token tkn = getToken(); 384 | if (isTokenType(TokenType::CL_PARENTHESES)) break; 385 | if (isTokenType(TokenType::COMMA)) continue; 386 | TreeNode* param = parseExpression(scope); 387 | callNode->addChild(param); 388 | if (isTokenType(TokenType::CL_PARENTHESES)) break; 389 | } 390 | 391 | 392 | // check arguments count 393 | if (callNode->getChildCount() != func->argCount) { 394 | raiseError("Function call arguments count doesn't match function declaration"); 395 | } 396 | 397 | return callNode; 398 | } 399 | 400 | 401 | //--------------------------------------------------------------------------- 402 | // ::= 'if' '(' ')' { 'else' } 403 | //--------------------------------------------------------------------------- 404 | TreeNode* SourceParser::parseIfElse(SymbolTable* scope, bool whileBlock) { 405 | TreeNode* ifblock = new TreeNode(getToken(), TreeNodeType::IF_ELSE, scope); 406 | next(); 407 | checkToken(TokenType::OP_PARENTHESES, "Opening parentheses '(' expected"); 408 | next(); 409 | ifblock->addChild(parseLogical(scope)); 410 | checkToken(TokenType::CL_PARENTHESES, "Closing parentheses ')' expected"); 411 | next(); 412 | ifblock->addChild(parseStatement(scope, whileBlock)); 413 | if (getNextToken().type == TokenType::ELSE) { 414 | next(); next(); 415 | ifblock->addChild(parseStatement(scope, whileBlock)); 416 | } 417 | return ifblock; 
418 | } 419 | 420 | 421 | //--------------------------------------------------------------------------- 422 | // :: = 'while' '(' < expression > ')' < statement > 423 | //--------------------------------------------------------------------------- 424 | TreeNode* SourceParser::parseWhile(SymbolTable* scope) { 425 | TreeNode* whileBlock = new TreeNode(getToken(), TreeNodeType::WHILE, scope); 426 | next(); 427 | checkToken(TokenType::OP_PARENTHESES, "Opening parentheses '(' expected"); 428 | next(); 429 | whileBlock->addChild(parseLogical(scope)); 430 | checkToken(TokenType::CL_PARENTHESES, "Closing parentheses ')' expected"); 431 | next(); 432 | whileBlock->addChild(parseStatement(scope, true)); 433 | return whileBlock; 434 | } 435 | 436 | 437 | //--------------------------------------------------------------------------- 438 | // ::= = ';' 439 | //--------------------------------------------------------------------------- 440 | TreeNode* SourceParser::parseAssignment(SymbolTable* scope) { 441 | Token identifier = getToken(); 442 | if (scope->lookupSymbol(identifier) == NULL) { 443 | raiseError("Symbol not defined."); 444 | } 445 | next(); 446 | checkToken(TokenType::ASSIGN, "Assignment operator '=' expected"); 447 | TreeNode* op = new TreeNode(getToken(), TreeNodeType::ASSIGNMENT, scope); 448 | next(); 449 | TreeNode* a = new TreeNode(identifier, TreeNodeType::SYMBOL, scope); 450 | TreeNode* b = parseLogical(scope); 451 | op->addChild(a); 452 | op->addChild(b); 453 | return op; 454 | } 455 | 456 | 457 | 458 | //--------------------------------------------------------------------------- 459 | // * ::= {( && | '||') } 460 | //--------------------------------------------------------------------------- 461 | TreeNode* SourceParser::parseLogical(SymbolTable* scope) { 462 | TreeNode* operand1, * operand2, * op = NULL, * prevOp = NULL; 463 | operand1 = parseComparison(scope); 464 | Token token = getToken(); 465 | while (isLogical(token.type)) { 466 | next(); 467 | operand2 
= parseComparison(scope); 468 | op = new TreeNode(token, TreeNodeType::BINARY_OP, scope); 469 | if (prevOp == NULL) op->addChild(operand1); else op->addChild(prevOp); 470 | op->addChild(operand2); 471 | prevOp = op; 472 | token = getToken(); 473 | } 474 | return op == NULL ? operand1 : op; 475 | 476 | } 477 | 478 | //--------------------------------------------------------------------------- 479 | // :: = { (== | != | > | >= | < | <=) < expression > } 480 | //--------------------------------------------------------------------------- 481 | TreeNode* SourceParser::parseComparison(SymbolTable* scope) { 482 | TreeNode* operand1, * operand2, * op = NULL, * prevOp = NULL; 483 | operand1 = parseExpression(scope); 484 | Token token = getToken(); 485 | while (isComparison(token.type)) { 486 | next(); 487 | operand2 = parseExpression(scope); 488 | op = new TreeNode(token, TreeNodeType::BINARY_OP, scope); 489 | if (prevOp == NULL) op->addChild(operand1); else op->addChild(prevOp); 490 | op->addChild(operand2); 491 | prevOp = op; 492 | token = getToken(); 493 | } 494 | return op == NULL ? operand1 : op; 495 | 496 | } 497 | 498 | 499 | //--------------------------------------------------------------------------- 500 | // ::= {(+|-) } 501 | //--------------------------------------------------------------------------- 502 | TreeNode* SourceParser::parseExpression(SymbolTable* scope) { 503 | TreeNode* operand1, * operand2, * op = NULL, * prevOp = NULL; 504 | operand1 = parseTerm(scope); 505 | Token token = getToken(); 506 | while (isTokenType(TokenType::PLUS) || isTokenType(TokenType::MINUS)) { 507 | next(); 508 | operand2 = parseTerm(scope); 509 | op = new TreeNode(token, TreeNodeType::BINARY_OP, scope); 510 | if (prevOp == NULL) op->addChild(operand1); else op->addChild(prevOp); 511 | op->addChild(operand2); 512 | prevOp = op; 513 | token = getToken(); 514 | } 515 | return op == NULL ? 
operand1 : op; 516 | } 517 | 518 | 519 | //--------------------------------------------------------------------------- 520 | // ::= {(*|/) } 521 | //--------------------------------------------------------------------------- 522 | TreeNode* SourceParser::parseTerm(SymbolTable* scope) { 523 | TreeNode* operand1, * operand2, * op = NULL, * prevOp = NULL; 524 | operand1 = parseBitwise(scope); 525 | Token token = getToken(); 526 | while (isTokenType(TokenType::MULTIPLY) || isTokenType(TokenType::DIVIDE)) { 527 | next(); 528 | operand2 = parseBitwise(scope); 529 | op = new TreeNode(token, TreeNodeType::BINARY_OP, scope); 530 | if (prevOp == NULL) op->addChild(operand1); else op->addChild(prevOp); 531 | op->addChild(operand2); 532 | prevOp = op; 533 | token = getToken(currentToken); 534 | } 535 | return op == NULL ? operand1 : op; 536 | } 537 | 538 | 539 | //--------------------------------------------------------------------------- 540 | // ::= {( & | '|' | ^ | << | >> ) } 541 | //--------------------------------------------------------------------------- 542 | TreeNode* SourceParser::parseBitwise(SymbolTable* scope) { 543 | TreeNode* operand1, * operand2, * op = NULL, * prevOp = NULL; 544 | operand1 = parseFactor(scope); 545 | Token token = getToken(); 546 | while (isBitwise(token.type)) { 547 | next(); 548 | operand2 = parseFactor(scope); 549 | op = new TreeNode(token, TreeNodeType::BINARY_OP, scope); 550 | if (prevOp == NULL) op->addChild(operand1); else op->addChild(prevOp); 551 | op->addChild(operand2); 552 | prevOp = op; 553 | token = getToken(); 554 | } 555 | return op == NULL ? 
operand1 : op; 556 | } 557 | 558 | 559 | //--------------------------------------------------------------------------- 560 | // ::= ({~|!|-|+} ) | | 561 | //--------------------------------------------------------------------------- 562 | TreeNode* SourceParser::parseFactor(SymbolTable* scope) { 563 | 564 | TreeNode* factor = NULL; 565 | bool unaryMinus = false; 566 | bool bitwiseNot = false; 567 | bool logicalNot = false; 568 | 569 | if (isTokenType(TokenType::MINUS)) { unaryMinus = true; next(); } 570 | else if (isTokenType(TokenType::PLUS)) { unaryMinus = false; next(); } 571 | else if (isTokenType(TokenType::NOT)) { bitwiseNot = true; next(); } 572 | else if (isTokenType(TokenType::LOGIC_NOT)) { logicalNot = true; next(); } 573 | 574 | if (isTokenType(TokenType::OP_PARENTHESES)) { 575 | next(); 576 | factor = parseExpression(scope); 577 | Token token = getToken(); 578 | if (isTokenType(TokenType::CL_PARENTHESES)) next(); 579 | else raiseError("Closing parentheses expected"); 580 | } else if (isTokenType(TokenType::CONST_INTEGER)) { 581 | factor = new TreeNode(getToken(), TreeNodeType::CONSTANT, scope); next(); 582 | } else if (isTokenType(TokenType::IDENTIFIER)) { 583 | Token nextToken = getNextToken(); 584 | if (nextToken.type == TokenType::OP_PARENTHESES) { 585 | factor = parseCall(scope); next(); 586 | } else { 587 | factor = new TreeNode(getToken(), TreeNodeType::SYMBOL, scope); 588 | if (scope->lookupSymbol(getToken()) == NULL) { 589 | raiseError("Symbol not defined."); 590 | } 591 | next(); 592 | } 593 | } else raiseError("Number or identifier expected"); 594 | 595 | if (unaryMinus) { 596 | TreeNode* expr = new TreeNode(TKN_MINUS, TreeNodeType::BINARY_OP, scope); 597 | TreeNode* zero = new TreeNode(TKN_ZERO, TreeNodeType::CONSTANT, scope); 598 | expr->addChild(zero); 599 | expr->addChild(factor); 600 | return expr; 601 | } 602 | 603 | if (bitwiseNot) { 604 | TreeNode* expr = new TreeNode(TKN_BITWISE_NOT, TreeNodeType::UNARY_OP, scope); 605 | 
expr->addChild(factor); 606 | return expr; 607 | } 608 | 609 | if (logicalNot) { 610 | TreeNode* expr = new TreeNode(TKN_LOGICAL_NOT, TreeNodeType::UNARY_OP, scope); 611 | expr->addChild(factor); 612 | return expr; 613 | } 614 | 615 | return factor; 616 | 617 | } 618 | -------------------------------------------------------------------------------- /src/compiler/SymbolTable.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler Symbol Table implementation 4 | * 5 | * (C) Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | #pragma once 9 | 10 | #include "compiler/SourceParser.h" 11 | 12 | #include 13 | 14 | using namespace vm; 15 | using namespace std; 16 | 17 | 18 | SymbolTable::SymbolTable(string name) { 19 | parent = NULL; 20 | this->name = name; 21 | // cout << "Symbol table '" << name << "' created." << endl; 22 | } 23 | 24 | 25 | SymbolTable::~SymbolTable() { 26 | SymbolTable* child; 27 | size_t count = getChildCount(); 28 | // cout << "Symbol table '" << name << "' and " << count << " child deleted." 
<< endl; 29 | for (int i = 0; i < count; i++) { 30 | child = getChildAt(i); 31 | delete child; 32 | } 33 | } 34 | 35 | 36 | 37 | bool SymbolTable::addChild(SymbolTable* child) { 38 | if (child == NULL) return false; 39 | child->parent = this; 40 | childs.push_back(child); 41 | return true; 42 | } 43 | 44 | 45 | void SymbolTable::removeChild(SymbolTable* child) { 46 | for (auto entry = begin(childs); entry != end(childs); ++entry) { 47 | if (*entry == child) { 48 | childs.erase(entry); 49 | delete child; 50 | return; 51 | } 52 | } 53 | } 54 | 55 | 56 | SymbolTable* SymbolTable::getChildAt(size_t index) { 57 | return childs.at(index); 58 | } 59 | 60 | size_t SymbolTable::getChildCount() { 61 | return childs.size(); 62 | } 63 | 64 | 65 | 66 | void SymbolTable::clearSymbols() { 67 | symbols.clear(); 68 | } 69 | 70 | 71 | size_t SymbolTable::getSymbolsCount() { 72 | return symbols.size(); 73 | } 74 | 75 | bool SymbolTable::addSymbol(Token& token, SymbolType type) { 76 | if (lookupSymbol(token) != NULL) return false; 77 | Symbol entry; 78 | entry.name.append(token.text, token.length); 79 | entry.type = type; 80 | entry.localIndex = (int) getNextIndex(type); 81 | entry.address = NULL; 82 | symbols.push_back(entry); 83 | return true; 84 | } 85 | 86 | 87 | Symbol* SymbolTable::getSymbolAt(size_t index) { 88 | return &symbols.at(index); 89 | } 90 | 91 | 92 | Symbol* SymbolTable::lookupSymbol(Token& token) { 93 | Symbol entry; 94 | size_t count = getSymbolsCount(); 95 | size_t length; 96 | // Search symbol in current scope 97 | for (int i = 0; i < count; i++) { 98 | entry = symbols.at(i); 99 | if (entry.name.size() == token.length) { 100 | length = token.length; 101 | if (strncmp(entry.name.c_str(), token.text, length)==0) return &symbols.at(i); 102 | } 103 | } 104 | // Search symbol in parent scope 105 | if (parent != NULL) { 106 | Symbol* entry = parent->lookupSymbol(token); 107 | if (entry != NULL) return entry; 108 | } 109 | return NULL; 110 | } 111 | 112 | 113 | Symbol* 
SymbolTable::lookupSymbol(char* name, SymbolType type) { 114 | Symbol entry; 115 | size_t count = getSymbolsCount(); 116 | size_t length = strlen(name); 117 | bool equalName; 118 | // Search symbol in current scope 119 | for (int i = 0; i < count; i++) { 120 | entry = symbols.at(i); 121 | if (entry.name.size() == length) { 122 | equalName = strncmp(entry.name.c_str(), name, length) == 0; 123 | if (equalName && entry.type==type) return &symbols.at(i); 124 | 125 | } 126 | } 127 | // Search symbol in parent scope 128 | if (parent != NULL) { 129 | Symbol* entry = parent->lookupSymbol(name, type); 130 | if (entry != NULL) return entry; 131 | } 132 | return NULL; 133 | 134 | } 135 | 136 | int SymbolTable::getNextIndex(SymbolType type) { 137 | Symbol entry; 138 | size_t count = getSymbolsCount(); 139 | int index = 0; 140 | for (int i = 0; i < count; i++) { 141 | entry = symbols.at(i); 142 | if (entry.type == type) index++; 143 | } 144 | return index; 145 | } 146 | 147 | void SymbolTable::printSymbols() { 148 | cout << "-----------------------------------------------------" << endl; 149 | cout << "Symbol table" << endl; 150 | cout << "-----------------------------------------------------" << endl; 151 | printRecursive(0); 152 | } 153 | 154 | 155 | void SymbolTable::printRecursive(int depth) { 156 | Symbol entry; 157 | size_t count = getSymbolsCount(); 158 | for (int i = 0; i < depth; i++) cout << "\t"; 159 | cout << name << ":" << endl; 160 | for (int i = 0; i < count; i++) { 161 | entry = symbols.at(i); 162 | for (int j = 0; j < depth; j++) cout << "\t"; 163 | cout << entry.name << "\t"; 164 | cout << SYMBOL_TYPE_MNEMONIC[(int)entry.type]; 165 | if (entry.type == SymbolType::FUNCTION) { 166 | cout << " at [" << entry.address << "]"; 167 | cout << " args=" << entry.argCount; 168 | } else { 169 | cout << " #" << entry.localIndex; 170 | } 171 | 172 | cout << endl; 173 | } 174 | for (int i = 0; i < childs.size(); i++) { 175 | childs.at(i)->printRecursive(depth + 1); 176 | } 
177 | } -------------------------------------------------------------------------------- /src/compiler/TreeNode.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Compiler Abstract Syntax Tree Node implementation 4 | * 5 | * (C) Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | #pragma once 9 | 10 | #include "compiler/SourceParser.h" 11 | 12 | #include 13 | 14 | using namespace vm; 15 | using namespace std; 16 | 17 | 18 | TreeNode::TreeNode(Token token, TreeNodeType type, SymbolTable* scope) { 19 | this->parent = NULL; 20 | this->token = token; 21 | this->type = type; 22 | this->symbols = scope; 23 | } 24 | 25 | 26 | TreeNode::~TreeNode() { 27 | removeAll(); 28 | } 29 | 30 | 31 | TreeNode* TreeNode::addChild(TreeNode* node) { 32 | if (node == NULL) return NULL; 33 | node->parent = this; 34 | childs.push_back(node); 35 | return node; 36 | } 37 | 38 | 39 | bool TreeNode::removeChild(TreeNode* node) { 40 | for (auto entry = begin(childs); entry != end(childs); ++entry) { 41 | if (*entry == node) { 42 | childs.erase(entry); 43 | return true; 44 | } 45 | } 46 | return false; 47 | } 48 | 49 | 50 | void TreeNode::removeAll() { 51 | size_t childCount = childs.size(); 52 | for (size_t i = 0; i < childCount; i++) { 53 | delete childs[i]; 54 | childs[i] = NULL; 55 | } 56 | childs.clear(); 57 | } 58 | 59 | 60 | TreeNodeType TreeNode::getType() { 61 | return type; 62 | } 63 | 64 | Token& TreeNode::getToken() { 65 | return token; 66 | } 67 | 68 | 69 | TreeNode* TreeNode::getParent() { 70 | return parent; 71 | } 72 | 73 | TreeNode* TreeNode::getChild(size_t index) { 74 | if (index >= childs.size()) return NULL; 75 | return childs.at(index); 76 | } 77 | 78 | 79 | size_t TreeNode::getChildCount() { 80 | return childs.size(); 81 | } 82 | 83 | 84 | size_t TreeNode::getDepth() { 
85 | size_t depth = 0; 86 | TreeNode* node = getParent(); 87 | while (node != NULL) { 88 | depth++; 89 | node = node->getParent(); 90 | } 91 | return depth; 92 | } 93 | 94 | 95 | void TreeNode::print() { 96 | cout << "-----------------------------------------------------" << endl; 97 | cout << "Parsed abstract syntax tree" << endl; 98 | cout << "-----------------------------------------------------" << endl; 99 | print(0); 100 | } 101 | 102 | 103 | void TreeNode::print(int tab) { 104 | for (int i = 0; i < tab; i++) if (i < tab - 1) cout << "| "; else cout << "|-"; 105 | cout << "'"; 106 | cout.write(token.text, token.length); 107 | cout << "'" << "(" << TREE_NODE_TYPE_MNEMONIC[(unsigned int)type] << ")"; 108 | cout << " " << getSymbolTable()->getName() << endl; 109 | //if (symbols != NULL) symbols->printSymbols(); 110 | for (auto& node : childs) node->print(tab + 1); 111 | } 112 | -------------------------------------------------------------------------------- /src/cvm.cpp: -------------------------------------------------------------------------------- 1 | // cvm.cpp: определяет точку входа для приложения. 2 | // 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "runtime/VirtualMachine.h" 10 | #include "compiler/SourceParser.h" 11 | #include "compiler/CodeGenerator.h" 12 | #include "compiler/SourceFile.h" 13 | 14 | 15 | using namespace std; 16 | using namespace vm; 17 | 18 | 19 | // todo refactor it 20 | void compileRun(string filepath, bool showTree, bool showSymbols, bool disassemble, bool run) { 21 | 22 | // Read source code file 23 | SourceFile source(filepath.c_str()); 24 | cout << "Current path: " << filesystem::current_path() << endl; 25 | if (source.getData()==NULL) { 26 | cout << "File not open." 
<< endl; 27 | return; 28 | } 29 | 30 | // Parse source code 31 | ExecutableImage* img = new ExecutableImage(); 32 | SourceParser* parser = new SourceParser(source.getData()); 33 | TreeNode *root = parser->getSyntaxTree(); 34 | if (root == NULL) { 35 | cout << "Parser error. Can not parse source code."; 36 | delete parser; 37 | return; 38 | } else { 39 | if (showTree) root->print(); 40 | // Generate executable image 41 | CodeGenerator* codeGenerator = new CodeGenerator(); 42 | if (!codeGenerator->generateCode(img, parser->getSyntaxTree())) { 43 | cout << "Code generator error. Can not generate code."; 44 | delete codeGenerator; 45 | delete img; 46 | return; 47 | } else { 48 | if (showSymbols) parser->getSymbolTable().printSymbols(); 49 | if (disassemble) img->disassemble(); 50 | delete codeGenerator; 51 | delete parser; 52 | } 53 | } 54 | 55 | ; 56 | 57 | // Run executable image 58 | if (run) { 59 | VirtualMachine* machine = new VirtualMachine(); 60 | machine->loadImage(*img); 61 | auto start = std::chrono::high_resolution_clock::now(); 62 | machine->execute(); 63 | auto end = std::chrono::high_resolution_clock::now(); 64 | auto ms_int = chrono::duration_cast(end - start).count(); 65 | cout << "Execution time: " << ms_int / 1000000000.0 << "s" << endl; 66 | delete machine; 67 | } 68 | 69 | delete img; 70 | 71 | } 72 | 73 | 74 | int main(int argc, char* argv[]) { 75 | 76 | if (argc < 2) { 77 | puts("No filename was given."); 78 | return 1; 79 | } 80 | 81 | compileRun(argv[1], true, true, true, true); 82 | 83 | //compileRun("../../../test/factorial.cvm", true, true, true, true); 84 | //compileRun("../../../test/primenumber.cvm", true, true, true, true); 85 | //compileRun("../../../test/combinatorics.cvm", true, true, true, true); 86 | //compileRun("../../../test/scope.cvm", true, true, true, true); 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /src/runtime/ExecutableImage.cpp: 
-------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine Executable Image class implementation 4 | * 5 | * (C) Bolat Basheyev 2021 6 | * 7 | ============================================================================*/ 8 | 9 | #include 10 | #include 11 | #include 12 | #include "runtime/VirtualMachine.h" 13 | 14 | using namespace std; 15 | using namespace vm; 16 | 17 | 18 | ExecutableImage::ExecutableImage() { 19 | clear(); 20 | } 21 | 22 | ExecutableImage::~ExecutableImage() { 23 | clear(); 24 | } 25 | 26 | 27 | //----------------------------------------------------------------------------- 28 | // Clears executable image 29 | //----------------------------------------------------------------------------- 30 | void ExecutableImage::clear() { 31 | image.clear(); 32 | emitAddress = 0; 33 | } 34 | 35 | //----------------------------------------------------------------------------- 36 | // Checks available space and resize image if required 37 | //----------------------------------------------------------------------------- 38 | void ExecutableImage::prepareSpace(WORD wordsCount) { 39 | WORD required = emitAddress + wordsCount; 40 | if (image.size() < required) image.resize(required); 41 | } 42 | 43 | void ExecutableImage::prepareSpace(WORD address, WORD wordsCount) { 44 | WORD required = address + wordsCount; 45 | if (image.size() < required) image.resize(required); 46 | } 47 | 48 | 49 | //----------------------------------------------------------------------------- 50 | // Sets EmitAddress value to specified address 51 | //----------------------------------------------------------------------------- 52 | WORD ExecutableImage::setEmitAddress(WORD address) { 53 | return emitAddress = address; 54 | } 55 | 56 | 57 | //----------------------------------------------------------------------------- 58 | // Returns EmitAddress value 59 | 
//----------------------------------------------------------------------------- 60 | WORD ExecutableImage::getEmitAddress() { 61 | return emitAddress; 62 | } 63 | 64 | 65 | //----------------------------------------------------------------------------- 66 | // Writes opcode to executable image at current EmitAddress 67 | //----------------------------------------------------------------------------- 68 | WORD ExecutableImage::emit(WORD opcode) { 69 | prepareSpace(1); 70 | WORD startAddress = emitAddress; 71 | image[emitAddress++] = opcode; 72 | return startAddress; 73 | } 74 | 75 | //----------------------------------------------------------------------------- 76 | // Writes opcode and its operand to executable image at current EmitAddress 77 | //----------------------------------------------------------------------------- 78 | WORD ExecutableImage::emit(WORD opcode, WORD operand) { 79 | prepareSpace(2); 80 | WORD startAddress = emitAddress; 81 | image[emitAddress++] = opcode; 82 | image[emitAddress++] = operand; 83 | return startAddress; 84 | } 85 | 86 | //----------------------------------------------------------------------------- 87 | // Writes opcode and its operands to executable image at current EmitAddress 88 | //----------------------------------------------------------------------------- 89 | WORD ExecutableImage::emit(WORD opcode, WORD operand1, WORD operand2) { 90 | prepareSpace(3); 91 | WORD startAddress = emitAddress; 92 | image[emitAddress++] = opcode; 93 | image[emitAddress++] = operand1; 94 | image[emitAddress++] = operand2; 95 | return startAddress; 96 | } 97 | 98 | 99 | //----------------------------------------------------------------------------- 100 | // Writes specified image to this executable image at current EmitAddress 101 | //----------------------------------------------------------------------------- 102 | WORD ExecutableImage::emit(ExecutableImage& img) { 103 | WORD startAddress = emitAddress; 104 | WORD wordsCount = img.getSize(); 
105 | prepareSpace(wordsCount); 106 | memcpy(image.data() + emitAddress, img.getImage(), wordsCount * sizeof(WORD)); 107 | emitAddress += wordsCount; 108 | return startAddress; 109 | } 110 | 111 | 112 | //----------------------------------------------------------------------------- 113 | // Write WORD to specified memory address 114 | //----------------------------------------------------------------------------- 115 | void ExecutableImage::writeWord(WORD address, WORD value) { 116 | WORD temp = emitAddress; 117 | prepareSpace(address, 1); 118 | image[address] = value; 119 | emitAddress = temp; 120 | } 121 | 122 | 123 | //----------------------------------------------------------------------------- 124 | // Writes data to executable image at current EmitAddress 125 | //----------------------------------------------------------------------------- 126 | void ExecutableImage::writeData(WORD address, void* data, WORD bytesCount) { 127 | WORD reminder = bytesCount % sizeof(WORD); 128 | WORD wordsCount = bytesCount / sizeof(WORD); 129 | if (reminder != 0) wordsCount++; 130 | prepareSpace(address, wordsCount); 131 | memcpy(image.data() + address, data, bytesCount); 132 | } 133 | 134 | //----------------------------------------------------------------------------- 135 | // Reads WORD from executable image at specified EmitAddress 136 | //----------------------------------------------------------------------------- 137 | WORD ExecutableImage::readWord(WORD address) { 138 | return image[address]; 139 | } 140 | 141 | 142 | //----------------------------------------------------------------------------- 143 | // Returns pointer to executable image 144 | //----------------------------------------------------------------------------- 145 | WORD* ExecutableImage::getImage() { 146 | return image.data(); 147 | } 148 | 149 | 150 | //----------------------------------------------------------------------------- 151 | // Returns memory size in words 152 | 
//----------------------------------------------------------------------------- 153 | WORD ExecutableImage::getSize() { 154 | return (WORD) image.size(); 155 | } 156 | 157 | 158 | //----------------------------------------------------------------------------- 159 | // Disassembles executable image to console 160 | //----------------------------------------------------------------------------- 161 | void ExecutableImage::disassemble() { 162 | cout << "-----------------------------------------------------" << endl; 163 | cout << "Virtual machine executable image disassembly" << endl; 164 | cout << "-----------------------------------------------------" << endl; 165 | if (image.size() == 0) return; 166 | WORD opcode; 167 | WORD previousOp = -1; 168 | WORD ip = 0; 169 | do { 170 | opcode = image[ip]; 171 | if (opcode != OP_HALT) ip += printMnemomic(ip); 172 | else { 173 | if (previousOp != OP_HALT) printMnemomic(ip); 174 | ip++; 175 | } 176 | previousOp = opcode; 177 | } while (ip < image.size()); 178 | } 179 | 180 | 181 | //----------------------------------------------------------------------------- 182 | // Prints instruction mnemonic 183 | //----------------------------------------------------------------------------- 184 | WORD ExecutableImage::printMnemomic(WORD address) { 185 | WORD ip = address; 186 | WORD opcode = image[ip++]; 187 | cout << "[" << setw(6) << address << "] "; 188 | switch (opcode) { 189 | //------------------------------------------------------------------------ 190 | // STACK OPERATIONS 191 | //------------------------------------------------------------------------ 192 | case OP_CONST: cout << "iconst " << image[ip++]; break; 193 | case OP_PUSH: cout << "ipush [" << image[ip++] << "]"; break; 194 | case OP_POP: cout << "ipop [" << image[ip++] << "]"; break; 195 | //------------------------------------------------------------------------ 196 | // ARITHMETIC OPERATIONS 197 | 
//------------------------------------------------------------------------ 198 | case OP_ADD: cout << "iadd "; break; 199 | case OP_SUB: cout << "isub "; break; 200 | case OP_MUL: cout << "imul "; break; 201 | case OP_DIV: cout << "idiv "; break; 202 | //------------------------------------------------------------------------ 203 | // BITWISE OPERATIONS 204 | //------------------------------------------------------------------------ 205 | case OP_AND: cout << "iand "; break; 206 | case OP_OR: cout << "ior "; break; 207 | case OP_XOR: cout << "ixor "; break; 208 | case OP_NOT: cout << "inot "; break; 209 | case OP_SHL: cout << "ishl "; break; 210 | case OP_SHR: cout << "ishr "; break; 211 | //------------------------------------------------------------------------ 212 | // FLOW CONTROL OPERATIONS 213 | //------------------------------------------------------------------------ 214 | case OP_JMP: cout << "jmp [" << std::showpos << image[ip++] << std::noshowpos << "]"; break; 215 | case OP_IFZERO: cout << "ifzero [" << std::showpos << image[ip++] << std::noshowpos << "]"; break; 216 | case OP_EQUAL: cout << "equal "; break; 217 | case OP_NEQUAL: cout << "nequal "; break; 218 | case OP_GREATER:cout << "greater "; break; 219 | case OP_GREQUAL:cout << "grequal "; break; 220 | case OP_LESS: cout << "less "; break; 221 | case OP_LSEQUAL:cout << "lsequal "; break; 222 | case OP_LAND: cout << "land "; break; 223 | case OP_LOR: cout << "lor "; break; 224 | case OP_LNOT: cout << "lnot "; break; 225 | 226 | //------------------------------------------------------------------------ 227 | // PROCEDURE CALL OPERATIONS 228 | //------------------------------------------------------------------------ 229 | case OP_CALL: cout << "call [" << image[ip++] << "], " << image[ip++]; break; 230 | case OP_RET: cout << "ret "; break; 231 | case OP_SYSCALL:cout << "syscall 0x" << setbase(16) << image[ip++] << setbase(10); break; 232 | case OP_HALT: cout << "---- halt ----"; break; 233 | 
//------------------------------------------------------------------------ 234 | // LOCAL VARIABLES AND ARGUMENTS ACCESS OPERATIONS 235 | //------------------------------------------------------------------------ 236 | case OP_LOAD: cout << "iload #" << image[ip++]; break; 237 | case OP_STORE: cout << "istore #" << image[ip++]; break; 238 | case OP_ARG: cout << "iarg #" << image[ip++]; break; 239 | default: 240 | cout << "0x" << setbase(16) << opcode << setbase(10); 241 | } 242 | cout << endl; 243 | return ip - address; 244 | } 245 | 246 | -------------------------------------------------------------------------------- /src/runtime/VirtualMachine.cpp: -------------------------------------------------------------------------------- 1 | /*============================================================================ 2 | * 3 | * Virtual Machine class implementation 4 | * 5 | * Lightweight 32-bit stack virtual machine runtime. 6 | * 7 | * (C) Bolat Basheyev 2021 8 | * 9 | ============================================================================*/ 10 | #include 11 | #include 12 | #include "runtime/VirtualMachine.h" 13 | 14 | using namespace std; 15 | using namespace vm; 16 | 17 | //----------------------------------------------------------------------------- 18 | // Allocates virtual machine RAM in bytes 19 | //----------------------------------------------------------------------------- 20 | VirtualMachine::VirtualMachine(WORD memorySize) { 21 | maxAddress = memorySize / sizeof(WORD); 22 | memory = new WORD[maxAddress]; 23 | memset(memory, 0, maxAddress); 24 | } 25 | 26 | //----------------------------------------------------------------------------- 27 | // Releases RAM of virtual machine 28 | //----------------------------------------------------------------------------- 29 | VirtualMachine::~VirtualMachine() { 30 | delete[] memory; 31 | } 32 | 33 | //----------------------------------------------------------------------------- 34 | // Loads executable image to 
virtual machine RAM 35 | //----------------------------------------------------------------------------- 36 | bool VirtualMachine::loadImage(ExecutableImage& image) { 37 | if (image.getSize() > maxAddress) return false; 38 | memcpy(memory, image.getImage(), image.getSize() * sizeof(WORD)); 39 | return true; 40 | } 41 | 42 | //---------------------------------------------------------------------------- 43 | // Starts execution from address [0x0000] 44 | //---------------------------------------------------------------------------- 45 | void VirtualMachine::execute() { 46 | 47 | cout << "-----------------------------------------------------" << endl; 48 | cout << "Virtual machine runtime" << endl; 49 | cout << "-----------------------------------------------------" << endl; 50 | 51 | WORD a = 0; // temporary variables 52 | WORD b = 0; // temporary variables 53 | 54 | ip = 0; // Set Instruction pointer to 0 55 | sp = maxAddress; // Set Stack pointer to highest address 56 | fp = sp; // Set Frame pointer to Stack pointer 57 | lp = sp - 1; // Set Locals pointer to Stack pointer - 1 58 | 59 | fetch: 60 | 61 | //printState(); 62 | 63 | switch (memory[ip++]) { 64 | //------------------------------------------------------------------------ 65 | // STACK OPERATIONS 66 | //------------------------------------------------------------------------ 67 | case OP_CONST: 68 | memory[--sp] = memory[ip++]; 69 | goto fetch; 70 | case OP_PUSH: 71 | a = memory[ip++]; 72 | memory[--sp] = memory[a]; 73 | goto fetch; 74 | case OP_POP: 75 | a = memory[ip++]; 76 | memory[a] = memory[sp++]; 77 | break; 78 | //------------------------------------------------------------------------ 79 | // ARITHMETIC OPERATIONS 80 | //------------------------------------------------------------------------ 81 | case OP_ADD: 82 | b = memory[sp++]; 83 | a = memory[sp++]; 84 | memory[--sp] = a + b; 85 | goto fetch; 86 | case OP_SUB: 87 | b = memory[sp++]; 88 | a = memory[sp++]; 89 | memory[--sp] = a - b; 90 | goto 
fetch; 91 | case OP_MUL: 92 | b = memory[sp++]; 93 | a = memory[sp++]; 94 | memory[--sp] = a * b; 95 | goto fetch; 96 | case OP_DIV: 97 | b = memory[sp++]; 98 | a = memory[sp++]; 99 | memory[--sp] = a / b; 100 | goto fetch; 101 | //------------------------------------------------------------------------ 102 | // BITWISE OPERATIONS 103 | //------------------------------------------------------------------------ 104 | case OP_AND: 105 | b = memory[sp++]; 106 | a = memory[sp++]; 107 | memory[--sp] = a & b; 108 | goto fetch; 109 | case OP_OR: 110 | b = memory[sp++]; 111 | a = memory[sp++]; 112 | memory[--sp] = a | b; 113 | goto fetch; 114 | case OP_XOR: 115 | b = memory[sp++]; 116 | a = memory[sp++]; 117 | memory[--sp] = a ^ b; 118 | goto fetch; 119 | case OP_NOT: 120 | a = memory[sp++]; 121 | memory[--sp] = ~a; 122 | goto fetch; 123 | case OP_SHL: 124 | b = memory[sp++]; 125 | a = memory[sp++]; 126 | memory[--sp] = a << b; 127 | goto fetch; 128 | case OP_SHR: 129 | b = memory[sp++]; 130 | a = memory[sp++]; 131 | memory[--sp] = a >> b; 132 | goto fetch; 133 | //------------------------------------------------------------------------ 134 | // FLOW CONTROL OPERATIONS (Relative jumps depending on top of the stack) 135 | //------------------------------------------------------------------------ 136 | case OP_JMP: 137 | ip += memory[ip]; 138 | goto fetch; 139 | case OP_IFZERO: 140 | a = memory[sp++]; 141 | if (a == 0) ip += memory[ip]; else ip++; 142 | goto fetch; 143 | //------------------------------------------------------------------------ 144 | // LOGICAL (BOOLEAN) OPERATIONS 145 | //------------------------------------------------------------------------ 146 | case OP_EQUAL: 147 | b = memory[sp++]; 148 | a = memory[sp++]; 149 | memory[--sp] = (a == b); 150 | goto fetch; 151 | case OP_NEQUAL: 152 | b = memory[sp++]; 153 | a = memory[sp++]; 154 | memory[--sp] = (a != b); 155 | goto fetch; 156 | case OP_GREATER: 157 | b = memory[sp++]; 158 | a = memory[sp++]; 159 | 
memory[--sp] = (a > b); 160 | goto fetch; 161 | case OP_GREQUAL: 162 | b = memory[sp++]; 163 | a = memory[sp++]; 164 | memory[--sp] = (a >= b); 165 | goto fetch; 166 | case OP_LESS: 167 | b = memory[sp++]; 168 | a = memory[sp++]; 169 | memory[--sp] = (a < b); 170 | goto fetch; 171 | case OP_LSEQUAL: 172 | b = memory[sp++]; 173 | a = memory[sp++]; 174 | memory[--sp] = (a <= b); 175 | goto fetch; 176 | case OP_LAND: 177 | b = memory[sp++]; 178 | a = memory[sp++]; 179 | memory[--sp] = a && b; 180 | goto fetch; 181 | case OP_LOR: 182 | b = memory[sp++]; 183 | a = memory[sp++]; 184 | memory[--sp] = a || b; 185 | goto fetch; 186 | case OP_LNOT: 187 | a = memory[sp++]; 188 | memory[--sp] = !a; 189 | goto fetch; 190 | //------------------------------------------------------------------------ 191 | // PROCEDURE CALL OPERATIONS 192 | //------------------------------------------------------------------------ 193 | case OP_CALL: 194 | a = memory[ip++]; // get call address and increment address 195 | b = memory[ip++]; // get arguments count (argc) 196 | b = sp + b; // calculate new frame pointer 197 | memory[--sp] = ip; // push return address to the stack 198 | memory[--sp] = fp; // push old Frame pointer to stack 199 | memory[--sp] = lp; // push old Local variables pointer to stack 200 | fp = b; // set Frame pointer to arguments pointer 201 | lp = sp - 1; // set Local variables pointer after top of a stack 202 | ip = a; // jump to call address 203 | goto fetch; 204 | case OP_RET: 205 | a = memory[sp++]; // read function return value on top of a stack 206 | b = lp; // save Local variables pointer 207 | sp = fp; // set stack pointer to Frame pointer (drop locals) 208 | lp = memory[b + 1]; // restore old Local variables pointer 209 | fp = memory[b + 2]; // restore old Frame pointer 210 | ip = memory[b + 3]; // set IP to return address 211 | memory[--sp] = a; // save return value on top of a stack 212 | goto fetch; 213 | case OP_SYSCALL: 214 | a = memory[ip++]; // read system call 
index from top of the stack 215 | sysCall(a); // make system call by index 216 | goto fetch; 217 | case OP_HALT: 218 | printState(); 219 | return; 220 | //------------------------------------------------------------------------ 221 | // LOCAL VARIABLES AND CALL ARGUMENTS OPERATIONS 222 | //------------------------------------------------------------------------ 223 | case OP_LOAD: 224 | a = memory[ip++]; // read local variable index 225 | b = lp - a; // calculate local variable address 226 | memory[--sp] = memory[b]; // push local variable to stack 227 | goto fetch; 228 | case OP_STORE: 229 | a = memory[ip++]; // read local variable index 230 | b = lp - a; // calculate local variable address 231 | memory[b] = memory[sp++]; // pop top of stack to local variable 232 | goto fetch; 233 | case OP_ARG: 234 | a = memory[ip++]; // read parameter index 235 | b = fp - a - 1; // calculate parameter address 236 | memory[--sp] = memory[b]; // push parameter to stack 237 | goto fetch; 238 | default: 239 | cout << "Runtime error - unknown opcode at [" << ip << "]" << endl; 240 | printState(); 241 | return; 242 | } 243 | 244 | goto fetch; 245 | 246 | } 247 | 248 | //---------------------------------------------------------------------------- 249 | // SYSCALL implementation 250 | //---------------------------------------------------------------------------- 251 | void VirtualMachine::sysCall(WORD n) { 252 | WORD ptr, a; 253 | switch (n) { 254 | case 0x20: // print C style string 255 | ptr = memory[sp++]; 256 | cout << ((char*)&memory[ptr]); 257 | return; 258 | case 0x21: // print int from TOS 259 | a = memory[sp++]; 260 | cout << a << endl; 261 | return; 262 | case 0x22: 263 | cin >> a; 264 | memory[--sp] = a; 265 | return; 266 | } 267 | } 268 | 269 | //---------------------------------------------------------------------------- 270 | // Prints IP, SP, FP, LP and STACK to standard out 271 | //---------------------------------------------------------------------------- 272 | void 
VirtualMachine::printState() { 273 | cout << "VM:"; 274 | cout << " IP=" << ip; 275 | cout << " FP=" << fp; 276 | cout << " LP=" << lp; 277 | cout << " SP=" << sp; 278 | cout << " STACK=["; 279 | for (WORD i = maxAddress - 1; i >= sp; i--) { 280 | cout << memory[i]; 281 | if (i > sp) cout << ","; 282 | } 283 | cout << "] -> TOP" << endl; 284 | } 285 | -------------------------------------------------------------------------------- /test/combinatorics.cvm: -------------------------------------------------------------------------------- 1 | 2 | 3 | int brutal(int n, int m) { 4 | int sum, i, j; 5 | if (n < 1 || m < 1) return 0; 6 | iput(n); 7 | iput(m); 8 | i = 1; 9 | while (i <= n) { 10 | j = 1; 11 | while (j <= m) { 12 | sum = sum + i * j; 13 | iput(sum); 14 | j = j + 1; 15 | } 16 | i = i + 1; 17 | } 18 | return sum; 19 | } 20 | 21 | 22 | int main() { 23 | int x; 24 | x = brutal(5,5); 25 | return x; 26 | } 27 | -------------------------------------------------------------------------------- /test/factorial.cvm: -------------------------------------------------------------------------------- 1 | int fact(int n) { 2 | int x; 3 | if (n) { 4 | x = n * fact(n-1); 5 | return x; 6 | } else { 7 | x = 10; 8 | return 1; 9 | } 10 | return 0; 11 | } 12 | 13 | 14 | int main() { 15 | int n, i; 16 | i = 1; 17 | while (i<=100) { 18 | if (i<12) { 19 | n = fact(i); 20 | iput(n); 21 | i = i + 1; 22 | } else break; 23 | } 24 | return n; 25 | } 26 | -------------------------------------------------------------------------------- /test/primenumber.cvm: -------------------------------------------------------------------------------- 1 | //---------------------------- 2 | // Checks is it prime number 3 | //---------------------------- 4 | int isPrime(int n) { 5 | int i,a,b; 6 | i = 2; 7 | while (i < n) { 8 | a = n / i; 9 | b = a * i; 10 | if (b == n) return 0; 11 | i = i + 1; 12 | } 13 | return 1; 14 | } 15 | 16 | 17 | int main() { 18 | int j; 19 | j = 2; 20 | while (j < 100) { 21 | if 
(isPrime(j)) iput(j); 22 | j = j + 1; 23 | } 24 | 25 | return 0; 26 | } -------------------------------------------------------------------------------- /test/scope.cvm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | int main() { 5 | int a; 6 | { 7 | int b; 8 | while (b < 5) { 9 | int c; 10 | b = b + 1; 11 | { 12 | int d; 13 | 14 | } 15 | } 16 | return b; 17 | int e; 18 | } 19 | } --------------------------------------------------------------------------------