├── .gitattributes ├── .gitignore ├── CMakeLists.txt ├── CMakeSettings.json ├── Dockerfile ├── LICENSE ├── NanoAssembler ├── CMakeLists.txt ├── Mapper.cpp ├── Mapper.h ├── Nano.cpp ├── NanoAssembler.cpp ├── NanoAssembler.h └── Types.h ├── NanoDebugger ├── CMakeLists.txt ├── Debugger.cpp ├── Instructions.cpp ├── Instructions.h ├── NanoDebugger.cpp └── NanoDebugger.h ├── NanoUnitTests ├── CMakeLists.txt └── test.cpp ├── NanoVM ├── CMakeLists.txt ├── Nano.cpp ├── NanoVM.cpp └── NanoVM.h ├── README.md └── examples ├── HelloWorld.nano ├── SieveOfEratosthenes.nano ├── arithmetic.nano ├── fibonacciSequence.nano ├── labels.nano ├── labels2.nano ├── labels3.nano ├── labels4.nano └── loop.nano /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is needed for earlier builds of msysgit that do not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. 
An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 
53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .vs/ 3 | out/ 4 | build/ 5 | CMakeFiles/ 6 | *.filters 7 | *.cmake 8 | *.vcxproj 9 | CMakeCache.txt 10 | DartConfiguration.tcl 11 | NanoVM.sln -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt : Top-level CMake project file, do global configuration 2 | # and include sub-projects here. 3 | # 4 | cmake_minimum_required (VERSION 3.10) 5 | 6 | project ("NanoVM") 7 | 8 | # Include sub-projects. 9 | add_subdirectory ("NanoVM") 10 | add_subdirectory ("NanoAssembler") 11 | add_subdirectory ("NanoDebugger") 12 | add_subdirectory ("NanoUnitTests") 13 | 14 | include( CTest ) 15 | enable_testing() -------------------------------------------------------------------------------- /CMakeSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com//fwlink//?linkid=834763 for more information about this file. 
3 | "configurations": [ 4 | { 5 | "name": "x64-Debug", 6 | "generator": "Ninja", 7 | "configurationType": "Debug", 8 | "inheritEnvironments": [ "msvc_x64_x64" ], 9 | "buildRoot": "${projectDir}\\out\\build\\${name}", 10 | "installRoot": "${projectDir}\\out\\install\\${name}", 11 | "cmakeCommandArgs": "", 12 | "buildCommandArgs": "-v", 13 | "ctestCommandArgs": "" 14 | } 15 | ] 16 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | RUN apk --no-cache add cmake clang clang-dev make gcc g++ libc-dev linux-headers -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 etsubu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NanoAssembler/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt : CMake project for NanoVM, include source and define 2 | # project specific logic here. 3 | # 4 | cmake_minimum_required (VERSION 3.8) 5 | 6 | # Add source to this project's executable. 7 | add_executable (NanoAssembler "Nano.cpp" "NanoAssembler.cpp" "Mapper.cpp" "Mapper.h" "NanoAssembler.h" "Types.h") 8 | 9 | # TODO: Add tests and install targets if needed. 10 | -------------------------------------------------------------------------------- /NanoAssembler/Mapper.cpp: -------------------------------------------------------------------------------- 1 | #include "Mapper.h" 2 | 3 | Mapper::Mapper() { 4 | registerMap["reg0"] = 0x00; 5 | registerMap["reg1"] = 0x01; 6 | registerMap["reg2"] = 0x02; 7 | registerMap["reg3"] = 0x03; 8 | registerMap["reg4"] = 0x04; 9 | registerMap["reg5"] = 0x05; 10 | registerMap["bp"] = 0x06; 11 | registerMap["esp"] = 0x07; 12 | 13 | opcodeMap["mov"] = std::make_pair(0, 2); 14 | opcodeMap["add"] = std::make_pair(1, 2); 15 | opcodeMap["sub"] = std::make_pair(2, 2); 16 | opcodeMap["and"] = std::make_pair(3, 2); 17 | opcodeMap["or"] = std::make_pair(4, 2); 18 | opcodeMap["xor"] = std::make_pair(5, 2); 19 | opcodeMap["sar"] = std::make_pair(6, 2); 20 | opcodeMap["sal"] = std::make_pair(7, 2); 21 | opcodeMap["ror"] = std::make_pair(8, 2); 22 | opcodeMap["rol"] = std::make_pair(9, 2); 23 | opcodeMap["mul"] = std::make_pair(10, 2); 24 | opcodeMap["div"] = std::make_pair(11, 2); 25 | opcodeMap["mod"] = std::make_pair(12, 2); 26 | opcodeMap["cmp"] = std::make_pair(13, 2); 27 | 28 | opcodeMap["jz"] = std::make_pair(14, 1); 29 | opcodeMap["jnz"] = std::make_pair(15, 1); 30 | opcodeMap["jg"] = std::make_pair(16, 1); 31 | opcodeMap["js"] = std::make_pair(17, 1); 32 | opcodeMap["jmp"] = std::make_pair(18, 1); 33 | opcodeMap["not"] = 
std::make_pair(19, 1); 34 | opcodeMap["inc"] = std::make_pair(20, 1); 35 | opcodeMap["dec"] = std::make_pair(21, 1); 36 | opcodeMap["ret"] = std::make_pair(22, 0); 37 | 38 | opcodeMap["call"] = std::make_pair(23, 1); 39 | opcodeMap["push"] = std::make_pair(24, 1); 40 | opcodeMap["pop"] = std::make_pair(25, 1); 41 | opcodeMap["halt"] = std::make_pair(26, 0); 42 | opcodeMap["printi"] = std::make_pair(27, 1); 43 | opcodeMap["prints"] = std::make_pair(28, 1); 44 | opcodeMap["printc"] = std::make_pair(29, 1); 45 | opcodeMap["syscall"] = std::make_pair(30, 1); 46 | opcodeMap["memcpy"]= std::make_pair(31, 1); 47 | } 48 | 49 | Mapper::~Mapper() { 50 | 51 | } 52 | 53 | bool Mapper::canMapLabel(std::string label, unsigned int instructionIndex, std::unordered_map labelMap, 54 | std::vector instructions) { 55 | 56 | size_t labelIndex; 57 | try { 58 | labelIndex = labelMap.at(label); 59 | } 60 | catch (std::out_of_range) { 61 | return false; 62 | } 63 | if (labelIndex == instructionIndex) { 64 | return true; 65 | } 66 | if (labelIndex > instructionIndex) { 67 | for (int i = instructionIndex; i < labelIndex; i++) { 68 | if (!instructions[i].length) { 69 | return false; 70 | } 71 | } 72 | } 73 | else { 74 | for (int i = instructionIndex - 1; i >= labelIndex && i > 0; i--) { 75 | if (!instructions[i].length) { 76 | return false; 77 | } 78 | if (i == 0) 79 | return true; 80 | } 81 | } 82 | return true; 83 | } 84 | 85 | int Mapper::calculateSizeRequirement(std::string label, unsigned int instructionIndex, std::unordered_map labelMap, 86 | std::vector instructions) { 87 | 88 | size_t labelIndex; 89 | try { 90 | labelIndex = labelMap.at(label); 91 | } 92 | catch (std::out_of_range) { 93 | return 0; 94 | } 95 | if (labelIndex == instructionIndex) { 96 | return 0; 97 | } 98 | int64_t delta; 99 | if (labelIndex > instructionIndex) { 100 | delta = sizeof(uint64_t) + 2; // assume max length for the jump instruction (10 bytes) 101 | for (unsigned int i = instructionIndex + 1; i < 
labelIndex; i++) { 102 | size_t instructionLength = instructions[i].length; 103 | int unAssembled = 0; 104 | if (instructionLength == 0) { 105 | // Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown 106 | unAssembled++; 107 | } 108 | else { 109 | delta += instructionLength; 110 | } 111 | if (unAssembled) { 112 | // assume the unassembled instructions will take max space 113 | delta += (unAssembled * (2 + sizeof(uint64_t))); 114 | } 115 | } 116 | } 117 | else { 118 | delta = 0; 119 | for (unsigned int i = instructionIndex - 1; i >= labelIndex; i--) { 120 | size_t instructionLength = instructions[i].length; 121 | int unAssembled = 0; 122 | if (instructionLength == 0) { 123 | // Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown 124 | unAssembled++; 125 | } 126 | else { 127 | delta += instructionLength; 128 | } 129 | if (unAssembled) { 130 | // assume the unassembled instructions will take max space 131 | delta += (unAssembled * (2 + sizeof(uint64_t))); 132 | } 133 | if (i == 0) 134 | break; 135 | } 136 | delta = -delta; 137 | } 138 | if (SCHAR_MIN <= delta && delta <= SCHAR_MAX) { 139 | return (delta > 0) ? (sizeof(int8_t) + 2) : sizeof(int8_t); 140 | } 141 | else if (SHRT_MIN <= delta && delta <= SHRT_MAX) { 142 | return (delta > 0) ? (sizeof(int16_t) + 2) : sizeof(int16_t); 143 | } 144 | else if (INT32_MIN <= delta && delta <= INT32_MAX) { 145 | return (delta > 0) ? (sizeof(int32_t) + 2) : sizeof(int32_t); 146 | } 147 | return (delta > 0) ? 
(2 + sizeof(int64_t)) : sizeof(int64_t); 148 | } 149 | 150 | unsigned int Mapper::mapLabel(std::string label, unsigned int instructionIndex, std::unordered_map labelMap, 151 | std::vector &instructions, int64_t &value) { 152 | 153 | size_t labelIndex; 154 | try { 155 | labelIndex = labelMap.at(label); 156 | } 157 | catch (std::out_of_range) { 158 | return 0; 159 | } 160 | if (labelIndex == instructionIndex) { 161 | return 0; 162 | } 163 | int64_t delta; 164 | if (labelIndex > instructionIndex) { 165 | delta = (instructions[instructionIndex].length) ? instructions[instructionIndex].length : sizeof(uint64_t) + 2; // assume max length for the jump instruction (10 bytes) 166 | for (unsigned int i = instructionIndex + 1; i < labelIndex; i++) { 167 | size_t instructionLength = instructions[i].length; 168 | int unAssembled = 0; 169 | if (instructionLength == 0) { 170 | // Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown 171 | unAssembled++; 172 | return 0; 173 | } 174 | else { 175 | delta += instructionLength; 176 | } 177 | if (unAssembled) { 178 | // assume the unassembled instructions will take max space 179 | delta += (unAssembled * (2 + sizeof(uint64_t))); 180 | } 181 | } 182 | } 183 | else { 184 | delta = 0; // assume max length for the jump instruction (10 bytes) 185 | for (unsigned int i = instructionIndex - 1; i >= labelIndex; i--) { 186 | size_t instructionLength = instructions[i].length; 187 | int unAssembled = 0; 188 | if (instructionLength == 0) { 189 | // Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown 190 | unAssembled++; 191 | return 0; 192 | } 193 | else { 194 | delta += instructionLength; 195 | } 196 | if (unAssembled) { 197 | // assume the unassembled instructions will take max space 198 | delta += (unAssembled * (2 + sizeof(uint64_t))); 199 | } 200 | if (i == 0) 201 | break; 202 | } 203 | delta = -delta; 204 | } 
205 | value = delta; 206 | std::cout << "delta " << delta << std::endl; 207 | if (instructions[instructionIndex].length) { 208 | return instructions[instructionIndex].length - 2; 209 | } 210 | if (SCHAR_MIN <= value && value <= SCHAR_MAX) { 211 | if (delta > 0) 212 | value -= (sizeof(int64_t) - sizeof(int8_t)); 213 | return sizeof(int8_t); 214 | } 215 | else if (SHRT_MIN <= value && value <= SHRT_MAX - 1) { 216 | if (delta > 0) 217 | value -= sizeof(int16_t) - sizeof(int8_t); 218 | return sizeof(int16_t); 219 | } 220 | else if (INT32_MIN <= value && value <= INT32_MAX - 1) { 221 | if (delta > 0) 222 | value -= sizeof(int32_t) - sizeof(int8_t); 223 | return sizeof(int32_t); 224 | } 225 | return sizeof(int64_t); 226 | } 227 | 228 | bool Mapper::mapRegister(std::string regName, unsigned char& reg) { 229 | try { 230 | reg = registerMap.at(regName); 231 | return true; 232 | } 233 | catch (std::out_of_range) { 234 | return false; 235 | } 236 | } 237 | 238 | bool Mapper::mapOpcode(std::string opcodeName, AssemberInstruction&instruction) { 239 | try { 240 | std::pair opcode = opcodeMap.at(opcodeName); // at() throws std::out_of_range for an unknown opcode; operator[] would insert a default entry and never reach the catch below 241 | instruction.opcode = opcode.first; 242 | instruction.operands = opcode.second; 243 | return true; 244 | } 245 | catch (std::out_of_range) { 246 | return false; 247 | } 248 | } 249 | 250 | template void Mapper::mapImmediate(unsigned char* bytes, T value) { 251 | for (unsigned int i = 0; i < sizeof(T); i++) { 252 | bytes[i] = static_cast(value >> ((sizeof(T) * 8) - 8)); 253 | } 254 | } 255 | 256 | int Mapper::mapInteger(int64_t value64, unsigned char* bytes, unsigned int &length) { 257 | if (length == sizeof(int8_t) || (!length && INT8_MIN <= value64 && value64 <= INT8_MAX)) { 258 | *reinterpret_cast(bytes) = static_cast(value64); 259 | length = sizeof(int8_t); 260 | return Byte; 261 | } 262 | else if (length == sizeof(int16_t) || (INT16_MIN <= value64 && value64 <= INT16_MAX)) { 263 | *reinterpret_cast(bytes) = static_cast(value64); 264 | length = sizeof(int16_t); 265 | 
return Short; 266 | } 267 | else if (length == sizeof(int32_t) || (INT32_MIN <= value64 && value64 <= INT32_MAX)) { 268 | *reinterpret_cast(bytes) = static_cast(value64); 269 | length = sizeof(int32_t); 270 | return Dword; 271 | } 272 | else { 273 | *reinterpret_cast(bytes) = static_cast(value64); 274 | length = sizeof(int64_t); 275 | return Qword; 276 | } 277 | } 278 | 279 | int Mapper::mapImmediate(std::string value, unsigned char* bytes, unsigned int &length) { 280 | if (value.empty() || (value.length() == 1 && value[0] == '-')) 281 | return -1; 282 | try { 283 | if (value[0] == '-') 284 | { 285 | int64_t value64; 286 | if (value.length() > 3 && value[1] == '\'' && value[value.length() - 1] == '\'') { 287 | size_t diff = value.length() - 1 - 2; 288 | if (diff == 1) { 289 | value64 = -static_cast(value[2]); 290 | } 291 | else if (diff == 2 && value[2] == '\\') { 292 | switch (value[3]) { 293 | case 'n': 294 | value64 = -'\n'; 295 | break; 296 | case 'r': 297 | value64 = -'\r'; 298 | break; 299 | case 't': 300 | value64 = -'\t'; // the \t escape denotes the tab character, not the letter t 301 | break; 302 | default: 303 | return -1; 304 | } 305 | 306 | } 307 | else 308 | return -1; 309 | } 310 | else 311 | value64 = std::stoll(value, nullptr,0); 312 | return mapInteger(value64, bytes, length); 313 | } 314 | else { 315 | uint64_t value64; 316 | if (value.length() > 2 && value[0] == '\'' && value[value.length() - 1] == '\'') { 317 | size_t diff = value.length() - 2; 318 | if (diff == 1) { 319 | value64 = static_cast(value[1]); 320 | } 321 | else if (diff == 2 && value[1] == '\\') { 322 | switch (value[2]) { 323 | case 'n': 324 | value64 = '\n'; 325 | break; 326 | case 'r': 327 | value64 = '\r'; 328 | break; 329 | case 't': 330 | value64 = '\t'; // the \t escape denotes the tab character, not the letter t 331 | break; 332 | default: 333 | return -1; 334 | } 335 | } 336 | else 337 | return -1; 338 | } 339 | else 340 | value64 = std::stoull(value, nullptr, 0); 341 | if (value64 <= UINT8_MAX) { 342 | *reinterpret_cast(bytes) = static_cast(value64); 343 | length = sizeof(uint8_t); 344 | 
return Byte; 345 | } 346 | else if (value64 <= UINT16_MAX) { 347 | *reinterpret_cast(bytes) = static_cast(value64); 348 | length = sizeof(uint16_t); 349 | return Short; 350 | } 351 | else if (value64 <= UINT32_MAX) { 352 | *reinterpret_cast(bytes) = static_cast(value64); 353 | length = sizeof(uint32_t); 354 | return Dword; 355 | } 356 | else { 357 | *reinterpret_cast(bytes) = static_cast(value64); 358 | length = sizeof(uint64_t); 359 | return Qword; 360 | } 361 | } 362 | } 363 | catch (std::invalid_argument) { 364 | return -1; 365 | } 366 | catch (std::out_of_range) { 367 | return -2; 368 | } 369 | } -------------------------------------------------------------------------------- /NanoAssembler/Mapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "Types.h" 8 | 9 | /** 10 | * \brief Handles mapping of text representation of instruction parts to their corresponding structures 11 | * 12 | * Mapper implements handling for text representation of insturction parts and calculation of sizes for instructions 13 | * containing relative addresses 14 | */ 15 | class Mapper { 16 | public: 17 | 18 | /** 19 | * Initializes Mapper 20 | */ 21 | Mapper(); 22 | 23 | /** 24 | * Mapper destructor 25 | */ 26 | ~Mapper(); 27 | 28 | /** 29 | * Maps text representation of opcode to instruction struct 30 | * @param opcodeName Text representation of the opcode 31 | * @param[out] instruction Instruction struct reference to update with opcode value 32 | * @return True if opcode was resolved, false if the opcode was unknown 33 | */ 34 | bool mapOpcode(std::string opcodeName, AssemberInstruction& instruction); 35 | 36 | /** 37 | * Maps a text representation of register to its corresponding register value 38 | * @param[out] reg Reference to the value to hold the resolved register value 39 | * @return True if register name was resolved, false if the name was unknown 
40 | */ 41 | bool mapRegister(std::string regName, unsigned char& reg); 42 | 43 | /** 44 | * Maps a text representation of immediate value to bytes 45 | * @param value Text representation of integer 46 | * @param[out] bytes Pointer to array to hold the bytes of the integer 47 | * @param[out] length Reference to integer to hold the amount of bytes of the resolved immediate value 48 | */ 49 | int mapImmediate(std::string value, unsigned char* bytes, unsigned int& length); 50 | 51 | /** 52 | * Checks if the given text label can be resolved as relative address from the given instruction 53 | * @param label Name of the label to try mapping to address 54 | * @param instructionIndex The index of the instruction we try to resolve the relative label address from 55 | * @param labelMap Map structure holding all labels 56 | * @param instructions List of all the instructions 57 | * @return True if the label can be resolved to relative address from the current instruction, false if not 58 | */ 59 | bool canMapLabel(std::string label, unsigned int instructionIndex, std::unordered_map labelMap, 60 | std::vector instructions); 61 | 62 | /** 63 | * Calculate the size requirement in bytes for the relative label address from the current instruction. 64 | * Should be called only if canMapLabel() returns true 65 | * @param label Name of the label to try mapping to address 66 | * @param instructionIndex The index of the instruction we try to resolve the relative label address from 67 | * @param labelMap Map structure holding all labels 68 | * @param instructions List of all the instructions 69 | * @return Size of the relative address in bytes 70 | */ 71 | int calculateSizeRequirement(std::string label, unsigned int instructionIndex, std::unordered_map labelMap, 72 | std::vector instructions); 73 | 74 | /** 75 | * Maps label to relative address from the current instruction. 
Should noly be called if canMapLabel returns true 76 | * @param label Name of the label to try mapping to address 77 | * @param instructionIndex The index of the instruction we try to resolve the relative label address from 78 | * @param labelMap Map structure holding all labels 79 | * @param[out] instructions[out] Reference to the list of all the instructions. Corresponding instructions will be updated with the relative address 80 | * @return Size of the resolved relative address 81 | */ 82 | unsigned int mapLabel(std::string label, unsigned int instructionIndex, std::unordered_map labelMap, 83 | std::vector &instructions, int64_t &value); 84 | 85 | /** 86 | * Maps integer to bytes with minimum required bytes 87 | * @param value64 64-bit signed integer representing the value to be mapped in bytes 88 | * @param[out] bytes Pointer to array that will be updated with the integer bytes 89 | * @param[out] length Reference that will hold the number of bytes that were stored in the array 90 | * @return Size mask for the instruction bytes to use for setting the size of immediate value 91 | */ 92 | int mapInteger(int64_t value64, unsigned char* bytes, unsigned int &length); 93 | 94 | /** 95 | * Copies given integer to byte array as bytes 96 | * @param bytes Pointer to array to be updated with the integer bytes 97 | * @param value Integer value to be copied 98 | */ 99 | template void mapImmediate(unsigned char *bytes, T value); 100 | 101 | private: 102 | std::unordered_map> opcodeMap; /**< Map between all the opcodes text representation and corresponding values */ 103 | std::unordered_map registerMap; /**< Map between register text representations and register number */ 104 | }; 105 | -------------------------------------------------------------------------------- /NanoAssembler/Nano.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoAssembler.h" 2 | 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | if (argc <= 1) { 7 | 
std::cout << "Usage NanoAssembler.exe [FILE]" << std::endl; 8 | return 0; 9 | } 10 | NanoAssembler assembler; 11 | std::string input = argv[1]; 12 | std::string output = input.substr(0, input.find_last_of('.')) + ".nanoc"; 13 | AssemblerReturnValues ret = assembler.assembleToFile(input, output); 14 | switch (ret) { 15 | case AssemblerReturnValues::Success: 16 | std::cout << "File successfully assembled to: " << output << std::endl; 17 | break; 18 | case AssemblerReturnValues::IOError: 19 | std::cout << "There was an error while reading/writing a file on disk" << std::endl; 20 | break; 21 | case AssemblerReturnValues::MemoryAllocationError: 22 | std::cout << "Failed to dynamically allocate memory" << std::endl; 23 | break; 24 | case AssemblerReturnValues::AssemblerError: 25 | std::cout << "The input file could be compiled" << std::endl; 26 | break; 27 | default: 28 | std::cout << "Received unknown error: " << ret << std::endl; 29 | } 30 | return ret; 31 | } 32 | -------------------------------------------------------------------------------- /NanoAssembler/NanoAssembler.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoAssembler.h" 2 | 3 | NanoAssembler::NanoAssembler() : mapper() { 4 | // Constructor 5 | } 6 | 7 | NanoAssembler::~NanoAssembler() { 8 | // Destructor 9 | } 10 | 11 | bool NanoAssembler::readLines(std::string file, std::vector &lines, std::unordered_map &labelMap) { 12 | std::string line; 13 | std::ifstream f(file); 14 | unsigned int lineNumber = 1; 15 | if (f.is_open()) { 16 | while (std::getline(f, line)) { 17 | AssemberInstruction instruction; 18 | instruction.assembled = false; 19 | instruction.length = 0; 20 | instruction.lineNumber = lineNumber; 21 | lineNumber++; 22 | // remove comments 23 | size_t index = line.find(";"); 24 | if (index != -1) { 25 | if (index == 0) { 26 | continue; 27 | } 28 | line = line.substr(0, index); 29 | } 30 | line = std::regex_replace(line, 
std::regex("^\\s+|\\s+$"), ""); // trim leading and trailing whitespaces 31 | if (line.empty()) // Skip empty lines and comments (prefix ";") 32 | { 33 | continue; 34 | } 35 | if (line[0] == ':' && line.length() > 1) { 36 | // label 37 | labelMap[line.substr(1)] = lines.size(); 38 | std::cout << "Label: " << line << std::endl; 39 | continue; 40 | } 41 | instruction.line = std::regex_replace(line, std::regex("\\s{2,}"), " "); // replace all consecutive whitespaces with single space 42 | instruction.line.erase(std::remove(instruction.line.begin(), instruction.line.end(), ','), instruction.line.end()); // Remove ',' 43 | std::transform(instruction.line.begin(), instruction.line.end(), instruction.line.begin(), ::tolower); // to lowercase 44 | lines.push_back(instruction); 45 | } 46 | if (!lines.empty() && lines.at(lines.size() - 1).line != "halt") { 47 | std::cout << "Adding line \"halt\" to the end of file!" << std::endl; 48 | AssemberInstruction instruction; 49 | instruction.assembled = false; 50 | instruction.length = 0; 51 | instruction.lineNumber = lineNumber; 52 | instruction.line = "halt"; 53 | lines.push_back(instruction); 54 | } 55 | return true; 56 | } 57 | return false; 58 | } 59 | 60 | int NanoAssembler::assembleInstruction(int i, std::vector &instructionBytes, std::unordered_map labelMap, bool initial) { 61 | // Skip already assembled instructions 62 | if (instructionBytes[i].assembled) 63 | return 1; 64 | std::istringstream iss(instructionBytes[i].line); 65 | std::vector parts(std::istream_iterator{iss}, std::istream_iterator()); 66 | AssemberInstruction&instruction = instructionBytes[i]; 67 | // Check that the instruction is valid e.g. 
'mov' 68 | if (!mapper.mapOpcode(parts[0], instruction)) { 69 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 70 | std::cout << "Unknown instruction \"" << parts[0] << std::endl; 71 | return 0; 72 | } 73 | // Check that there are required amount of parameters for the instruction e.g. 'mov reg0,reg1' requires 2 74 | if (instruction.operands != parts.size() - 1) { 75 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 76 | std::cout << "Invalid amount of parameters for instruction \"" << parts[0] << "\" expected: " << instruction.operands 77 | << " but received: " << (parts.size() - 1) << std::endl; 78 | return 0; 79 | } 80 | // assemble instruction with two operands 81 | if (instruction.operands == 2) { 82 | unsigned char dstReg; 83 | bool isDstMem = false, isSrcMem = false; 84 | // set flags whether the operands refer to memory address (operands are to be treated as pointers) 85 | if (parts[1][0] == '@') { 86 | parts[1] = parts[1].substr(1); 87 | isDstMem = true; 88 | } 89 | if (parts[2][0] == '@') { 90 | parts[2] = parts[2].substr(1); 91 | isSrcMem = true; 92 | } 93 | // parse destination register 94 | if (!mapper.mapRegister(parts[1], dstReg)) { 95 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 96 | std::cout << "Invalid register name: \"" << parts[1] << "\"" << std::endl; 97 | return 0; 98 | } 99 | // add first instruction byte 100 | instruction.bytecode[0] = ((dstReg << 5) | (instruction.opcode)); 101 | unsigned char srcReg; 102 | // parse source register if it exists (optional parameter) 103 | if (mapper.mapRegister(parts[2], srcReg)) { 104 | // second operand was register. add final instruction byte 105 | instruction.bytecode[1] = ((DataType::Reg | SRC_SIZE | (isSrcMem ? SRC_MEM : 0) | (isDstMem ? 
DST_MEM : 0)) | srcReg); 106 | instruction.length = 2; 107 | instruction.assembled = true; 108 | } 109 | else { 110 | //Second parameter is immediate value 111 | unsigned int length = 0; 112 | int size = mapper.mapImmediate(parts[2], instruction.bytecode + 2, length); 113 | if (size == -1) { 114 | // parameter was not integer or register 115 | if (labelMap.find(parts[2]) == labelMap.end()) { 116 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 117 | std::cout << "Unknown parameter: \"" << parts[2] << "\""; 118 | return 0; 119 | } 120 | 121 | } 122 | else if (size == -2) { 123 | // immediate value couldn't fit in 64bit unsinged integer... 124 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 125 | std::cout << "Integer too large: " << parts[2] << std::endl; 126 | return 0; 127 | } 128 | // we now have the size of instruction. Update to the previous byte 129 | instruction.bytecode[1] = ((DataType::Immediate | size | (isSrcMem ? SRC_MEM : 0) | (isDstMem ? DST_MEM : 0))); 130 | instruction.length = 2 + length; 131 | instruction.assembled = true; 132 | } 133 | } 134 | else if (instruction.operands == 0) { 135 | // Instructions w/o operands or with known to have one register can be pushed by the opcode (e.g. halt, inc reg0) 136 | instruction.bytecode[0] = instruction.opcode; 137 | instruction.length = 1; 138 | instruction.assembled = true; 139 | // Check if the instruction has register parameter 140 | if (parts.size() == 2) { 141 | unsigned char dstReg; 142 | if (!mapper.mapRegister(parts[1], dstReg)) { 143 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 144 | std::cout << "Invalid register name: \"" << parts[1] << "\"" << std::endl; 145 | return 0; 146 | } 147 | instruction.bytecode[0] |= (dstReg << 5); 148 | } 149 | } 150 | else if (instruction.operands == 1) { 151 | // Instruction has only one operand (e.g. jz, push, pop, ...) 
152 | unsigned char srcReg; 153 | bool isSrcMem = false; 154 | // set flags whether the operands refer to memory address (operands are to be treated as pointers) 155 | if (parts[1][0] == '@') { 156 | parts[1] = parts[1].substr(1); 157 | isSrcMem = true; 158 | } 159 | // Check if the single operand is register 160 | if (mapper.mapRegister(parts[1], srcReg)) { 161 | // Operand is register 162 | instruction.bytecode[0] = instruction.opcode; 163 | instruction.bytecode[1] = ((DataType::Reg | SRC_SIZE | (isSrcMem ? SRC_MEM : 0) | srcReg)); 164 | instruction.length = 2; 165 | instruction.assembled = true; 166 | } 167 | else { 168 | // The single operand is immediate value 169 | instruction.bytecode[0] = instruction.opcode; 170 | unsigned int length = 0; 171 | // parse the immediate value 172 | int size = mapper.mapImmediate(parts[1], instruction.bytecode + 2, length); 173 | if (size == -1) { 174 | // parameter was not integer or register 175 | if (labelMap.find(parts[1]) == labelMap.end()) { 176 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 177 | std::cout << "Unknown parameter: \"" << parts[1] << "\"" << std::endl; 178 | return 0; 179 | } 180 | // Check if the label can already be mapped to an immediate value 181 | if (mapper.canMapLabel(parts[1], i, labelMap, instructionBytes)) { 182 | // Assemble the instruction 183 | int64_t value; 184 | length = mapper.mapLabel(parts[1], i, labelMap, instructionBytes, value); 185 | if (length == 0) { 186 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 187 | std::cout << "Failed to map label: \"" << parts[1] << "\"" << std::endl; 188 | return 0; 189 | } 190 | std::cout << "Mapped to " << value << std::endl; 191 | size = mapper.mapInteger(value, instruction.bytecode + 2, length); 192 | } 193 | else if (initial) { 194 | return -1; 195 | } 196 | else { 197 | size = mapper.calculateSizeRequirement(parts[1], i, labelMap, instructionBytes); 198 | if (size == 
0) { 199 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 200 | std::cout << "Failed to map label: \"" << parts[1] << "\"" << std::endl; 201 | return 0; 202 | } 203 | instruction.length = size; 204 | std::cout << parts[1] << " require " << size << " bytes" << std::endl; 205 | std::cout << "Did not map label but defined size requirement" << std::endl; 206 | return -1; 207 | } 208 | } 209 | else if (size == -2) { 210 | // immediate value couldn't fit in 64bit unsinged integer... 211 | std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl; 212 | std::cout << "Integer too large: " << parts[1] << std::endl; 213 | return 0; 214 | } 215 | // we now have the size of instruction. Update to the previous byte 216 | instruction.bytecode[1] = ((DataType::Immediate | size | (isSrcMem ? SRC_MEM : 0))); 217 | instruction.length = 2 + length; 218 | instruction.assembled = true; 219 | } 220 | } 221 | return 1; 222 | } 223 | 224 | bool NanoAssembler::assemble(std::vector &instruction, std::unordered_map &labelMap) { 225 | int rounds = 3; 226 | // Iterate over all instructions 3 times if needed because instructions with labels need other instructions to be assembled to calculate 227 | // relative distance from itself to the label 228 | bool reiterate = true; 229 | while (rounds-- && reiterate) { 230 | reiterate = false; 231 | bool ready = true; 232 | for (int i = 0; i < instruction.size(); i++) { 233 | int success = assembleInstruction(i, instruction, labelMap, rounds == 2); 234 | if (instruction[i].assembled) 235 | continue; 236 | ready &= success == 1; 237 | if (success == 0) 238 | return false; 239 | if (success == -1) { 240 | reiterate = true; 241 | std::cout << "require reiteration for mapping label" << std::endl; 242 | } 243 | } 244 | if (ready) 245 | return true; 246 | } 247 | return false; 248 | } 249 | 250 | AssemblerReturnValues NanoAssembler::assembleToFile(std::string inputFile, std::string outputFile) 
{ 251 | std::vector lines; 252 | std::unordered_map labelMap; 253 | // Load file from disk 254 | if (!readLines(inputFile, lines, labelMap)) { 255 | return AssemblerReturnValues::IOError; 256 | } 257 | // Compile the file to bytecode 258 | if (assemble(lines, labelMap)) { 259 | // Write to disk 260 | std::ofstream file(outputFile, std::ios::out | std::ios::binary); 261 | if (file.is_open()) { 262 | for (AssemberInstruction inst : lines) 263 | file.write((const char*)& inst.bytecode[0], inst.length); 264 | file.close(); 265 | return AssemblerReturnValues::Success; 266 | } 267 | return AssemblerReturnValues::IOError; 268 | } 269 | return AssemblerReturnValues::AssemblerError; 270 | } 271 | 272 | AssemblerReturnValues NanoAssembler::assembleToMemory(std::string inputFile, unsigned char*& bytecodeBuffer, unsigned int& size) { 273 | std::vector lines; 274 | std::unordered_map labelMap; 275 | // Load assembler file from disk 276 | if (!readLines(inputFile, lines, labelMap)) { 277 | return AssemblerReturnValues::IOError; 278 | } 279 | // Compile to bytecode 280 | if (assemble(lines, labelMap)) { 281 | size = 0; 282 | // Calculate the resulting bytecode size 283 | for (AssemberInstruction inst : lines) { 284 | size += inst.length; 285 | } 286 | // Allocate buffer to store the bytecode 287 | bytecodeBuffer = new unsigned char[size]; 288 | if (bytecodeBuffer) { 289 | unsigned int index = 0; 290 | // Copy the bytecode to output buffer 291 | for (AssemberInstruction inst : lines) { 292 | memcpy(bytecodeBuffer + index, inst.bytecode, inst.length); 293 | index += inst.length; 294 | } 295 | return AssemblerReturnValues::Success; 296 | } 297 | return AssemblerReturnValues::MemoryAllocationError; 298 | } 299 | return AssemblerReturnValues::AssemblerError; 300 | } -------------------------------------------------------------------------------- /NanoAssembler/NanoAssembler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 
#include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "Types.h" 12 | #include "Mapper.h" 13 | 14 | /** 15 | * NanoAssembler instance handles loading assembler files and compiling those to bytecode format 16 | */ 17 | class NanoAssembler { 18 | public: 19 | NanoAssembler(); 20 | ~NanoAssembler(); 21 | 22 | /** 23 | * \brief Assembles input file and writes the resulting bytecode to a file on disk 24 | * 25 | * NanoAssembler loads the input file containing assembler instructions, compiles those in to binary format 26 | * and writes them to a binary file on disk 27 | * @param inputFile Points to the assembler file to load 28 | * @param outputFile Points to the file where the compiled bytecode will be written 29 | * @return AssemblerReturnValues::Success on success; IOError if a file could not be read or written, AssemblerError if assembling failed 30 | */ 31 | AssemblerReturnValues assembleToFile(std::string inputFile, std::string outputFile); 32 | 33 | /** 34 | * \brief Assembles input file to buffer in memory 35 | * 36 | * NanoAssembler loads the input file containing assembler instructions, compiles those in to binary format 37 | * and outputs them to dynamically allocated buffer 38 | * @param inputFile Points to the assembler file to load 39 | * @param[out] bytecodeBuffer Reference to a pointer that will point to the compiled bytecode buffer (allocated with new[]; the assembler does not free it) 40 | * @param[out] size Reference to an unsigned int that will hold the size of the bytecodeBuffer in bytes 41 | * @return AssemblerReturnValues::Success on success; IOError if the input could not be read, AssemblerError if assembling failed, MemoryAllocationError if the buffer could not be allocated 42 | */ 43 | AssemblerReturnValues assembleToMemory(std::string inputFile, unsigned char*& bytecodeBuffer, unsigned int &size); 44 | private: 45 | bool readLines(std::string file, std::vector& lines, std::unordered_map& labelMap); 46 | int assembleInstruction(int i, std::vector& instructionBytes, std::unordered_map labelMap, bool initial); 47 | bool assemble(std::vector& instruction, std::unordered_map& labelMap); 48 | 49 | Mapper mapper; 50 | }; 
-------------------------------------------------------------------------------- /NanoAssembler/Types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | constexpr uint8_t SRC_TYPE = 0b10000000; 6 | constexpr uint8_t SRC_SIZE = 0b01100000; 7 | constexpr uint8_t DST_MEM = 0b00010000; 8 | constexpr uint8_t SRC_MEM = 0b00001000; 9 | 10 | #ifndef TYPE_H 11 | #define TYPE_H 12 | 13 | /** 14 | * DataType enum holds the type mask of value reg/immediate 15 | */ 16 | enum DataType { 17 | Reg = 0, 18 | Immediate = 0b10000000 19 | }; 20 | 21 | enum Size { 22 | Byte = 0b00000000, 23 | Short = 0b00100000, 24 | Dword = 0b01000000, 25 | Qword = 0b01100000 26 | }; 27 | #endif 28 | 29 | /** 30 | * Instruction represent a single instruction to be assembled 31 | */ 32 | struct AssemberInstruction { 33 | std::string line; 34 | unsigned char bytecode[2 + sizeof(int64_t)]; 35 | unsigned char opcode; 36 | unsigned int operands; 37 | unsigned int length; 38 | unsigned int lineNumber; 39 | bool assembled; 40 | }; 41 | typedef struct AssemberInstruction AssemberInstruction; 42 | 43 | enum AssemblerReturnValues { 44 | Success, 45 | AssemblerError, 46 | MemoryAllocationError, 47 | IOError 48 | }; -------------------------------------------------------------------------------- /NanoDebugger/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt : CMake project for NanoVM, include source and define 2 | # project specific logic here. 3 | # 4 | cmake_minimum_required (VERSION 3.8) 5 | include_directories(../NanoVM) 6 | # Add source to this project's executable. 7 | add_executable (NanoDebugger "NanoDebugger.cpp" "../NanoVM/NanoVM.cpp" "../NanoVM/NanoVM.h" "NanoDebugger.h" "Instructions.cpp" "Instructions.h" "Debugger.cpp") 8 | 9 | # TODO: Add tests and install targets if needed. 
10 | -------------------------------------------------------------------------------- /NanoDebugger/Debugger.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoDebugger.h" 2 | 3 | int main(int argc, char *argv[]) 4 | { 5 | if (argc < 1) { 6 | std::cout << "Usage NanoDebugger.exe [FILE]" << std::endl; 7 | } 8 | std::string file = (argv[1]); 9 | NanoDebugger debugger(file); 10 | debugger.debug(); 11 | return 0; 12 | } -------------------------------------------------------------------------------- /NanoDebugger/Instructions.cpp: -------------------------------------------------------------------------------- 1 | #include "Instructions.h" 2 | 3 | const char *instructionStr[] = { "mov","add", "sub","and", "or", "xor", "sar", "sal", "ror", "rol", "mul", 4 | "div", "mod", "cmp", "jz", "jnz", "jg", "js", "jmp", "not", "inc", "dec", "ret", "call", "push", "pop", "halt", 5 | "printi", "prints", "printc", "syscall", "memcpy" }; -------------------------------------------------------------------------------- /NanoDebugger/Instructions.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern const char *instructionStr[]; -------------------------------------------------------------------------------- /NanoDebugger/NanoDebugger.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoDebugger.h" 2 | 3 | NanoDebugger::NanoDebugger(std::string file) : NanoVM(file) { 4 | run = false; 5 | } 6 | 7 | NanoDebugger::NanoDebugger(unsigned char *bytecode, uint64_t size) : NanoVM(bytecode, size) { 8 | run = false; 9 | } 10 | 11 | NanoDebugger::~NanoDebugger() { 12 | 13 | } 14 | 15 | bool NanoDebugger::disassembleInstruction(std::string &instruction) { 16 | Instruction ins; 17 | if (!fetch(ins)) { 18 | return false; 19 | } 20 | std::string opcode = instructionStr[ins.opcode]; 21 | if (ins.opcode == Opcodes::Halt || ins.opcode == 
Opcodes::Ret) { 22 | instruction = opcode; 23 | return true; 24 | } 25 | // Single param instructions 26 | if (ins.opcode == Opcodes::Jg || ins.opcode == Opcodes::Js || ins.opcode == Opcodes::Jnz || ins.opcode == Opcodes::Jz || 27 | ins.opcode == Opcodes::Jmp || ins.opcode == Opcodes::Push || ins.opcode == Opcodes::Pop || ins.opcode == Opcodes::Call || 28 | ins.opcode == Opcodes::Dec || ins.opcode == Opcodes::Inc || ins.opcode == Opcodes::Printc || ins.opcode == Opcodes::Printi || 29 | ins.opcode == Opcodes::Prints) { 30 | if (ins.srcType == DataType::Reg) { 31 | instruction = opcode + ((ins.isSrcMem) ? " @reg" : " reg") + std::to_string(ins.srcReg); 32 | } 33 | else { 34 | instruction = opcode + ((ins.isSrcMem) ? " @" : " ") + std::to_string(ins.immediate); 35 | } 36 | } 37 | // two param instruction 38 | else { 39 | if (ins.srcType == DataType::Reg) { 40 | instruction = opcode + ((ins.isDstMem) ? " @reg" : " reg") + std::to_string(ins.dstReg) + ", " + 41 | ((ins.isSrcMem) ? " @reg" : "reg") + std::to_string(ins.srcReg); 42 | } 43 | else { 44 | instruction = opcode + ((ins.isDstMem) ? " @reg" : " reg") + std::to_string(ins.dstReg) + ", " + 45 | ((ins.isSrcMem) ? "@" : "") + std::to_string(ins.immediate); 46 | } 47 | } 48 | return true; 49 | } 50 | 51 | bool NanoDebugger::handleInteractive() { 52 | int value = 0; 53 | do { 54 | std::string instruction; 55 | if (!disassembleInstruction(instruction)) { 56 | std::cout << "Failed to fetch instruction: IP out of bounds! IP: " << cpu.registers[ip] << std::endl; 57 | return false; 58 | } 59 | std::cout << cpu.registers[ip] << ". 
" << instruction << std::endl; 60 | std::cout << "> "; 61 | value = getchar(); 62 | std::cout << "\b\b"; 63 | if (value == 'h') { 64 | std::cout << "\n(s)tack\nr(e)gisters\n(b)reakpoint\n(r)un\n(c)lean breakpoint\n(q)uit" << std::endl; 65 | } 66 | else if (value == 'e') { 67 | std::cout << "\nRegisters:\n"; 68 | for (int i = 0; i < 8; i++) { 69 | std::cout << "reg" << i << ": " << cpu.registers[i] << std::endl; 70 | } 71 | } 72 | else if (value == 'r') { 73 | run = true; 74 | return true; 75 | } 76 | else if (value == 'b') { 77 | std::cout << "Breakpoint where (offset): "; 78 | int offset; 79 | std::cin >> offset; 80 | breakpoints.insert(offset); 81 | } 82 | else if (value == 'c') { 83 | auto a = breakpoints.find(cpu.registers[ip]); 84 | if (a == breakpoints.end()) { 85 | std::cout << "No breakpoint was placed here!" << std::endl; 86 | } 87 | else { 88 | breakpoints.erase(a); 89 | std::cout << "Breakpoint removed!" << std::endl; 90 | } 91 | } 92 | else if (value == 's') { 93 | printStack(); 94 | } 95 | else if (value == 'q') { 96 | return false; 97 | } 98 | } while (value != 13); 99 | return true; 100 | } 101 | 102 | void NanoDebugger::printStack() { 103 | int counter = 0; 104 | unsigned char *p = (cpu.stackBase); 105 | uint64_t size = (cpu.registers[esp] + cpu.codeBase) - cpu.stackBase; 106 | std::cout << "\nStack size: " << size << "\n"; 107 | for (int i = 0; i < size; i++) { 108 | if (counter == 7) { 109 | counter = 0; 110 | std::printf("%02X | %c %c %c %c %c %c %c %c\n", p[i], p[i - 7], p[i - 6], p[i - 5], p[i - 4], p[i - 3], p[i - 2], p[i - 1], p[i]); 111 | continue; 112 | } 113 | else { 114 | std::printf("%02X ", p[i]); 115 | } 116 | counter++; 117 | } 118 | if (counter) { 119 | std::printf("| "); 120 | for (int i = counter; i > 0; i--) { 121 | std::printf("%c ", p[size - i]); 122 | } 123 | std::printf("\n"); 124 | } 125 | std::printf("\n"); 126 | } 127 | 128 | bool NanoDebugger::debug() { 129 | run = false; 130 | while (cpu.registers[ip] < cpu.bytecodeSize) 
{ 131 | Instruction inst; 132 | if (fetch(inst)) { 133 | if (breakpoints.find(cpu.registers[ip]) != breakpoints.end()) { 134 | std::cout << "Breakpoint triggered! " << cpu.registers[ip] << std::endl; 135 | run = false; 136 | handleInteractive(); 137 | } 138 | else if (!run) { 139 | handleInteractive(); 140 | } 141 | if (inst.opcode == Halt) { 142 | std::cout << "VM halted!" << std::endl; 143 | handleInteractive(); 144 | break; 145 | } 146 | if (!execute(inst)) { 147 | switch (errorFlag) { 148 | case MEMORY_ACCESS: 149 | std::cout << "Tried to read/write memory outside of VM!" << std::endl; 150 | break; 151 | default: 152 | std::cout << "Unknown error!" << std::endl; 153 | } 154 | return false; 155 | } 156 | } 157 | else { 158 | std::cout << "Invalid instruction!" << std::endl; 159 | return false; 160 | } 161 | } 162 | std::cout << "VM exited with return code: " << cpu.registers[Reg0] << std::endl; 163 | handleInteractive(); 164 | return true; 165 | } 166 | -------------------------------------------------------------------------------- /NanoDebugger/NanoDebugger.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "NanoVM.h" 3 | #include "Instructions.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | // Windows only #include 10 | 11 | /** 12 | * \brief NanoDebugger inherits NanoVM allowing more control over the execution of the program 13 | * 14 | * NanoDebugger inherits NanoVM implementation and allows to step through the execution, dump stack, set breakpoints 15 | * disassembling of instructions and other debugger behavior. 
16 | */ 17 | class NanoDebugger : NanoVM { 18 | public: 19 | /** 20 | * Initializes NanoDebugger 21 | * @param file Bytecode file to load 22 | */ 23 | NanoDebugger(std::string file); 24 | 25 | /** 26 | * Initializes NanoDebugger 27 | * @param bytecode Bytecode buffer to load 28 | * @param size Size of the bytecode buffer 29 | */ 30 | NanoDebugger(unsigned char *bytecode, uint64_t size); 31 | 32 | /** 33 | * NanDebugger destructor 34 | */ 35 | ~NanoDebugger(); 36 | 37 | /** 38 | * Starts interactive debugging of the loaded bytecode program 39 | * @return True if bytecode program was executed successfully, false if error occurred 40 | */ 41 | bool debug(); 42 | //bool disassembleToFile(std::string out); 43 | private: 44 | 45 | /** 46 | * Disassembles the next instruction pointed by IP 47 | * @param[out] instruction String reference to hold the text representation of disassembled instruction 48 | * @return True if instruction was disassembled successfully, false if failed 49 | */ 50 | bool disassembleInstruction(std::string &instruction); 51 | 52 | /** 53 | * Prints stack dump of the stack memory on screen 54 | */ 55 | void printStack(); 56 | 57 | /** 58 | * Handles interactive mode for the current instruction allowing user to interact with the program 59 | * @return True if successfull, false if failed 60 | */ 61 | bool handleInteractive(); 62 | 63 | std::set breakpoints; /**< Set of all active breakpoints */ 64 | bool run; /**< Boolean value whether to run until breakpoint is hit or false if stepping through */ 65 | }; 66 | -------------------------------------------------------------------------------- /NanoUnitTests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt : CMake project for NanoVM, include source and define 2 | # project specific logic here. 3 | # 4 | cmake_minimum_required (VERSION 3.8) 5 | include_directories(../NanoVM) 6 | # Add source to this project's executable. 
7 | # add_executable (NanoUnitTests "test.cpp" "../NanoAssembler/NanoAssembler.cpp" "../NanoAssembler/NanoAssembler.h" "../NanoVM/NanoVM.cpp" "../NanoVM/NanoVM.h" "NanoDebugger.h" "Instructions.cpp" "Instructions.h" "Debugger.cpp") 8 | add_executable (NanoUnitTests "test.cpp" "../NanoAssembler/NanoAssembler.cpp" "../NanoAssembler/NanoAssembler.h" "../NanoAssembler/Mapper.h" "../NanoAssembler/Mapper.cpp" "../NanoAssembler/Types.h" "../NanoVM/NanoVM.cpp" "../NanoVM/NanoVM.h") 9 | # TODO: Add tests and install targets if needed. 10 | 11 | set_property(TARGET NanoUnitTests PROPERTY CXX_STANDARD 20) 12 | set_property(TARGET NanoUnitTests PROPERTY CXX_STANDARD_REQUIRED ON) 13 | -------------------------------------------------------------------------------- /NanoUnitTests/test.cpp: -------------------------------------------------------------------------------- 1 | #include "../NanoAssembler/NanoAssembler.h" 2 | #include "../NanoVM/NanoVM.h" 3 | #include 4 | #include 5 | #include 6 | namespace fs = std::filesystem; 7 | 8 | /** 9 | * This file contains unit tests for NanoVM + NanoAssembler 10 | * The tests load up Nano assembler files (.nano file extension), assemble those to binary files, run those 11 | * and verify the results. 
12 | * This way we do not bind the unit tests to bytecode format but rather enforce that the NanoVM bytecode 13 | * is executed properly and does not contain bugs while allowing us to modify the assembler 14 | * without breaking the tests 15 | */ 16 | 17 | int runSingleTest(NanoAssembler& assembler, std::string& path) { 18 | unsigned char* bytecode; 19 | unsigned int length; 20 | AssemblerReturnValues ret = assembler.assembleToMemory(path, bytecode, length); 21 | // Assembling should succeed 22 | if (ret != AssemblerReturnValues::Success) 23 | return 1; 24 | // Read the assembly file 25 | std::string expectedReturnKey = "NANO_TEST_EXPECT_RETURN="; 26 | std::ifstream file(path, std::ios::in | std::ios::ate); 27 | int expectedValue; 28 | if (file.is_open()) 29 | { 30 | unsigned long size = file.tellg(); 31 | char *memblock = new char[size + 1]; 32 | file.seekg(0, std::ios::beg); 33 | file.read(memblock, size); 34 | file.close(); 35 | memblock[size] = '\0'; 36 | std::string content(memblock); 37 | delete[] memblock; 38 | int index = content.find(expectedReturnKey); 39 | if (index == -1 || index == content.length() - expectedReturnKey.length()) { 40 | // Not a test file 41 | return 0; 42 | } 43 | std::string expectedReturnStr = content.substr(index + expectedReturnKey.length()); 44 | try { 45 | expectedValue = std::stoi(expectedReturnStr); 46 | } 47 | catch (std::invalid_argument & e) { 48 | return 3; 49 | } 50 | catch (std::out_of_range & e) { 51 | return 3; 52 | } 53 | } 54 | else { 55 | return 4; 56 | } 57 | // Fire up the VM 58 | NanoVM vm(bytecode, length); 59 | int vmValue = vm.Run(); 60 | if (vmValue == expectedValue) { 61 | std::cout << "Test passed: " << path.substr(path.find_last_of("/")) << std::endl; 62 | return 0; 63 | } 64 | std::cout << "Test failed: " << path.substr(path.find_last_of("/")) << " Expected value: " << expectedValue << " but was " << vmValue << std::endl; 65 | return 5; 66 | } 67 | 68 | int runTests() { 69 | NanoAssembler assembler; 70 | 
std::string path = "../../../../examples"; 71 | std::string ending = ".nano"; 72 | int totalTests = 0; 73 | int failedTests = 0; 74 | for (const auto& entry : fs::directory_iterator(path)) { 75 | std::string path = entry.path().string(); 76 | std::cout << path << std::endl; 77 | if (path.compare(path.length() - ending.length(), ending.length(), ending) == 0) { 78 | // Run only test for files with .nano ending 79 | int status = runSingleTest(assembler, path); 80 | if (status) { 81 | failedTests++; 82 | } 83 | totalTests++; 84 | } 85 | } 86 | if (!failedTests) { 87 | // All available tests passed 88 | std::cout << "All tests passed! " << totalTests << "/" << totalTests << std::endl; 89 | return 0; 90 | } 91 | std::cout << "Failed tests " << failedTests << " / " << totalTests << std::endl; 92 | return 1; 93 | } 94 | 95 | // main: the process exit code is the suite result (0 = all tests passed, 1 = at least one failed) 96 | int main(int argc, char* argv[]) { 97 | // Propagate the result of runTests() so a failing suite is visible to the caller/CI 98 | return runTests(); 99 | } -------------------------------------------------------------------------------- /NanoVM/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # CMakeList.txt : CMake project for NanoVM, include source and define 2 | # project specific logic here. 3 | # 4 | cmake_minimum_required (VERSION 3.8) 5 | 6 | # Add source to this project's executable. 7 | add_executable (NanoVM "Nano.cpp" "NanoVM.cpp" "NanoVM.h") 8 | 9 | # TODO: Add tests and install targets if needed. 
-------------------------------------------------------------------------------- /NanoVM/Nano.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoVM.h" 2 | 3 | int main(int argc, char* argv[]) 4 | { 5 | if (argc <= 1) { 6 | std::cout << "Usage NanoVM.exe [FILE]" << std::endl; 7 | return 0; 8 | } 9 | NanoVM vm(argv[1]); 10 | // Return the VM's exit code 11 | return vm.Run(); 12 | } -------------------------------------------------------------------------------- /NanoVM/NanoVM.cpp: -------------------------------------------------------------------------------- 1 | #include "NanoVM.h" 2 | #include 3 | 4 | NanoVM::NanoVM(unsigned char* code, uint64_t size) { 5 | // Initialize cpu first: the memset clears every field, so nothing may be assigned before it 6 | memset(&cpu, 0x00, sizeof(cpu)); 7 | cpu.bytecodeSize = size; 8 | // Zero out registers 9 | memset(cpu.registers, 0x00, sizeof(cpu.registers)); 10 | cpu.codeSize = (NANOVM_PAGE_SIZE * (1 + (size / NANOVM_PAGE_SIZE))); 11 | cpu.stackSize = NANOVM_PAGE_SIZE; 12 | // allocate whole memory, code pages, stack, +10 bytes 13 | // +10 bytes is for instruction fetching which might read more bytes than the instruction size 14 | // This avoids reading memory out side of the VM 15 | cpu.codeBase = (unsigned char*) malloc(cpu.stackSize + 10 + cpu.codeSize); 16 | // Set stack base. 
Stack grows up instead of down like in x86 17 | cpu.stackBase = cpu.codeBase + cpu.codeSize; 18 | // Zero out stack + the last 10 bytes (the byte count is the stack size itself, not sizeof of the size field) 19 | memset(cpu.stackBase, 0x00, cpu.stackSize + 10); 20 | // copy the bytecode to the vm 21 | memcpy(cpu.codeBase, code, size); 22 | // Set IP to the beginning of code 23 | cpu.registers[ip] = 0; 24 | cpu.registers[esp] = cpu.codeSize; 25 | cpu.registers[bp] = cpu.codeSize; 26 | } 27 | 28 | NanoVM::NanoVM(std::string fileName) { 29 | memset(&cpu, 0x00, sizeof(cpu)); 30 | // Zero out registers 31 | memset(cpu.registers, 0x00, sizeof(cpu.registers)); 32 | 33 | std::streampos size; 34 | 35 | std::ifstream file(fileName, std::ios::in | std::ios::binary | std::ios::ate); 36 | if (file.is_open()) 37 | { 38 | size = file.tellg(); 39 | 40 | cpu.codeSize = (NANOVM_PAGE_SIZE * (1 + (size / NANOVM_PAGE_SIZE))); 41 | cpu.stackSize = NANOVM_PAGE_SIZE; 42 | 43 | // allocate whole memory, code pages, stack, +10 bytes 44 | // +10 bytes is for instruction fetching which might read more bytes than the instruction size 45 | // This avoids reading memory out side of the VM 46 | cpu.codeBase = (unsigned char*)malloc(cpu.stackSize + 10 + cpu.codeSize); 47 | 48 | file.seekg(0, std::ios::beg); 49 | file.read((char*)cpu.codeBase, size); 50 | file.close(); 51 | 52 | cpu.bytecodeSize = size; 53 | // Set stack base. 
Stack grows up instead of down like in x86 54 | cpu.stackBase = cpu.codeBase + cpu.codeSize; 55 | // Zero out stack + the last 10 bytes 56 | memset(cpu.stackBase, 0x00, sizeof(cpu.stackSize) + 10); 57 | cpu.registers[esp] = cpu.codeSize; 58 | cpu.registers[bp] = cpu.codeSize; 59 | // Set IP to the beginning of code 60 | cpu.registers[ip] = 0; 61 | } 62 | else std::cout << "Unable to open file"; 63 | } 64 | NanoVM::~NanoVM() { 65 | free(cpu.codeBase); 66 | } 67 | 68 | template inline void NanoVM::push(T value) { 69 | // Check bounds 70 | if (sizeof(value) + cpu.registers[esp] >= reinterpret_cast(cpu.stackBase) + cpu.stackSize) { 71 | // No room in stack. 72 | // Throw error or reallocate more pages 73 | errorFlag = STACK_ERROR; 74 | return; 75 | } 76 | // push to stack 77 | *reinterpret_cast(cpu.codeBase + cpu.registers[esp]) = value; 78 | // update stack pointer 79 | cpu.registers[esp] += sizeof(value); 80 | } 81 | 82 | template inline T NanoVM::pop() { 83 | // Check bounds 84 | if (cpu.registers[esp] - sizeof(T) < cpu.codeSize) { 85 | // Reached the bottom of stack 86 | errorFlag = STACK_ERROR; 87 | return 0; 88 | } 89 | // pop value from stack 90 | T value = *reinterpret_cast(cpu.codeBase + cpu.registers[esp] - sizeof(T)); 91 | // update esp 92 | cpu.registers[esp] -= sizeof(value); 93 | return value; 94 | } 95 | 96 | uint64_t NanoVM::Run() { 97 | while (true) { 98 | Instruction inst; 99 | if (fetch(inst)) { 100 | if (inst.opcode == Halt) { 101 | // Return value will be in reg0 102 | return cpu.registers[Reg0]; 103 | } 104 | if (!execute(inst)) { 105 | // More error flags will be added 106 | switch (errorFlag) { 107 | case MEMORY_ACCESS: 108 | return 1; 109 | break; 110 | default: 111 | return 2; 112 | } 113 | return false; 114 | } 115 | } 116 | else { 117 | return 3; 118 | } 119 | } 120 | } 121 | 122 | bool NanoVM::execute(Instruction &inst) { 123 | // set source and destination addresses 124 | void *dst, *src; 125 | bool isDstReg = false; 126 | dst = 
(inst.isDstMem) ? reinterpret_cast(cpu.codeBase + cpu.registers[inst.dstReg]) : reinterpret_cast(&cpu.registers[inst.dstReg]); 127 | if (inst.srcType == DataType::Reg) { 128 | src = (inst.isSrcMem) ? reinterpret_cast(cpu.codeBase + cpu.registers[inst.srcReg]) : reinterpret_cast(&cpu.registers[inst.srcReg]); 129 | } 130 | else { 131 | isDstReg = (inst.isDstMem) ? false : true; 132 | src = (inst.isSrcMem) ? reinterpret_cast(cpu.codeBase + inst.immediate) : reinterpret_cast(&inst.immediate); 133 | } 134 | // Do bounds check 135 | if ((src != &inst.immediate && src != &cpu.registers[inst.srcReg] && (src < cpu.codeBase || src >= cpu.codeBase + cpu.codeSize + cpu.stackSize)) || (dst != &cpu.registers[inst.dstReg] && (dst < cpu.codeBase || dst > cpu.codeBase + cpu.codeSize + cpu.stackSize))) { 136 | // Source or destination is out side of VM memory 137 | errorFlag = MEMORY_ACCESS; 138 | return false; 139 | } 140 | 141 | #define MATHOP(INST, OP, SIZE, DSTSIZE) \ 142 | case INST: { \ 143 | *reinterpret_cast(dst) OP *reinterpret_cast(src); \ 144 | break; \ 145 | } 146 | 147 | //USIZE is unsigned and SIZE is signed type => e.g. 
uint8_t and int8_t 148 | #define BRANCH(USIZE, SIZE, DSTSIZE) \ 149 | switch(inst.opcode) { \ 150 | MATHOP(Opcodes::Add, +=, USIZE, DSTSIZE) \ 151 | MATHOP(Opcodes::Mov, =, USIZE, DSTSIZE) \ 152 | MATHOP(Opcodes::Sub, -=, USIZE, DSTSIZE) \ 153 | MATHOP(Opcodes::Xor, ^=, USIZE, DSTSIZE) \ 154 | MATHOP(Opcodes::And, &=, USIZE, DSTSIZE) \ 155 | MATHOP(Opcodes::Or, |=, USIZE, DSTSIZE) \ 156 | MATHOP(Opcodes::Sar, >>=, USIZE, DSTSIZE) \ 157 | MATHOP(Opcodes::Sal, <<=, USIZE, DSTSIZE) \ 158 | MATHOP(Opcodes::Div, /=, USIZE, DSTSIZE) \ 159 | MATHOP(Opcodes::Mul, *=, USIZE, DSTSIZE) \ 160 | MATHOP(Opcodes::Mod, %=, USIZE, DSTSIZE) \ 161 | case Opcodes::Printi: \ 162 | std::printf("%" PRIu64 "", *reinterpret_cast(src)); \ 163 | break; \ 164 | case Opcodes::Prints: \ 165 | std::printf("%s", src); \ 166 | break; \ 167 | case Opcodes::Printc: \ 168 | std::printf("%c", *reinterpret_cast(src)); \ 169 | break; \ 170 | case Opcodes::Inc: \ 171 | *reinterpret_cast(src) += 1; \ 172 | break; \ 173 | case Opcodes::Dec: \ 174 | *reinterpret_cast(src) -= 1; \ 175 | break; \ 176 | case Opcodes::Push: \ 177 | push(*reinterpret_cast(src)); \ 178 | break; \ 179 | case Opcodes::Pop: \ 180 | *reinterpret_cast(dst) = pop(); \ 181 | break; \ 182 | case Opcodes::Jz: \ 183 | if (cpu.registers[flags] & ZERO_FLAG) { \ 184 | cpu.registers[ip] += *reinterpret_cast(src); \ 185 | return true; \ 186 | } \ 187 | break; \ 188 | case Opcodes::Jnz: \ 189 | if (!(cpu.registers[flags] & ZERO_FLAG)) { \ 190 | cpu.registers[ip] += *reinterpret_cast(src); \ 191 | return true; \ 192 | } \ 193 | break; \ 194 | case Opcodes::Jg: \ 195 | if (cpu.registers[flags] & GREATER_FLAG) { \ 196 | cpu.registers[ip] += *reinterpret_cast(src); \ 197 | return true; \ 198 | } \ 199 | break; \ 200 | case Opcodes::Js: \ 201 | if (cpu.registers[flags] & SMALLER_FLAG) { \ 202 | cpu.registers[ip] += *reinterpret_cast(src); \ 203 | return true; \ 204 | } \ 205 | break; \ 206 | case Opcodes::Jmp: \ 207 | cpu.registers[ip] += 
*reinterpret_cast(src); \ 208 | return true; \ 209 | case Opcodes::Call: \ 210 | push(cpu.registers[ip] + inst.instructionSize); \ 211 | cpu.registers[ip] += *reinterpret_cast(src); \ 212 | return true; \ 213 | case Opcodes::Ret: \ 214 | cpu.registers[ip] = pop(); \ 215 | return true; \ 216 | case Opcodes::Cmp: \ 217 | if (*reinterpret_cast(dst) == *reinterpret_cast(src)) \ 218 | cpu.registers[flags] = ZERO_FLAG; \ 219 | else if (*reinterpret_cast(dst) > *reinterpret_cast(src)) \ 220 | cpu.registers[flags] = GREATER_FLAG; \ 221 | else \ 222 | cpu.registers[flags] = SMALLER_FLAG; \ 223 | break; \ 224 | default: \ 225 | return false; \ 226 | } 227 | 228 | 229 | switch (inst.srcSize) { 230 | case Size::Byte: 231 | if (isDstReg) { 232 | BRANCH(uint8_t, int8_t, uint64_t); 233 | } 234 | else { 235 | BRANCH(uint8_t, int8_t, uint8_t); 236 | } 237 | break; 238 | case Size::Short: 239 | if (isDstReg) { 240 | BRANCH(uint16_t, int16_t, uint64_t); 241 | } 242 | else { 243 | BRANCH(uint16_t, int16_t, uint16_t); 244 | } 245 | break; 246 | case Size::Dword: 247 | if (isDstReg) { 248 | BRANCH(uint32_t, int32_t, uint64_t); 249 | } 250 | else { 251 | BRANCH(uint32_t, int32_t, uint32_t); 252 | } 253 | break; 254 | default: 255 | BRANCH(uint64_t, int64_t, uint64_t); 256 | break; 257 | } 258 | cpu.registers[ip] += inst.instructionSize; 259 | return true; 260 | } 261 | 262 | bool NanoVM::fetch(Instruction &inst) const { 263 | // Read 64bit to try and minimize the required memory reading 264 | // This increases the performance 265 | 266 | // Sanity check the ip that it is within code page 267 | if (cpu.registers[ip] >= cpu.codeSize) { 268 | std::cout << "IP out of bounds" << std::endl; 269 | return false; 270 | } 271 | // Parse the instruction 272 | unsigned char* rawIp = cpu.codeBase + cpu.registers[ip]; 273 | uint64_t value = *reinterpret_cast(rawIp); 274 | inst.opcode = (value & (unsigned char)OPCODE_MASK); 275 | inst.dstReg = ((value & DST_REG_MASK) >> 5); 276 | inst.srcType = (value 
>> 8) & SRC_TYPE_MASK; 277 | inst.srcReg = (value >> 8) & SRC_REG_MASK; 278 | inst.srcSize = ((value >> 8) & SRC_SIZE_MASK) >> 5; 279 | inst.isDstMem = ((value >> 8) & DST_MEM_MASK); 280 | inst.isSrcMem = ((value >> 8) & SRC_MEM_MASK); 281 | // If source is immediate value, read it to the instruction struct 282 | if (inst.srcType) { 283 | // If the immediate value fit in the initial value. Parse it with bitshift. It is faster than reading memory again 284 | switch (inst.srcSize) { 285 | case Byte: 286 | inst.immediate = (uint8_t)(value >> 16); 287 | inst.instructionSize = 3; 288 | break; 289 | case Short: 290 | inst.immediate = (uint16_t)(value >> 16); 291 | inst.instructionSize = 4; 292 | break; 293 | case Dword: 294 | inst.immediate = (uint32_t)(value >> 16); 295 | inst.instructionSize = 6; 296 | break; 297 | case Qword: 298 | // In the case of qword we have to perform another read operations 299 | inst.immediate = *(uint64_t*)(rawIp + 2); 300 | inst.instructionSize = 10; 301 | break; 302 | } 303 | } 304 | else { 305 | inst.instructionSize = 2; 306 | } 307 | return true; 308 | 309 | } -------------------------------------------------------------------------------- /NanoVM/NanoVM.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // VM masks and constants 9 | constexpr uint32_t NANOVM_PAGE_SIZE = 4096; 10 | constexpr uint8_t OPCODE_MASK = 0b00011111; 11 | constexpr uint8_t DST_REG_MASK = 0b11100000; 12 | constexpr uint8_t SRC_TYPE_MASK = 0b10000000; 13 | constexpr uint8_t SRC_SIZE_MASK = 0b01100000; 14 | constexpr uint8_t DST_MEM_MASK = 0b00010000; 15 | constexpr uint8_t SRC_MEM_MASK = 0b00001000; 16 | constexpr uint8_t SRC_REG_MASK = 0b00000111; 17 | 18 | // Error flags 19 | constexpr uint8_t STACK_ERROR = 0b10000000; 20 | constexpr uint8_t IP_ERROR = 0b01000000; 21 | constexpr uint8_t MEMORY_ACCESS = 0b00100000; 22 | 23 | // Comparison flags 24 | 
constexpr uint8_t ZERO_FLAG = 0b10000000; 25 | constexpr uint8_t GREATER_FLAG = 0b01000000; 26 | constexpr uint8_t SMALLER_FLAG = 0b00100000; 27 | 28 | /** 29 | * Register enum defines all the CPU registers + flags and instruction pointer. The values are used as indices into NanoVMCpu::registers; only values 0-7 (Reg0..esp) fit in the 3-bit register fields of an instruction 30 | */ 31 | enum Register { 32 | Reg0, 33 | Reg1, 34 | Reg2, 35 | Reg3, 36 | Reg4, 37 | Reg5, 38 | bp, // base pointer for current stack frame 39 | esp, // stack pointer 40 | ip, // instruction pointer 41 | flags // comparison flags written by the Cmp opcode 42 | }; 43 | 44 | /** 45 | * Opcodes enum defines all the implemented opcodes for the NanoVM. Limited to 5 bits aka 32 opcodes; the 32 enumerators below use every available value (0-31) 46 | */ 47 | enum Opcodes { 48 | Mov, 49 | Add, 50 | Sub, 51 | And, 52 | Or, 53 | Xor, 54 | Sar, 55 | Sal, 56 | Ror, 57 | Rol, 58 | Mul, 59 | Div, 60 | Mod, 61 | Cmp, 62 | 63 | Jz, 64 | Jnz, 65 | Jg, 66 | Js, 67 | Jmp, 68 | Not, 69 | Inc, 70 | Dec, 71 | Ret, 72 | 73 | Call, 74 | Push, 75 | Pop, 76 | Halt, 77 | Printi, 78 | Prints, 79 | Printc, 80 | Syscall, 81 | Memcpy 82 | }; 83 | 84 | #ifndef TYPE_H 85 | #define TYPE_H 86 | 87 | /** 88 | * Size enum defines the supported integer operand sizes: 8, 16, 32 and 64 bits 89 | */ 90 | enum Size { 91 | Byte, 92 | Short, 93 | Dword, 94 | Qword 95 | }; 96 | 97 | /** 98 | * DataType enum defines the kinds of source operand an instruction can have: a register or an immediate value 99 | */ 100 | enum DataType { 101 | Reg, 102 | Immediate 103 | }; 104 | 105 | #endif // !TYPE_H 106 | 107 | /** 108 | * NanoVMCpu struct defines the CPU core which holds the registers, pointers to the code base and stack base, and their respective sizes 109 | */ 110 | struct NanoVMCpu{ 111 | uint64_t registers[10]; /**< CPU registers + IP and flags, indexed by the Register enum */ 112 | unsigned char* codeBase; /**< Pointer to the base of the VM memory */ 113 | unsigned char* stackBase; /**< Pointer to the base of the stack */ 114 | uint64_t codeSize; /**< Size of the allocated VM memory including stack */ 115 | uint64_t stackSize; /**< Size of the allocated stack memory */ 116 | uint64_t bytecodeSize; /**< Size of the loaded bytecode */ 117 | }; 118 | 119 | /** 120 | * 
Instruction holds a single fetched instruction which the VM can run 121 | */ 122 | struct Instruction { 123 | unsigned char opcode; /**< Opcode of the instruction */ 124 | unsigned char dstReg; /**< Destination register, encoded in the 1st byte of the instruction together with the opcode */ 125 | unsigned char srcReg; /**< Source register (optional) */ 126 | unsigned char srcType; /**< Source value type reg/immediate (optional). fetch stores the masked bit, so any non-zero value means immediate */ 127 | bool isDstMem; /**< True if the destination register is used as a pointer to memory */ 128 | bool isSrcMem; /**< True if the source value is used as a pointer to memory */ 129 | unsigned char srcSize; /**< Size of the source value (optional) */ 130 | uint64_t immediate; /**< Immediate value aka source value (optional) */ 131 | unsigned char instructionSize; /**< Size of this instruction in bytes. This allows the VM to adjust the IP accordingly */ 132 | }; 133 | 134 | typedef struct NanoVMCpu NanoVMCpu; 135 | typedef struct Instruction Instruction; 136 | 137 | /** 138 | * \brief NanoVM is the VM core which will load and run nano bytecode 139 | * 140 | * NanoVM is the VM core which will load the bytecode, allocate and initialize memory and the CPU. 
141 | * It implements fetching and executing of instructions, stack memory handling and running of the bytecode 142 | */ 143 | class NanoVM { 144 | public: 145 | /** 146 | * \brief Initializes the NanoVM from bytecode 147 | * @param code Points to the bytecode to be loaded 148 | * @param size Holds the size of the bytecode to be loaded 149 | */ 150 | NanoVM(unsigned char* code, uint64_t size); 151 | 152 | /** 153 | * Initializes the NanoVM from a bytecode file 154 | * @param file File to load the bytecode from 155 | */ 156 | NanoVM(std::string file); 157 | 158 | /** 159 | * NanoVM destructor 160 | */ 161 | ~NanoVM(); 162 | 163 | /** 164 | * Runs the whole loaded bytecode program 165 | * @return Return value of the bytecode program 166 | */ 167 | uint64_t Run(); 168 | protected: 169 | /** 170 | * Pops a value from the stack and adjusts the stack pointer 171 | * @return Single value from the stack 172 | */ 173 | template T pop(); 174 | 175 | /** 176 | * Pushes a value to the stack 177 | * @param value Value to push to the stack 178 | */ 179 | template void push(T value); 180 | 181 | /** 182 | * Fetches the next instruction pointed by the instruction pointer (IP). 
Note that fetch does not check if the instruction is valid 183 | * @param[out] instruction Reference to instruction struct to be updated 184 | * @return True if instruction was fetched successfully, false if failed (e.g. IP out of bounds) 185 | */ 186 | bool fetch(Instruction &instruction) const; 187 | 188 | /** 189 | * Executes a single instruction and updates the internal state of the VM including IP 190 | * @param instruction Instruction to be executed 191 | * @return True if the instruction was executed successfully, false if the instruction was not valid or an error occurred 192 | */ 193 | bool execute(Instruction &instruction); 194 | 195 | unsigned char errorFlag; /**< 8 bit flag that will be set with error masks if an error occurs */ 196 | NanoVMCpu cpu; /**< Holds the internal state of the CPU */ 197 | }; 198 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NanoVM 2 | PoC lightweight x64 VM implementation 3 | 4 | ### Table of contents 5 | 6 | - [NanoVM](#nanovm) 7 | * [General](#general) 8 | * [How to build](#how-to-build) 9 | + [Windows (Visual Studio 2019)](#windows--visual-studio-2019-) 10 | + [Debian](#debian) 11 | * [VM architecture](#vm-architecture) 12 | + [Registers](#registers) 13 | + [Instructions](#instructions) 14 | - [NanoAssembler](#nanoassembler) 15 | - [NanoDebugger](#nanodebugger) 16 | 17 | ## General 18 | 19 | NanoVM is a cross-platform, register-based, Turing-complete VM with stack memory. The project also includes an assembler and a debugger with syntax similar to x86 assembly in Intel syntax. 20 | Note that the project is still in very early development and many things, including the instruction set and format, are subject to change, so bytecode from previous versions might not work in the future. 21 | The documentation will be updated when changes happen. 
 22 | 23 | The longer-term goal of the project is to be an embeddable VM with a small bytecode format while maintaining reasonable performance. 24 | A syscall instruction will be added eventually, once the bytecode format has been finalized. It will contain some implemented functions like IO, and the user will be able to register custom functions as callbacks for different syscall function values. 25 | This allows one to implement more "outside of the VM" functionality. Performance comparison tests against other languages will be added later. 26 | A further long-term goal is to eventually program the Compiler/Assembler itself in NanoVM bytecode. 27 | 28 | Note that even though the VM does do bounds checking for read, write and execute operations on memory, these checks are more for catching bugs in the code and avoiding VM crashes than for hardening the VM. 29 | Escaping the VM sandbox is likely trivial. 30 | However, if you notice a way to read, write or execute memory outside of the VM I'll gladly fix those. 31 | That being said **!this VM should not be used to run unknown and potentially hostile code!**. 32 | Also, things like executing stack memory are currently possible; this is on purpose, to allow dynamic code generation or encryption. 33 | Read/write/execute permissions for memory pages might be added in the future. 34 | 35 | ## How to build 36 | 37 | Build instructions have been tested on Windows and Debian based linux distros 38 | 39 | ### Windows (Visual Studio 2019) 40 | 41 | You need to have Visual Studio 2019 and cmake installed on your system.\ 42 | Visual Studio 2019 is compatible with cmake projects so you can build the project by opening the project in Visual Studio and right-clicking the root CMakeLists.txt -> "Generate Cache for NanoVM". 
This will generate the cmake cache for you and now you can build the project by selecting 43 | from the menu bar: Build -> Build all.\ 44 | If you would rather generate Visual Studio specific build files, you can do that by running the following command in the project root with cmd/powershell: 45 | 46 | ``` 47 | cmake . -B ./build 48 | ``` 49 | 50 | This will generate new Visual Studio build files under build/ 51 | 52 | ### Debian 53 | 54 | You need to have build tools and cmake available. You can install those by running the following commands in a terminal 55 | ``` 56 | sudo apt install build-essential 57 | sudo apt install cmake 58 | ``` 59 | Now to build the project run the following commands 60 | ``` 61 | git clone https://github.com/etsubu/NanoVM.git 62 | cd NanoVM 63 | cmake . 64 | make 65 | ``` 66 | This will build all the binaries in their own folders alongside the source files. 67 | 68 | ## VM architecture 69 | 70 | The VM memory is divided into pages, which by default are 4096 bytes each. When initialized, the VM bytecode will be placed at the bottom of the allocated memory, followed by the stack memory base on the next page. While the VM is similar to x86, the stack grows up, unlike in x86. This can be utilized to dynamically increase the stack memory if required with minimal effort. 71 | 72 | ### Registers 73 | The VM is register based so the instructions utilize different registers. Registers are encoded with 3 bits so there are 8 registers in total (the names will change in future): 74 | 75 | | Register | Number | Description | 76 | | ------------- |:-------------:| --------------------------------------------:| 77 | | Reg0 | 0 | General purpose. Used to store return values | 78 | | Reg1 | 1 | General purpose. | 79 | | Reg2 | 2 | General purpose. | 80 | | Reg3 | 3 | General purpose. | 81 | | Reg4 | 4 | General purpose. | 82 | | Reg5 | 5 | General purpose. | 83 | | Reg6 | 6 | General purpose. Declared as `bp` (base pointer for the current stack frame) in NanoVM.h | 84 | | Esp | 7 | Stack pointer. 
Points to the top of the stack| 85 | 86 | ### Instructions 87 | Instructions always have an opcode and 0-2 operands. Below is the instruction encoding. The first byte is laid out from LSB to MSB (opcode in the low 5 bits, destination register in the high 3 bits); the fields of the second byte are listed from its MSB down to its LSB, matching the masks in NanoVM.h (source type is bit 7, source register is bits 0-2) 88 | 89 | | 5 bits | 3 bits | 1 bit | 2 bits | 1 bit | 1 bit | 3 bits | 90 | | ------------- |:---------------------:|:-----------------:|:---------------------------:|:-------------:|:-------------:|:-------------:| 91 | | Opcode | Destination register | Source type | Source size | Is_Dst_pointer| Is_Src_pointer|Source register| 92 | | What instruction | Update this register | Reg=0, Immediate=1| Byte, short, dword, qword | True,false | True, false | Source register if src type is reg| 93 | 94 | So most of the instructions are encoded in 2 bytes + immediate value if used. Instructions that use zero operands, and are effectively only 1 byte, are: 95 | ```assembly 96 | Halt ; Stops the execution and exits the VM execution 97 | ret ; Pops value from the top of the stack and performs absolute jump to that address. Updates stack pointer 98 | ``` 99 | Instructions that use 1 operand do not use either source register or immediate value. They do not use destination register even though it is always defined. Opcodes that use 1 operand: 100 | ```assembly 101 | Jz; Jump if zero flag is set. Example: jz reg0 102 | Jnz; Jump if zero flag is not set. Example: jnz reg0 103 | Jg; Jump if greater flag is set. Example: jg reg0 104 | Js; Jump if smaller flag is set. Example: js reg0 105 | Jmp; Jump ("goto") instruction. Example: jmp reg0 106 | Not; Flip the bits in value. Example: not reg0 107 | Inc; Increases the value by one. Example: inc reg0 108 | Dec; Decreases the value by one. Example: dec reg0 109 | Call; Pushes the next instruction's absolute memory address to the stack and performs relative jump to the given address. Updates stack pointer. Example: call reg0 110 | Push; Pushes value to the top of the stack. 
Example: push reg0 111 | Pop; Pops value from the top of the stack and moves the value to given address. Example: pop reg0 112 | Printi; prints given integer. Example: printi reg0 113 | Prints; prints given null terminated string. Example: prints @reg0 | Note that @reg0 uses reg0 as pointer to the string not as an absolute value 114 | Printc; prints given ASCII char to the console. Example: printc reg0 115 | ``` 116 | Instructions with 2 operands: 117 | ```assembly 118 | Mov; mov reg0, reg0 <=> reg0 = reg0 119 | Add; add reg0, reg0 <=> reg0 += reg0 120 | Sub; sub reg0, reg0 <=> reg0 -= reg0 121 | And; and reg0, reg0 <=> reg0 &= reg0 122 | Or; or reg0, reg0 <=> reg0 |= reg0 123 | Xor; xor reg0, reg0 <=> reg0 ^= reg0 124 | Sar; sar reg0, reg0 <=> reg0 >>= reg0 125 | Sal; sal reg0, reg0 <=> reg0 <<= reg0 126 | Ror; ror reg0, reg0 <=> performs circular shift to the right on reg0, by reg0 times 127 | Rol; rol reg0, reg0 <=> performs circular shift to the left on reg0, by reg0 times 128 | Mul; mul reg0, reg0 <=> reg0 *= reg0 129 | Div; div reg0, reg0 <=> reg0 /= reg0 130 | Mod; mod reg0, reg0 <=> reg0 %= reg0 131 | Cmp; cmp reg0, reg1 | Compares the 2 values and sets flags depending on the comparison. 132 | ``` 133 | ToDo: 134 | * Remove print instructions and move them under the syscall instruction to operate with stream pointers. This allows the printing to support console IO and for example file IO 135 | * Implement syscall instruction 136 | 137 | # NanoAssembler 138 | NanoAssembler is currently a minimalistic assembler for NanoVM. The assembler was made to aid in making simple programs and tests. This project is not so much about making a "programming language" but rather the core VM which could be used as the base which some programming language is compiled to. When more advanced features are introduced I'll consider creating a new compiler project and leave the assembler for the low level operations. 
 139 | Currently the assembler supports comments with prefix ';' and uses regex to filter multiple whitespaces to help in processing the input. The assembler also supports labels which are defined by ':' prefix. This will be mapped to a memory address that points to the next instruction after label. Example: 140 | ```assembly 141 | ; The assembler supports comments 142 | ; The assembler strips multiple whitespaces 143 | ; xor reg0, reg1 144 | ; The above line would be translated to the one below. So the assembler is not sensitive to whitespace 145 | xor reg0, reg0 ; zero out reg0 146 | :label 147 | printi reg0 ; Label points here 148 | printc '\n' ; The assembler can map characters defined with '' and special characters like \n \r \t to their ascii values 149 | ; The above line is the same as printc 10 150 | inc reg0 ; reg0++ 151 | cmp reg0, 0x10 ; compare reg0 to 0x10 in hex which is the same as cmp reg0, 16 152 | ; The assembler understands base10 and base16 values 153 | jnz label ; if reg0 != 16 jump to label 154 | ; The above code will print the numbers 0 to 15 155 | ``` 156 | ToDo: 157 | * Add macros. These would help to reduce the amount of code that needs to be written. 158 | * Add include tags which would allow to write "standard libraries" which could be included to the project 159 | * Size definitions for registers 160 | * ... 161 | 162 | The assembler project's code is not currently clean, and its development will most likely be stopped eventually and a new compiler project will be started. Probably with external library for parsing the programming language. I will try and keep the assembler simple 163 | 164 | # NanoDebugger 165 | 166 | The project also contains a simple command line debugger + disassembler. The debugger inherits the NanoVM core and is capable of stepping through the programs. It also supports: 167 | * Breakpoints 168 | * Goto. This allows you to change the current instruction pointer 169 | * print registers. 
This will print the current register values and flags set by cmp 170 | * Print stack. This will print the stack memory up to the stack pointer. Each line of the dump will be 8 hex values followed by the same values in ascii separated by |. This makes it easy to spot potential ASCII strings in the stack as well as 64bit integers. 171 | Todo: 172 | * Add commands for modifying the stack and registers 173 | * Add whole memory dump which will dump all the memory pages including code and stack to the disk. 174 | * Add option to disassemble the whole code and dump to the disk with memory offsets 175 | -------------------------------------------------------------------------------- /examples/HelloWorld.nano: -------------------------------------------------------------------------------- 1 | mov reg0, esp 2 | push 'h' 3 | push 'e' 4 | push 'l' 5 | push 'l' 6 | push 'o' 7 | push 32 8 | push 'w' 9 | push 'o' 10 | push 'r' 11 | push 'l' 12 | push 'd' 13 | push 0x00 14 | prints @reg0 15 | ; prints hello world 16 | halt -------------------------------------------------------------------------------- /examples/SieveOfEratosthenes.nano: -------------------------------------------------------------------------------- 1 | ; This program will calculate prime numbers with sieve of eratosthenes and prints them to the console 2 | 3 | mov reg2, 300 ;print prime numbers under this value 4 | mov bp, esp ; set base pointer 5 | ; initialize numbers 6 | :init 7 | push reg0 8 | inc reg0 9 | cmp reg0, reg2 10 | js init 11 | ; stack is now allocated 12 | ; iterate through array 13 | mov reg3, bp ; set to beginning of array 14 | add reg3, 16 ; skip 2 values 15 | :loop 16 | cmp @reg3, 0 17 | jz waszero 18 | mov reg4, @reg3 ;reg4 = array[reg3] 19 | mul reg4, 8 20 | mov reg2, reg4 21 | add reg2, bp 22 | :subloop 23 | add reg2, reg4 24 | cmp reg2, esp 25 | jg waszero 26 | jz waszero 27 | mov reg5, @reg2 28 | mod reg5, @reg3 ; modulo 29 | cmp reg5, 0 ;was divided evenly => zero out 30 | jnz nozero 31 
| xor @reg2, @reg2 32 | 33 | :nozero 34 | jmp subloop 35 | ; loop through multiples of reg3 36 | 37 | :waszero 38 | ;printi reg3 39 | ;printc '\n' 40 | add reg3, 8 41 | cmp reg3, esp 42 | jnz loop 43 | ;print prime numbers 44 | mov reg3, bp 45 | add reg3, 16 46 | 47 | :printloop 48 | mov reg0, @reg3 49 | cmp reg0, 0 50 | jz notprime 51 | printi reg0 52 | mov reg1, reg0 ; save last prime 53 | printc '\n' 54 | :notprime 55 | add reg3, 8 56 | cmp reg3, esp 57 | js printloop 58 | mov reg0, reg1 ; return value is the last prime 59 | halt 60 | ; NANO_TEST_EXPECT_RETURN=293 -------------------------------------------------------------------------------- /examples/arithmetic.nano: -------------------------------------------------------------------------------- 1 | mov reg0, 5 ;reg0 is now 5 2 | add reg1, reg0 ;reg1 is now 5 3 | mov reg3, 3 4 | sub reg0, reg3 ;reg0 is now 2 5 | mul reg0, reg1 ;reg0 is now 10 6 | div reg0, 2 ;reg0 is now 5 7 | halt 8 | ; NANO_TEST_EXPECT_RETURN=5 -------------------------------------------------------------------------------- /examples/fibonacciSequence.nano: -------------------------------------------------------------------------------- 1 | ;This code will print fibonacci sequence and return the last number 2 | 3 | ;use reg0 and reg1 for storing numbers 4 | ;set reg3 as counter for how many pairs should be printed 5 | mov reg3, 10 6 | ;set reg1 to 1 7 | inc reg1 8 | ;label could be placed here 9 | :loop 10 | printi reg0 11 | printc '\n' 12 | add reg0, reg1 13 | printi reg1 14 | mov reg4, reg1 15 | printc '\n' 16 | add reg1, reg0 17 | dec reg3 ;subtract 1 from reg3 18 | cmp reg3, reg5 ; compare to 0 (reg5 is initialized as 0) 19 | jnz loop 20 | mov reg0, reg4 21 | halt 22 | ; NANO_TEST_EXPECT_RETURN=4181 -------------------------------------------------------------------------------- /examples/labels.nano: -------------------------------------------------------------------------------- 1 | ;define label 2 | :loop 3 | printi reg0 4 | inc 
reg0 5 | cmp reg0, 5 6 | jnz loop 7 | ; NANO_TEST_EXPECT_RETURN=5 -------------------------------------------------------------------------------- /examples/labels2.nano: -------------------------------------------------------------------------------- 1 | cmp reg0, 5 2 | jnz end 3 | sub reg0, 1 4 | :end 5 | inc reg0 6 | ; NANO_TEST_EXPECT_RETURN=1 -------------------------------------------------------------------------------- /examples/labels3.nano: -------------------------------------------------------------------------------- 1 | mov reg0, 10 2 | :loop1 3 | dec reg0 4 | mov reg1, reg0 5 | mod reg1, 2 6 | cmp reg1, 0 7 | jnz noprint 8 | printi reg0 9 | :noprint 10 | cmp reg0, 0 11 | jnz loop1 12 | halt 13 | ; NANO_TEST_EXPECT_RETURN=0 -------------------------------------------------------------------------------- /examples/labels4.nano: -------------------------------------------------------------------------------- 1 | mov reg0, 10 2 | :loop1 3 | dec reg0 4 | mov reg1, reg0 5 | mod reg1, 2 6 | cmp reg1, 0 7 | jnz end 8 | printi reg0 9 | :noprint 10 | cmp reg0, 0 11 | jnz loop1 12 | :end 13 | halt 14 | ; NANO_TEST_EXPECT_RETURN=9 -------------------------------------------------------------------------------- /examples/loop.nano: -------------------------------------------------------------------------------- 1 | printi reg0 2 | add reg0, 1 3 | cmp reg0, 5 4 | js -8 5 | halt 6 | ; NANO_TEST_EXPECT_RETURN=5 --------------------------------------------------------------------------------