├── .gitattributes
├── .gitignore
├── CMakeLists.txt
├── CMakeSettings.json
├── Dockerfile
├── LICENSE
├── NanoAssembler
    ├── CMakeLists.txt
    ├── Mapper.cpp
    ├── Mapper.h
    ├── Nano.cpp
    ├── NanoAssembler.cpp
    ├── NanoAssembler.h
    └── Types.h
├── NanoDebugger
    ├── CMakeLists.txt
    ├── Debugger.cpp
    ├── Instructions.cpp
    ├── Instructions.h
    ├── NanoDebugger.cpp
    └── NanoDebugger.h
├── NanoUnitTests
    ├── CMakeLists.txt
    └── test.cpp
├── NanoVM
    ├── CMakeLists.txt
    ├── Nano.cpp
    ├── NanoVM.cpp
    └── NanoVM.h
├── README.md
└── examples
    ├── HelloWorld.nano
    ├── SieveOfEratosthenes.nano
    ├── arithmetic.nano
    ├── fibonacciSequence.nano
    ├── labels.nano
    ├── labels2.nano
    ├── labels3.nano
    ├── labels4.nano
    └── loop.nano


/.gitattributes:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Set default behavior to automatically normalize line endings.
 3 | ###############################################################################
 4 | * text=auto
 5 | 
 6 | ###############################################################################
 7 | # Set default behavior for command prompt diff.
 8 | #
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs     diff=csharp
14 | 
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following 
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln       merge=binary
26 | #*.csproj    merge=binary
27 | #*.vbproj    merge=binary
28 | #*.vcxproj   merge=binary
29 | #*.vcproj    merge=binary
30 | #*.dbproj    merge=binary
31 | #*.fsproj    merge=binary
32 | #*.lsproj    merge=binary
33 | #*.wixproj   merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj   merge=binary
36 | #*.wwaproj   merge=binary
37 | 
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg   binary
44 | #*.png   binary
45 | #*.gif   binary
46 | 
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | # 
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the 
52 | # entries below.
53 | ###############################################################################
54 | #*.doc   diff=astextplain
55 | #*.DOC   diff=astextplain
56 | #*.docx  diff=astextplain
57 | #*.DOCX  diff=astextplain
58 | #*.dot   diff=astextplain
59 | #*.DOT   diff=astextplain
60 | #*.pdf   diff=astextplain
61 | #*.PDF   diff=astextplain
62 | #*.rtf   diff=astextplain
63 | #*.RTF   diff=astextplain
64 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .git/
 2 | .vs/
 3 | out/
 4 | build/
 5 | CMakeFiles/
 6 | *.filters
 7 | *.cmake
 8 | *.vcxproj
 9 | CMakeCache.txt
10 | DartConfiguration.tcl
11 | NanoVM.sln


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ﻿# CMakeList.txt : Top-level CMake project file, do global configuration
 2 | # and include sub-projects here.
 3 | #
 4 | cmake_minimum_required (VERSION 3.10)
 5 | 
 6 | project ("NanoVM")
 7 | 
 8 | # Include sub-projects.
 9 | add_subdirectory ("NanoVM")
10 | add_subdirectory ("NanoAssembler")
11 | add_subdirectory ("NanoDebugger")
12 | add_subdirectory ("NanoUnitTests")
13 | 
14 | include( CTest )
15 | enable_testing()


--------------------------------------------------------------------------------
/CMakeSettings.json:
--------------------------------------------------------------------------------
 1 | ﻿{
 2 |   // See https://go.microsoft.com//fwlink//?linkid=834763 for more information about this file.
 3 |   "configurations": [
 4 |     {
 5 |       "name": "x64-Debug",
 6 |       "generator": "Ninja",
 7 |       "configurationType": "Debug",
 8 |       "inheritEnvironments": [ "msvc_x64_x64" ],
 9 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
10 |       "installRoot": "${projectDir}\\out\\install\\${name}",
11 |       "cmakeCommandArgs": "",
12 |       "buildCommandArgs": "-v",
13 |       "ctestCommandArgs": ""
14 |     }
15 |   ]
16 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine
2 | RUN apk --no-cache add cmake clang clang-dev make gcc g++ libc-dev linux-headers


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 etsubu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NanoAssembler/CMakeLists.txt:
--------------------------------------------------------------------------------
# CMakeLists.txt : CMake project for NanoAssembler, include source and define
# project specific logic here.
 3 | #
 4 | cmake_minimum_required (VERSION 3.8)
 5 | 
 6 | # Add source to this project's executable.
 7 | add_executable (NanoAssembler "Nano.cpp" "NanoAssembler.cpp" "Mapper.cpp" "Mapper.h" "NanoAssembler.h" "Types.h")
 8 | 
 9 | # TODO: Add tests and install targets if needed.
10 | 


--------------------------------------------------------------------------------
/NanoAssembler/Mapper.cpp:
--------------------------------------------------------------------------------
  1 | #include "Mapper.h"
  2 | 
  3 | Mapper::Mapper() {
  4 | 	registerMap["reg0"] = 0x00;
  5 | 	registerMap["reg1"] = 0x01;
  6 | 	registerMap["reg2"] = 0x02;
  7 | 	registerMap["reg3"] = 0x03;
  8 | 	registerMap["reg4"] = 0x04;
  9 | 	registerMap["reg5"] = 0x05;
 10 | 	registerMap["bp"] = 0x06;
 11 | 	registerMap["esp"]  = 0x07;
 12 | 
 13 | 	opcodeMap["mov"]	= std::make_pair(0, 2);
 14 | 	opcodeMap["add"]	= std::make_pair(1, 2);
 15 | 	opcodeMap["sub"]	= std::make_pair(2, 2);
 16 | 	opcodeMap["and"]	= std::make_pair(3, 2);
 17 | 	opcodeMap["or"]		= std::make_pair(4, 2);
 18 | 	opcodeMap["xor"]	= std::make_pair(5, 2);
 19 | 	opcodeMap["sar"]	= std::make_pair(6, 2);
 20 | 	opcodeMap["sal"]	= std::make_pair(7, 2);
 21 | 	opcodeMap["ror"]	= std::make_pair(8, 2);
 22 | 	opcodeMap["rol"]	= std::make_pair(9, 2);
 23 | 	opcodeMap["mul"]    = std::make_pair(10, 2);
 24 | 	opcodeMap["div"]    = std::make_pair(11, 2);
 25 | 	opcodeMap["mod"]    = std::make_pair(12, 2);
 26 | 	opcodeMap["cmp"]	= std::make_pair(13, 2);
 27 | 
 28 | 	opcodeMap["jz"]		= std::make_pair(14, 1);
 29 | 	opcodeMap["jnz"]	= std::make_pair(15, 1);
 30 | 	opcodeMap["jg"]		= std::make_pair(16, 1);
 31 | 	opcodeMap["js"]		= std::make_pair(17, 1);
 32 | 	opcodeMap["jmp"]	= std::make_pair(18, 1);
 33 | 	opcodeMap["not"]	= std::make_pair(19, 1);
 34 | 	opcodeMap["inc"]	= std::make_pair(20, 1);
 35 | 	opcodeMap["dec"]	= std::make_pair(21, 1);
 36 | 	opcodeMap["ret"]	= std::make_pair(22, 0);
 37 | 
 38 | 	opcodeMap["call"]	= std::make_pair(23, 1);
 39 | 	opcodeMap["push"]	= std::make_pair(24, 1);
 40 | 	opcodeMap["pop"]	= std::make_pair(25, 1);
 41 | 	opcodeMap["halt"]	= std::make_pair(26, 0);
 42 | 	opcodeMap["printi"]	= std::make_pair(27, 1);
 43 | 	opcodeMap["prints"]	= std::make_pair(28, 1);
 44 | 	opcodeMap["printc"] = std::make_pair(29, 1);
 45 | 	opcodeMap["syscall"] = std::make_pair(30, 1);
 46 | 	opcodeMap["memcpy"]= std::make_pair(31, 1);
 47 | }
 48 | 
 49 | Mapper::~Mapper() {
 50 | 
 51 | }
 52 | 
 53 | bool Mapper::canMapLabel(std::string label, unsigned int instructionIndex, std::unordered_map<std::string, size_t> labelMap,
 54 | 	std::vector<AssemberInstruction> instructions) {
 55 | 
 56 | 	size_t labelIndex;
 57 | 	try {
 58 | 		labelIndex = labelMap.at(label);
 59 | 	}
 60 | 	catch (std::out_of_range) {
 61 | 		return false;
 62 | 	}
 63 | 	if (labelIndex == instructionIndex) {
 64 | 		return true;
 65 | 	}
 66 | 	if (labelIndex > instructionIndex) {
 67 | 		for (int i = instructionIndex; i < labelIndex; i++) {
 68 | 			if (!instructions[i].length) {
 69 | 				return false;
 70 | 			}
 71 | 		}
 72 | 	}
 73 | 	else {
 74 | 		for (int i = instructionIndex - 1; i >= labelIndex && i > 0; i--) {
 75 | 			if (!instructions[i].length) {
 76 | 				return false;
 77 | 			}
 78 | 			if (i == 0)
 79 | 				return true;
 80 | 		}
 81 | 	}
 82 | 	return true;
 83 | }
 84 | 
 85 | int Mapper::calculateSizeRequirement(std::string label, unsigned int instructionIndex, std::unordered_map<std::string, size_t> labelMap,
 86 | 	std::vector<AssemberInstruction> instructions) {
 87 | 
 88 | 	size_t labelIndex;
 89 | 	try {
 90 | 		labelIndex = labelMap.at(label);
 91 | 	}
 92 | 	catch (std::out_of_range) {
 93 | 		return 0;
 94 | 	}
 95 | 	if (labelIndex == instructionIndex) {
 96 | 		return 0;
 97 | 	}
 98 | 	int64_t delta;
 99 | 	if (labelIndex > instructionIndex) {
100 | 		delta = sizeof(uint64_t) + 2; // assume max length for the jump instruction (10 bytes)
101 | 		for (unsigned int i = instructionIndex + 1; i < labelIndex; i++) {
102 | 			size_t instructionLength = instructions[i].length;
103 | 			int unAssembled = 0;
104 | 			if (instructionLength == 0) {
105 | 				// Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown
106 | 				unAssembled++;
107 | 			}
108 | 			else {
109 | 				delta += instructionLength;
110 | 			}
111 | 			if (unAssembled) {
112 | 				// assume the unassembled instructions will take max space
113 | 				delta += (unAssembled * (2 + sizeof(uint64_t)));
114 | 			}
115 | 		}
116 | 	}
117 | 	else {
118 | 		delta = 0;
119 | 		for (unsigned int i = instructionIndex - 1; i >= labelIndex; i--) {
120 | 			size_t instructionLength = instructions[i].length;
121 | 			int unAssembled = 0;
122 | 			if (instructionLength == 0) {
123 | 				// Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown
124 | 				unAssembled++;
125 | 			}
126 | 			else {
127 | 				delta += instructionLength;
128 | 			}
129 | 			if (unAssembled) {
130 | 				// assume the unassembled instructions will take max space
131 | 				delta += (unAssembled * (2 + sizeof(uint64_t)));
132 | 			}
133 | 			if (i == 0)
134 | 				break;
135 | 		}
136 | 		delta = -delta;
137 | 	}
138 | 	if (SCHAR_MIN <= delta && delta <= SCHAR_MAX) {
139 | 		return (delta > 0) ? (sizeof(int8_t) + 2) : sizeof(int8_t);
140 | 	}
141 | 	else if (SHRT_MIN <= delta && delta <= SHRT_MAX) {
142 | 		return (delta > 0) ? (sizeof(int16_t) + 2) : sizeof(int16_t);
143 | 	}
144 | 	else if (INT32_MIN <= delta && delta <= INT32_MAX) {
145 | 		return (delta > 0) ? (sizeof(int32_t) + 2) : sizeof(int32_t);
146 | 	}
147 | 	return (delta > 0) ? (2 + sizeof(int64_t)) : sizeof(int64_t);
148 | }
149 | 
150 | unsigned int Mapper::mapLabel(std::string label, unsigned int instructionIndex, std::unordered_map<std::string, size_t> labelMap,
151 | 	std::vector<AssemberInstruction> &instructions, int64_t &value) {
152 | 
153 | 	size_t labelIndex;
154 | 	try {
155 | 		labelIndex = labelMap.at(label);
156 | 	}
157 | 	catch (std::out_of_range) {
158 | 		return 0;
159 | 	}
160 | 	if (labelIndex == instructionIndex) {
161 | 		return 0;
162 | 	}
163 | 	int64_t delta;
164 | 	if (labelIndex > instructionIndex) {
165 | 		delta = (instructions[instructionIndex].length) ? instructions[instructionIndex].length : sizeof(uint64_t) + 2; // assume max length for the jump instruction (10 bytes)
166 | 		for (unsigned int i = instructionIndex + 1; i < labelIndex; i++) {
167 | 			size_t instructionLength = instructions[i].length;
168 | 			int unAssembled = 0;
169 | 			if (instructionLength == 0) {
170 | 				// Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown
171 | 				unAssembled++;
172 | 				return 0;
173 | 			}
174 | 			else {
175 | 				delta += instructionLength;
176 | 			}
177 | 			if (unAssembled) {
178 | 				// assume the unassembled instructions will take max space
179 | 				delta += (unAssembled * (2 + sizeof(uint64_t)));
180 | 			}
181 | 		}
182 | 	}
183 | 	else {
184 | 		delta = 0; // assume max length for the jump instruction (10 bytes)
185 | 		for (unsigned int i = instructionIndex - 1; i >= labelIndex; i--) {
186 | 			size_t instructionLength = instructions[i].length;
187 | 			int unAssembled = 0;
188 | 			if (instructionLength == 0) {
189 | 				// Instruction hasn't been assembled yet because it contains label that hasn't been resolved, thus it's length is unknown
190 | 				unAssembled++;
191 | 				return 0;
192 | 			}
193 | 			else {
194 | 				delta += instructionLength;
195 | 			}
196 | 			if (unAssembled) {
197 | 				// assume the unassembled instructions will take max space
198 | 				delta += (unAssembled * (2 + sizeof(uint64_t)));
199 | 			}
200 | 			if (i == 0)
201 | 				break;
202 | 		}
203 | 		delta = -delta;
204 | 	}
205 | 	value = delta;
206 | 	std::cout << "delta " << delta << std::endl;
207 | 	if (instructions[instructionIndex].length) {
208 | 		return instructions[instructionIndex].length - 2;
209 | 	}
210 | 	if (SCHAR_MIN <= value && value <= SCHAR_MAX) {
211 | 		if (delta > 0)
212 | 			value -= (sizeof(int64_t) - sizeof(int8_t));
213 | 		return sizeof(int8_t);
214 | 	}
215 | 	else if (SHRT_MIN <= value && value <= SHRT_MAX - 1) {
216 | 		if (delta > 0)
217 | 			value -= sizeof(int16_t) - sizeof(int8_t);
218 | 		return sizeof(int16_t);
219 | 	}
220 | 	else if (INT32_MIN <= value && value <= INT32_MAX - 1) {
221 | 		if (delta > 0)
222 | 			value -= sizeof(int32_t) - sizeof(int8_t);
223 | 		return sizeof(int32_t);
224 | 	}
225 | 	return sizeof(int64_t);
226 | }
227 | 
228 | bool Mapper::mapRegister(std::string regName, unsigned char& reg) {
229 | 	try {
230 | 		reg = registerMap.at(regName);
231 | 		return true;
232 | 	}
233 | 	catch (std::out_of_range) {
234 | 		return false;
235 | 	}
236 | }
237 | 
238 | bool Mapper::mapOpcode(std::string opcodeName, AssemberInstruction&instruction) {
239 | 	try {
240 | 		std::pair<unsigned char, unsigned int> opcode = opcodeMap[opcodeName];
241 | 		instruction.opcode = opcode.first;
242 | 		instruction.operands = opcode.second;
243 | 		return true;
244 | 	}
245 | 	catch (std::out_of_range) {
246 | 		return false;
247 | 	}
248 | }
249 | 
250 | template<typename T> void Mapper::mapImmediate(unsigned char* bytes, T value) {
251 | 	for (unsigned int i = 0; i < sizeof(T); i++) {
252 | 		bytes[i] = static_cast<uint8_t>(value >> ((sizeof(T) * 8) - 8));
253 | 	}
254 | }
255 | 
256 | int Mapper::mapInteger(int64_t value64, unsigned char* bytes, unsigned int &length) {
257 | 	if (length == sizeof(int8_t) || (!length && INT8_MIN <= value64 && value64 <= INT8_MAX)) {
258 | 		*reinterpret_cast<int8_t*>(bytes) = static_cast<int8_t>(value64);
259 | 		length = sizeof(int8_t);
260 | 		return Byte;
261 | 	}
262 | 	else if (length == sizeof(int16_t) || (INT16_MIN <= value64 && value64 <= INT16_MAX)) {
263 | 		*reinterpret_cast<int16_t*>(bytes) = static_cast<int16_t>(value64);
264 | 		length = sizeof(int16_t);
265 | 		return Short;
266 | 	}
267 | 	else if (length == sizeof(int32_t) || (INT32_MIN <= value64 && value64 <= INT32_MAX)) {
268 | 		*reinterpret_cast<int32_t*>(bytes) = static_cast<int32_t>(value64);
269 | 		length = sizeof(int32_t);
270 | 		return Dword;
271 | 	}
272 | 	else {
273 | 		*reinterpret_cast<int64_t*>(bytes) = static_cast<int64_t>(value64);
274 | 		length = sizeof(int64_t);
275 | 		return Qword;
276 | 	}
277 | }
278 | 
279 | int Mapper::mapImmediate(std::string value, unsigned char* bytes, unsigned int &length) {
280 | 	if (value.empty() || (value.length() == 1 && value[0] == '-'))
281 | 		return -1;
282 | 	try {
283 | 		if (value[0] == '-')
284 | 		{
285 | 			int64_t value64;
286 | 			if (value.length() > 3 && value[1] == '\'' && value[value.length() - 1] == '\'') {
287 | 				size_t diff = value.length() - 1 - 2;
288 | 				if (diff == 1) {
289 | 					value64 = -static_cast<int64_t>(value[2]);
290 | 				}
291 | 				else if (diff == 2 && value[2] == '\\') {
292 | 					switch (value[3]) {
293 | 					case 'n':
294 | 						value64 = -'\n';
295 | 						break;
296 | 					case 'r':
297 | 						value64 = -'\r';
298 | 						break;
299 | 					case 't':
300 | 						value64 = -'t';
301 | 						break;
302 | 					default:
303 | 						return -1;
304 | 					}
305 | 
306 | 				}
307 | 				else
308 | 					return -1;
309 | 			}
310 | 			else
311 | 				value64 = std::stoll(value, nullptr,0);
312 | 			return mapInteger(value64, bytes, length);
313 | 		}
314 | 		else {
315 | 			uint64_t value64;
316 | 			if (value.length() > 2 && value[0] == '\'' && value[value.length() - 1] == '\'') {
317 | 				size_t diff = value.length() - 2;
318 | 				if (diff == 1) {
319 | 					value64 = static_cast<uint64_t>(value[1]);
320 | 				}
321 | 				else if (diff == 2 && value[1] == '\\') {
322 | 					switch (value[2]) {
323 | 					case 'n':
324 | 						value64 = '\n';
325 | 						break;
326 | 					case 'r':
327 | 						value64 = '\r';
328 | 						break;
329 | 					case 't':
330 | 						value64 = 't';
331 | 						break;
332 | 					default:
333 | 						return -1;
334 | 					}
335 | 				}
336 | 				else
337 | 					return -1;
338 | 			}
339 | 			else
340 | 				value64 = std::stoull(value, nullptr, 0);
341 | 			if (value64 <= UINT8_MAX) {
342 | 				*reinterpret_cast<uint8_t*>(bytes) = static_cast<uint8_t>(value64);
343 | 				length = sizeof(uint8_t);
344 | 				return Byte;
345 | 			}
346 | 			else if (value64 <= UINT16_MAX) {
347 | 				*reinterpret_cast<uint16_t*>(bytes) = static_cast<uint16_t>(value64);
348 | 				length = sizeof(uint16_t);
349 | 				return Short;
350 | 			}
351 | 			else if (value64 <= UINT32_MAX) {
352 | 				*reinterpret_cast<uint32_t*>(bytes) = static_cast<uint32_t>(value64);
353 | 				length = sizeof(uint32_t);
354 | 				return Dword;
355 | 			}
356 | 			else {
357 | 				*reinterpret_cast<uint64_t*>(bytes) = static_cast<uint64_t>(value64);
358 | 				length = sizeof(uint64_t);
359 | 				return Qword;
360 | 			}
361 | 		}
362 | 	}
363 | 	catch (std::invalid_argument) {
364 | 		return -1;
365 | 	}
366 | 	catch (std::out_of_range) {
367 | 		return -2;
368 | 	}
369 | }


--------------------------------------------------------------------------------
/NanoAssembler/Mapper.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <iostream>
  3 | #include <vector>
  4 | #include <unordered_map>
  5 | #include <string>
  6 | #include <climits>
  7 | #include "Types.h"
  8 | 
/**
 * \brief Handles mapping of text representation of instruction parts to their corresponding structures
 *
 * Mapper implements handling for text representation of instruction parts and calculation of sizes for instructions
 * containing relative addresses
*/
class Mapper {
public:

	/**
	 * Initializes Mapper by filling the register and opcode lookup tables
	*/
	Mapper();

	/**
	 * Mapper destructor
	*/
	~Mapper();

	/**
	 * Maps text representation of opcode to instruction struct
	 * @param opcodeName Text representation of the opcode
	 * @param[out] instruction Instruction struct reference to update with the opcode value and operand count
	 * @return True if opcode was resolved, false if the opcode was unknown
	*/
	bool mapOpcode(std::string opcodeName, AssemberInstruction& instruction);

	/**
	 * Maps a text representation of register to its corresponding register value
	 * @param regName Text representation of the register
	 * @param[out] reg Reference to the value to hold the resolved register value
	 * @return True if register name was resolved, false if the name was unknown
	*/
	bool mapRegister(std::string regName, unsigned char& reg);

	/**
	 * Maps a text representation of immediate value to bytes
	 * @param value Text representation of an integer or of a single-quoted character literal, optionally prefixed with '-'
	 * @param[out] bytes Pointer to array to hold the bytes of the integer
	 * @param[out] length Reference to integer to hold the amount of bytes of the resolved immediate value
	 * @return Size mask for the stored value on success, -1 if the text is not a valid immediate value,
	 *         -2 if the number is out of range for the conversion
	*/
	int mapImmediate(std::string value, unsigned char* bytes, unsigned int& length);

	/**
	 * Checks if the given text label can be resolved as relative address from the given instruction
	 * @param label Name of the label to try mapping to address
	 * @param instructionIndex The index of the instruction we try to resolve the relative label address from
	 * @param labelMap Map structure holding all labels
	 * @param instructions List of all the instructions
	 * @return True if the label can be resolved to relative address from the current instruction, false if not
	*/
	bool canMapLabel(std::string label, unsigned int instructionIndex, std::unordered_map<std::string, size_t> labelMap,
		std::vector<AssemberInstruction> instructions);
	
	/**
	 * Calculate the size requirement in bytes for the relative label address from the current instruction.
	 * Should be called only if canMapLabel() returns true
	 * @param label Name of the label to try mapping to address
	 * @param instructionIndex The index of the instruction we try to resolve the relative label address from
	 * @param labelMap Map structure holding all labels
	 * @param instructions List of all the instructions
	 * @return Size of the relative address in bytes, 0 if the label is unknown or refers to the current instruction
	*/
	int calculateSizeRequirement(std::string label, unsigned int instructionIndex, std::unordered_map<std::string, size_t> labelMap,
		std::vector<AssemberInstruction> instructions);

	/**
	 * Maps label to relative address from the current instruction. Should only be called if canMapLabel returns true
	 * @param label Name of the label to try mapping to address
	 * @param instructionIndex The index of the instruction we try to resolve the relative label address from
	 * @param labelMap Map structure holding all labels
	 * @param instructions Reference to the list of all the instructions; their lengths are summed to get the relative address
	 * @param[out] value Receives the signed relative address in bytes
	 * @return Size of the resolved relative address in bytes, 0 if the address could not be resolved
	*/
	unsigned int mapLabel(std::string label, unsigned int instructionIndex, std::unordered_map<std::string, size_t> labelMap,
		std::vector<AssemberInstruction> &instructions, int64_t &value);
	
	/**
	 * Maps integer to bytes with minimum required bytes
	 * @param value64 64-bit signed integer representing the value to be mapped in bytes
	 * @param[out] bytes Pointer to array that will be updated with the integer bytes
	 * @param[in,out] length A non-zero value on entry is treated as a requested width in bytes; on return
	 *                holds the number of bytes that were stored in the array
	 * @return Size mask for the instruction bytes to use for setting the size of immediate value
	*/
	int mapInteger(int64_t value64, unsigned char* bytes, unsigned int &length);

	/**
	 * Copies given integer to byte array as bytes
	 * @param[out] bytes Pointer to array to be updated with the integer bytes; needs room for sizeof(T) bytes
	 * @param value Integer value to be copied
	 *
	 * NOTE(review): only declared here, the definition is not in this header. Other translation units
	 * would need an explicit instantiation to use it - confirm.
	*/
	template<typename T> void mapImmediate(unsigned char *bytes, T value);

private:
	std::unordered_map<std::string, std::pair<unsigned char, unsigned int>> opcodeMap; /**< Map from opcode text representation to (opcode value, number of operands) */
	std::unordered_map<std::string, unsigned char> registerMap; /**< Map between register text representations and register number */
};
105 | 


--------------------------------------------------------------------------------
/NanoAssembler/Nano.cpp:
--------------------------------------------------------------------------------
 1 | #include "NanoAssembler.h"
 2 | 
 3 | 
 4 | int main(int argc, char* argv[])
 5 | {
 6 | 	if (argc <= 1) {
 7 | 		std::cout << "Usage NanoAssembler.exe [FILE]" << std::endl;
 8 | 		return 0;
 9 | 	}
10 | 	NanoAssembler assembler;
11 | 	std::string input = argv[1];
12 | 	std::string output = input.substr(0, input.find_last_of('.')) + ".nanoc";
13 | 	AssemblerReturnValues ret = assembler.assembleToFile(input, output);
14 | 	switch (ret) {
15 | 	case AssemblerReturnValues::Success:
16 | 		std::cout << "File successfully assembled to: " << output << std::endl;
17 | 		break;
18 | 	case AssemblerReturnValues::IOError:
19 | 		std::cout << "There was an error while reading/writing a file on disk" << std::endl;
20 | 		break;
21 | 	case AssemblerReturnValues::MemoryAllocationError:
22 | 		std::cout << "Failed to dynamically allocate memory" << std::endl;
23 | 		break;
24 | 	case AssemblerReturnValues::AssemblerError:
25 | 		std::cout << "The input file could be compiled" << std::endl;
26 | 		break;
27 | 	default:
28 | 		std::cout << "Received unknown error: " << ret << std::endl;
29 | 	}
30 | 	return ret;
31 | }
32 | 


--------------------------------------------------------------------------------
/NanoAssembler/NanoAssembler.cpp:
--------------------------------------------------------------------------------
  1 | ﻿#include "NanoAssembler.h"
  2 | 
  3 | NanoAssembler::NanoAssembler() : mapper() {
  4 | 	// Constructor
  5 | }
  6 | 
  7 | NanoAssembler::~NanoAssembler() {
  8 | 	// Destructor
  9 | }
 10 | 
 11 | bool NanoAssembler::readLines(std::string file, std::vector<AssemberInstruction> &lines, std::unordered_map<std::string, size_t> &labelMap) {
 12 | 	std::string line;
 13 | 	std::ifstream f(file);
 14 | 	unsigned int lineNumber = 1;
 15 | 	if (f.is_open()) {
 16 | 		while (std::getline(f, line)) {
 17 | 			AssemberInstruction instruction;
 18 | 			instruction.assembled = false;
 19 | 			instruction.length = 0;
 20 | 			instruction.lineNumber = lineNumber;
 21 | 			lineNumber++;
 22 | 			// remove comments
 23 | 			size_t index = line.find(";");
 24 | 			if (index != -1) {
 25 | 				if (index == 0) {
 26 | 					continue;
 27 | 				}
 28 | 				line = line.substr(0, index);
 29 | 			}
 30 | 			line = std::regex_replace(line, std::regex("^\\s+|\\s+$"), ""); // trim leading and trailing whitespaces
 31 | 			if (line.empty()) // Skip empty lines and comments (prefix ";")
 32 | 			{
 33 | 				continue;
 34 | 			}
 35 | 			if (line[0] == ':' && line.length() > 1) {
 36 | 				// label
 37 | 				labelMap[line.substr(1)] = lines.size();
 38 | 				std::cout << "Label: " << line << std::endl;
 39 | 				continue;
 40 | 			}
 41 | 			instruction.line = std::regex_replace(line, std::regex("\\s{2,}"), " "); // replace all consecutive whitespaces with single space
 42 | 			instruction.line.erase(std::remove(instruction.line.begin(), instruction.line.end(), ','), instruction.line.end()); // Remove ','
 43 | 			std::transform(instruction.line.begin(), instruction.line.end(), instruction.line.begin(), ::tolower); // to lowercase
 44 | 			lines.push_back(instruction);
 45 | 		}
 46 | 		if (!lines.empty() && lines.at(lines.size() - 1).line != "halt") {
 47 | 			std::cout << "Adding line \"halt\" to the end of file!" << std::endl;
 48 | 			AssemberInstruction instruction;
 49 | 			instruction.assembled = false;
 50 | 			instruction.length = 0;
 51 | 			instruction.lineNumber = lineNumber;
 52 | 			instruction.line = "halt";
 53 | 			lines.push_back(instruction);
 54 | 		}
 55 | 		return true;
 56 | 	}
 57 | 	return false;
 58 | }
 59 | 
 60 | int NanoAssembler::assembleInstruction(int i, std::vector<AssemberInstruction> &instructionBytes, std::unordered_map<std::string, size_t> labelMap, bool initial) {
 61 | 	// Skip already assembled instructions
 62 | 	if (instructionBytes[i].assembled)
 63 | 		return 1;
 64 | 	std::istringstream iss(instructionBytes[i].line);
 65 | 	std::vector<std::string> parts(std::istream_iterator<std::string>{iss}, std::istream_iterator<std::string>());
 66 | 	AssemberInstruction&instruction = instructionBytes[i];
 67 | 	// Check that the instruction is valid e.g. 'mov'
 68 | 	if (!mapper.mapOpcode(parts[0], instruction)) {
 69 | 		std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
 70 | 		std::cout << "Unknown instruction \"" << parts[0] << std::endl;
 71 | 		return 0;
 72 | 	}
 73 | 	// Check that there are required amount of parameters for the instruction e.g. 'mov reg0,reg1' requires 2
 74 | 	if (instruction.operands != parts.size() - 1) {
 75 | 		std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
 76 | 		std::cout << "Invalid amount of parameters for instruction \"" << parts[0] << "\" expected: " << instruction.operands
 77 | 			<< " but received: " << (parts.size() - 1) << std::endl;
 78 | 		return 0;
 79 | 	}
 80 | 	// assemble instruction with two operands
 81 | 	if (instruction.operands == 2) {
 82 | 		unsigned char dstReg;
 83 | 		bool isDstMem = false, isSrcMem = false;
 84 | 		// set flags whether the operands refer to memory address (operands are to be treated as pointers)
 85 | 		if (parts[1][0] == '@') {
 86 | 			parts[1] = parts[1].substr(1);
 87 | 			isDstMem = true;
 88 | 		}
 89 | 		if (parts[2][0] == '@') {
 90 | 			parts[2] = parts[2].substr(1);
 91 | 			isSrcMem = true;
 92 | 		}
 93 | 		// parse destination register
 94 | 		if (!mapper.mapRegister(parts[1], dstReg)) {
 95 | 			std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
 96 | 			std::cout << "Invalid register name: \"" << parts[1] << "\"" << std::endl;
 97 | 			return 0;
 98 | 		}
 99 | 		// add first instruction byte
100 | 		instruction.bytecode[0]  = ((dstReg << 5) | (instruction.opcode));
101 | 		unsigned char srcReg;
102 | 		// parse source register if it exists (optional parameter)
103 | 		if (mapper.mapRegister(parts[2], srcReg)) {
104 | 			// second operand was register. add final instruction byte
105 | 			instruction.bytecode[1] = ((DataType::Reg | SRC_SIZE | (isSrcMem ? SRC_MEM : 0) | (isDstMem ? DST_MEM : 0)) | srcReg);
106 | 			instruction.length = 2;
107 | 			instruction.assembled = true;
108 | 		}
109 | 		else {
110 | 			//Second parameter is immediate value
111 | 			unsigned int length = 0;
112 | 			int size = mapper.mapImmediate(parts[2], instruction.bytecode + 2, length);
113 | 			if (size == -1) {
114 | 				// parameter was not integer or register
115 | 				if (labelMap.find(parts[2]) == labelMap.end()) {
116 | 					std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
117 | 					std::cout << "Unknown parameter: \"" << parts[2] << "\"";
118 | 					return 0;
119 | 				}
120 | 				
121 | 			}
122 | 			else if (size == -2) {
123 | 				// immediate value couldn't fit in 64bit unsinged integer...
124 | 				std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
125 | 				std::cout << "Integer too large: " << parts[2] << std::endl;
126 | 				return 0;
127 | 			}
128 | 			// we now have the size of instruction. Update to the previous byte
129 | 			instruction.bytecode[1] = ((DataType::Immediate | size | (isSrcMem ? SRC_MEM : 0) | (isDstMem ? DST_MEM : 0)));
130 | 			instruction.length = 2 + length;
131 | 			instruction.assembled = true;
132 | 		}
133 | 	}
134 | 	else if (instruction.operands == 0) {
135 | 		// Instructions w/o operands or with known to have one register can be pushed by the opcode (e.g. halt, inc reg0)
136 | 		instruction.bytecode[0] = instruction.opcode;
137 | 		instruction.length = 1;
138 | 		instruction.assembled = true;
139 | 		// Check if the instruction has register parameter
140 | 		if (parts.size() == 2) {
141 | 			unsigned char dstReg;
142 | 			if (!mapper.mapRegister(parts[1], dstReg)) {
143 | 				std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
144 | 				std::cout << "Invalid register name: \"" << parts[1] << "\"" << std::endl;
145 | 				return 0;
146 | 			}
147 | 			instruction.bytecode[0] |= (dstReg << 5);
148 | 		}
149 | 	}
150 | 	else if (instruction.operands == 1) {
151 | 		// Instruction has only one operand (e.g. jz, push, pop, ...)
152 | 		unsigned char srcReg;
153 | 		bool isSrcMem = false;
154 | 		// set flags whether the operands refer to memory address (operands are to be treated as pointers)
155 | 		if (parts[1][0] == '@') {
156 | 			parts[1] = parts[1].substr(1);
157 | 			isSrcMem = true;
158 | 		}
159 | 		// Check if the single operand is register
160 | 		if (mapper.mapRegister(parts[1], srcReg)) {
161 | 			// Operand is register
162 | 			instruction.bytecode[0] = instruction.opcode;
163 | 			instruction.bytecode[1] = ((DataType::Reg | SRC_SIZE | (isSrcMem ? SRC_MEM : 0) | srcReg));
164 | 			instruction.length = 2;
165 | 			instruction.assembled = true;
166 | 		}
167 | 		else {
168 | 			// The single operand is immediate value
169 | 			instruction.bytecode[0] = instruction.opcode;
170 | 			unsigned int length = 0;
171 | 			// parse the immediate value
172 | 			int size = mapper.mapImmediate(parts[1], instruction.bytecode + 2, length);
173 | 			if (size == -1) {
174 | 				// parameter was not integer or register
175 | 				if (labelMap.find(parts[1]) == labelMap.end()) {
176 | 					std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
177 | 					std::cout << "Unknown parameter: \"" << parts[1] << "\"" << std::endl;
178 | 					return 0;
179 | 				}
180 | 				// Check if the label can already be mapped to an immediate value
181 | 				if (mapper.canMapLabel(parts[1], i, labelMap, instructionBytes)) {
182 | 					// Assemble the instruction
183 | 					int64_t value;
184 | 					length = mapper.mapLabel(parts[1], i, labelMap, instructionBytes, value);
185 | 					if (length == 0) {
186 | 						std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
187 | 						std::cout << "Failed to map label: \"" << parts[1] << "\"" << std::endl;
188 | 						return 0;
189 | 					}
190 | 					std::cout << "Mapped to " << value << std::endl;
191 | 					size = mapper.mapInteger(value, instruction.bytecode + 2, length);
192 | 				}
193 | 				else if (initial) {
194 | 					return -1;
195 | 				}
196 | 				else {
197 | 					size = mapper.calculateSizeRequirement(parts[1], i, labelMap, instructionBytes);
198 | 					if (size == 0) {
199 | 						std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
200 | 						std::cout << "Failed to map label: \"" << parts[1] << "\"" << std::endl;
201 | 						return 0;
202 | 					}
203 | 					instruction.length = size;
204 | 					std::cout << parts[1] << " require " << size << " bytes" << std::endl;
205 | 					std::cout << "Did not map label but defined size requirement" << std::endl;
206 | 					return -1;
207 | 				}
208 | 			}
209 | 			else if (size == -2) {
210 | 				// immediate value couldn't fit in 64bit unsinged integer...
211 | 				std::cout << "Error on line (" << i << "): " << instructionBytes[i].line << std::endl;
212 | 				std::cout << "Integer too large: " << parts[1] << std::endl;
213 | 				return 0;
214 | 			}
215 | 			// we now have the size of instruction. Update to the previous byte
216 | 			instruction.bytecode[1] = ((DataType::Immediate | size | (isSrcMem ? SRC_MEM : 0)));
217 | 			instruction.length = 2 + length;
218 | 			instruction.assembled = true;
219 | 		}
220 | 	}
221 | 	return 1;
222 | }
223 | 
224 | bool NanoAssembler::assemble(std::vector<AssemberInstruction> &instruction, std::unordered_map<std::string, size_t> &labelMap) {
225 | 	int rounds = 3;
226 | 	// Iterate over all instructions 3 times if needed because instructions with labels need other instructions to be assembled to calculate
227 | 	// relative distance from itself to the label
228 | 	bool reiterate = true;
229 | 	while (rounds-- && reiterate) {
230 | 		reiterate = false;
231 | 		bool ready = true;
232 | 		for (int i = 0; i < instruction.size(); i++) {
233 | 			int success = assembleInstruction(i, instruction, labelMap, rounds == 2);
234 | 			if (instruction[i].assembled)
235 | 				continue;
236 | 			ready &= success == 1;
237 | 			if (success == 0)
238 | 				return false;
239 | 			if (success == -1) {
240 | 				reiterate = true;
241 | 				std::cout << "require reiteration for mapping label" << std::endl;
242 | 			}
243 | 		}
244 | 		if (ready)
245 | 			return true;
246 | 	}
247 | 	return false;
248 | }
249 | 
250 | AssemblerReturnValues NanoAssembler::assembleToFile(std::string inputFile, std::string outputFile) {
251 | 	std::vector<AssemberInstruction> lines;
252 | 	std::unordered_map<std::string, size_t> labelMap;
253 | 	// Load file from disk
254 | 	if (!readLines(inputFile, lines, labelMap)) {
255 | 		return AssemblerReturnValues::IOError;
256 | 	}
257 | 	// Compile the file to bytecode
258 | 	if (assemble(lines, labelMap)) {
259 | 		// Write to disk
260 | 		std::ofstream file(outputFile, std::ios::out | std::ios::binary);
261 | 		if (file.is_open()) {
262 | 			for (AssemberInstruction inst : lines)
263 | 				file.write((const char*)& inst.bytecode[0], inst.length);
264 | 			file.close();
265 | 			return AssemblerReturnValues::Success;
266 | 		}
267 | 		return AssemblerReturnValues::IOError;
268 | 	}
269 | 	return AssemblerReturnValues::AssemblerError;
270 | }
271 | 
272 | AssemblerReturnValues NanoAssembler::assembleToMemory(std::string inputFile, unsigned char*& bytecodeBuffer, unsigned int& size) {
273 | 	std::vector<AssemberInstruction> lines;
274 | 	std::unordered_map<std::string, size_t> labelMap;
275 | 	// Load assembler file from disk
276 | 	if (!readLines(inputFile, lines, labelMap)) {
277 | 		return AssemblerReturnValues::IOError;
278 | 	}
279 | 	// Compile to bytecode
280 | 	if (assemble(lines, labelMap)) {
281 | 		size = 0;
282 | 		// Calculate the resulting bytecode size
283 | 		for (AssemberInstruction inst : lines) {
284 | 			size += inst.length;
285 | 		}
286 | 		// Allocate buffer to store the bytecode
287 | 		bytecodeBuffer = new unsigned char[size];
288 | 		if (bytecodeBuffer) {
289 | 			unsigned int index = 0;
290 | 			// Copy the bytecode to output buffer
291 | 			for (AssemberInstruction inst : lines) {
292 | 				memcpy(bytecodeBuffer + index, inst.bytecode, inst.length);
293 | 				index += inst.length;
294 | 			}
295 | 			return AssemblerReturnValues::Success;
296 | 		}
297 | 		return AssemblerReturnValues::MemoryAllocationError;
298 | 	}
299 | 	return AssemblerReturnValues::AssemblerError;
300 | }


--------------------------------------------------------------------------------
/NanoAssembler/NanoAssembler.h:
--------------------------------------------------------------------------------
 1 | #pragma once
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <new>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "Types.h"
#include "Mapper.h"
13 | 
/**
 * NanoAssembler loads assembler source files and compiles them to the bytecode format
*/
class NanoAssembler {
public:
	NanoAssembler();
	~NanoAssembler();

	/**
	 * \brief Assembles input file and writes the resulting bytecode to a file on disk
	 *
	 * NanoAssembler loads the input file containing assembler instructions, compiles those into binary format
	 * and writes them to a binary file on disk
	 * @param inputFile Path of the assembler file to load
	 * @param outputFile Path of the file where the compiled bytecode will be written
	 * @return AssemblerReturnValues::Success on success, IOError when the input cannot be read or the output
	 *         cannot be opened, AssemblerError when assembling fails
	 */
	AssemblerReturnValues assembleToFile(std::string inputFile, std::string outputFile);

	/**
	 * \brief Assembles input file to buffer in memory
	 *
	 * NanoAssembler loads the input file containing assembler instructions, compiles those into binary format
	 * and outputs them to a dynamically allocated buffer
	 * @param inputFile Path of the assembler file to load
	 * @param[out] bytecodeBuffer Reference to a pointer that will point to the compiled bytecode buffer.
	 *             The buffer is allocated with new[], so the caller releases it with delete[]
	 * @param[out] size Reference to an unsigned int that will hold the size of the bytecodeBuffer in bytes
	 * @return AssemblerReturnValues::Success on success, IOError when the input cannot be read,
	 *         AssemblerError when assembling fails, MemoryAllocationError when the buffer cannot be allocated
	*/
	AssemblerReturnValues assembleToMemory(std::string inputFile, unsigned char*& bytecodeBuffer, unsigned int &size);
private:
	/** Fills lines and labelMap from the given file; returns false when loading fails */
	bool readLines(std::string file, std::vector<AssemberInstruction>& lines, std::unordered_map<std::string, size_t>& labelMap);
	/**
	 * Assembles instructionBytes[i].
	 * @return 1 when the instruction is assembled (or was already), 0 on error, -1 when a label operand
	 *         cannot be resolved yet and another pass is required
	 * NOTE(review): labelMap is taken by value, so the map is copied on every call
	 */
	int assembleInstruction(int i, std::vector<AssemberInstruction>& instructionBytes, std::unordered_map<std::string, size_t> labelMap, bool initial);
	/** Runs assembleInstruction over all instructions, repeating the pass while labels are unresolved */
	bool assemble(std::vector<AssemberInstruction>& instruction, std::unordered_map<std::string, size_t>& labelMap);

	Mapper mapper; /**< Maps opcode names, register names, immediates and labels to their encoded form */
};


--------------------------------------------------------------------------------
/NanoAssembler/Types.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <iostream>
 3 | #include <cstdint>
 4 | 
// Bit masks of the second instruction byte (the operand descriptor). The assembler ORs these
// together with the source register number in the low bits.
constexpr uint8_t SRC_TYPE = 0b10000000; // source type bit; same bit as DataType::Immediate
constexpr uint8_t SRC_SIZE = 0b01100000; // source size field; same bits as the Size enum values
constexpr uint8_t DST_MEM =  0b00010000; // destination operand is treated as a memory address
constexpr uint8_t SRC_MEM =  0b00001000; // source operand is treated as a memory address

// NOTE(review): this guard covers only the two enums below - presumably another header declares
// the same enums under the same guard; confirm before moving declarations in or out of it.
#ifndef TYPE_H
#define TYPE_H

/**
 * DataType enum holds the type mask of value reg/immediate
*/
enum DataType {
	Reg = 0,
	Immediate = 0b10000000
};

/**
 * Size enum holds the possible values of the SRC_SIZE field
*/
enum Size {
	Byte = 0b00000000,
	Short = 0b00100000,
	Dword = 0b01000000,
	Qword = 0b01100000
};
#endif
28 | 
/**
 * AssemberInstruction represents a single source line together with the bytecode assembled from it.
 *
 * Every member has a default value so a freshly created instruction is never read uninitialized
 * (the assembler checks `assembled` and `length` before it has written them).
*/
struct AssemberInstruction {
	std::string line;                                 // source text of the instruction
	unsigned char bytecode[2 + sizeof(int64_t)] = {}; // two header bytes followed by up to 8 immediate bytes
	unsigned char opcode = 0;                         // opcode bits placed in bytecode[0]
	unsigned int operands = 0;                        // number of operands the instruction expects
	unsigned int length = 0;                          // number of valid bytes in bytecode
	unsigned int lineNumber = 0;
	bool assembled = false;                           // true once bytecode and length are final
};
typedef struct AssemberInstruction AssemberInstruction;

/**
 * Result codes returned by the public NanoAssembler entry points
*/
enum AssemblerReturnValues {
	Success,
	AssemblerError,
	MemoryAllocationError,
	IOError
};


--------------------------------------------------------------------------------
/NanoDebugger/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ﻿# CMakeList.txt : CMake project for NanoVM, include source and define
 2 | # project specific logic here.
 3 | #
 4 | cmake_minimum_required (VERSION 3.8)
 5 | include_directories(../NanoVM)
 6 | # Add source to this project's executable.
 7 | add_executable (NanoDebugger "NanoDebugger.cpp" "../NanoVM/NanoVM.cpp" "../NanoVM/NanoVM.h" "NanoDebugger.h" "Instructions.cpp" "Instructions.h" "Debugger.cpp")
 8 | 
 9 | # TODO: Add tests and install targets if needed.
10 | 


--------------------------------------------------------------------------------
/NanoDebugger/Debugger.cpp:
--------------------------------------------------------------------------------
 1 | #include "NanoDebugger.h"
 2 | 
 3 | int main(int argc, char *argv[])
 4 | {
 5 | 	if (argc < 1) {
 6 | 		std::cout << "Usage NanoDebugger.exe [FILE]" << std::endl;
 7 | 	}
 8 | 	std::string file = (argv[1]);
 9 | 	NanoDebugger debugger(file);
10 | 	debugger.debug();
11 | 	return 0;
12 | }


--------------------------------------------------------------------------------
/NanoDebugger/Instructions.cpp:
--------------------------------------------------------------------------------
1 | #include "Instructions.h"
2 | 
// Mnemonic table of the disassembler; it is indexed with the opcode of the fetched instruction
const char *instructionStr[] = {
	"mov", "add", "sub", "and", "or", "xor", "sar", "sal",
	"ror", "rol", "mul", "div", "mod", "cmp", "jz", "jnz",
	"jg", "js", "jmp", "not", "inc", "dec", "ret", "call",
	"push", "pop", "halt", "printi", "prints", "printc", "syscall", "memcpy"
};


--------------------------------------------------------------------------------
/NanoDebugger/Instructions.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | extern const char *instructionStr[];


--------------------------------------------------------------------------------
/NanoDebugger/NanoDebugger.cpp:
--------------------------------------------------------------------------------
  1 | #include "NanoDebugger.h"
  2 | 
  3 | NanoDebugger::NanoDebugger(std::string file) : NanoVM(file) {
  4 | 	run = false;
  5 | }
  6 | 
  7 | NanoDebugger::NanoDebugger(unsigned char *bytecode, uint64_t size) : NanoVM(bytecode, size) {
  8 | 	run = false;
  9 | }
 10 | 
 11 | NanoDebugger::~NanoDebugger() {
 12 | 
 13 | }
 14 | 
 15 | bool NanoDebugger::disassembleInstruction(std::string &instruction) {
 16 | 	Instruction ins;
 17 | 	if (!fetch(ins)) {
 18 | 		return false;
 19 | 	}
 20 | 	std::string opcode = instructionStr[ins.opcode];
 21 | 	if (ins.opcode == Opcodes::Halt || ins.opcode == Opcodes::Ret) {
 22 | 		instruction = opcode;
 23 | 		return true;
 24 | 	}
 25 | 	// Single param instructions
 26 | 	if (ins.opcode == Opcodes::Jg || ins.opcode == Opcodes::Js || ins.opcode == Opcodes::Jnz || ins.opcode == Opcodes::Jz ||
 27 | 		ins.opcode == Opcodes::Jmp || ins.opcode == Opcodes::Push || ins.opcode == Opcodes::Pop || ins.opcode == Opcodes::Call ||
 28 | 		ins.opcode == Opcodes::Dec || ins.opcode == Opcodes::Inc || ins.opcode == Opcodes::Printc || ins.opcode == Opcodes::Printi ||
 29 | 		ins.opcode == Opcodes::Prints) {
 30 | 		if (ins.srcType == DataType::Reg) {
 31 | 			instruction = opcode + ((ins.isSrcMem) ? " @reg" : " reg") + std::to_string(ins.srcReg);
 32 | 		}
 33 | 		else {
 34 | 			instruction = opcode + ((ins.isSrcMem) ? " @" : " ") + std::to_string(ins.immediate);
 35 | 		}
 36 | 	}
 37 | 	// two param instruction 
 38 | 	else {
 39 | 		if (ins.srcType == DataType::Reg) {
 40 | 			instruction = opcode + ((ins.isDstMem) ? " @reg" : " reg") + std::to_string(ins.dstReg) + ", " +
 41 | 				((ins.isSrcMem) ? " @reg" : "reg") + std::to_string(ins.srcReg);
 42 | 		}
 43 | 		else {
 44 | 			instruction = opcode + ((ins.isDstMem) ? " @reg" : " reg") + std::to_string(ins.dstReg) + ", " +
 45 | 				((ins.isSrcMem) ? "@" : "") + std::to_string(ins.immediate);
 46 | 		}
 47 | 	}
 48 | 	return true;
 49 | }
 50 | 
 51 | bool NanoDebugger::handleInteractive() {
 52 | 	int value = 0;
 53 | 	do {
 54 | 		std::string instruction;
 55 | 		if (!disassembleInstruction(instruction)) {
 56 | 			std::cout << "Failed to fetch instruction: IP out of bounds! IP: " << cpu.registers[ip] << std::endl;
 57 | 			return false;
 58 | 		}
 59 | 		std::cout << cpu.registers[ip] << ". " << instruction << std::endl;
 60 | 		std::cout << "> ";
 61 | 		value = getchar();
 62 | 		std::cout << "\b\b";
 63 | 		if (value == 'h') {
 64 | 			std::cout << "\n(s)tack\nr(e)gisters\n(b)reakpoint\n(r)un\n(c)lean breakpoint\n(q)uit" << std::endl;
 65 | 		}
 66 | 		else if (value == 'e') {
 67 | 			std::cout << "\nRegisters:\n";
 68 | 			for (int i = 0; i < 8; i++) {
 69 | 				std::cout << "reg" << i << ": " << cpu.registers[i] << std::endl;
 70 | 			}
 71 | 		}
 72 | 		else if (value == 'r') {
 73 | 			run = true;
 74 | 			return true;
 75 | 		}
 76 | 		else if (value == 'b') {
 77 | 			std::cout << "Breakpoint where (offset): ";
 78 | 			int offset;
 79 | 			std::cin >> offset;
 80 | 			breakpoints.insert(offset);
 81 | 		}
 82 | 		else if (value == 'c') {
 83 | 			auto a = breakpoints.find(cpu.registers[ip]);
 84 | 			if (a == breakpoints.end()) {
 85 | 				std::cout << "No breakpoint was placed here!" << std::endl;
 86 | 			}
 87 | 			else {
 88 | 				breakpoints.erase(a);
 89 | 				std::cout << "Breakpoint removed!" << std::endl;
 90 | 			}
 91 | 		}
 92 | 		else if (value == 's') {
 93 | 			printStack();
 94 | 		}
 95 | 		else if (value == 'q') {
 96 | 			return false;
 97 | 		}
 98 | 	} while (value != 13);
 99 | 	return true;
100 | }
101 | 
102 | void NanoDebugger::printStack() {
103 | 	int counter = 0;
104 | 	unsigned char *p = (cpu.stackBase);
105 | 	uint64_t size = (cpu.registers[esp] + cpu.codeBase) - cpu.stackBase;
106 | 	std::cout << "\nStack size: " << size << "\n";
107 | 	for (int i = 0; i < size; i++) {
108 | 		if (counter == 7) {
109 | 			counter = 0;
110 | 			std::printf("%02X | %c %c %c %c %c %c %c %c\n", p[i], p[i - 7], p[i - 6], p[i - 5], p[i - 4], p[i - 3], p[i - 2], p[i - 1], p[i]);
111 | 			continue;
112 | 		}
113 | 		else {
114 | 			std::printf("%02X ", p[i]);
115 | 		}
116 | 		counter++;
117 | 	}
118 | 	if (counter) {
119 | 		std::printf("|  ");
120 | 		for (int i = counter; i > 0; i--) {
121 | 			std::printf("%c  ", p[size - i]);
122 | 		}
123 | 		std::printf("\n");
124 | 	}
125 | 	std::printf("\n");
126 | }
127 | 
128 | bool NanoDebugger::debug() {
129 | 	run = false;
130 | 	while (cpu.registers[ip] < cpu.bytecodeSize) {
131 | 		Instruction inst;
132 | 		if (fetch(inst)) {
133 | 			if (breakpoints.find(cpu.registers[ip]) != breakpoints.end()) {
134 | 				std::cout << "Breakpoint triggered! " << cpu.registers[ip] << std::endl;
135 | 				run = false;
136 | 				handleInteractive();
137 | 			}
138 | 			else if (!run) {
139 | 				handleInteractive();
140 | 			}
141 | 			if (inst.opcode == Halt) {
142 | 				std::cout << "VM halted!" << std::endl;
143 | 				handleInteractive();
144 | 				break;
145 | 			}
146 | 			if (!execute(inst)) {
147 | 				switch (errorFlag) {
148 | 				case MEMORY_ACCESS:
149 | 					std::cout << "Tried to read/write memory outside of VM!" << std::endl;
150 | 					break;
151 | 				default:
152 | 					std::cout << "Unknown error!" << std::endl;
153 | 				}
154 | 				return false;
155 | 			}
156 | 		}
157 | 		else {
158 | 			std::cout << "Invalid instruction!" << std::endl;
159 | 			return false;
160 | 		}
161 | 	}
162 | 	std::cout << "VM exited with return code: " << cpu.registers[Reg0] << std::endl;
163 | 	handleInteractive();
164 | 	return true;
165 | }
166 | 


--------------------------------------------------------------------------------
/NanoDebugger/NanoDebugger.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include "NanoVM.h"
 3 | #include "Instructions.h"
 4 | #include <iostream>
 5 | #include <string>
 6 | #include <set>
 7 | #include <vector>
 8 | #include <fstream>
 9 | // Windows only #include <conio.h>
10 | 
/**
 * \brief NanoDebugger inherits NanoVM allowing more control over the execution of the program
 *
 * NanoDebugger inherits the NanoVM implementation and adds stepping through the execution, stack dumps,
 * breakpoints, disassembling of instructions and other debugger behavior.
 *
 * NOTE(review): no access specifier is given, so the inheritance from NanoVM is private - confirm this is intended.
*/
class NanoDebugger : NanoVM {
public:
	/**
	 * Initializes NanoDebugger
	 * @param file Bytecode file to load
	*/
	NanoDebugger(std::string file);

	/**
	 * Initializes NanoDebugger
	 * @param bytecode Bytecode buffer to load
	 * @param size Size of the bytecode buffer
	*/
	NanoDebugger(unsigned char *bytecode, uint64_t size);

	/**
	 * NanoDebugger destructor
	*/
	~NanoDebugger();

	/**
	 * Starts interactive debugging of the loaded bytecode program
	 * @return True if bytecode program was executed successfully, false if error occurred
	*/
	bool debug();
	//bool disassembleToFile(std::string out);
private:

	/**
	 * Disassembles the next instruction pointed by IP
	 * @param[out] instruction String reference to hold the text representation of disassembled instruction
	 * @return True if instruction was disassembled successfully, false if failed
	*/
	bool disassembleInstruction(std::string &instruction);

	/**
	 * Prints a dump of the stack memory on screen
	*/
	void printStack();

	/**
	 * Handles interactive mode for the current instruction allowing user to interact with the program
	 * @return True if successful, false if the user quit or the current instruction could not be fetched
	*/
	bool handleInteractive();

	std::set<uint64_t> breakpoints; /**< Set of all active breakpoints (bytecode offsets compared against IP) */
	bool run; /**< True to run until a breakpoint is hit, false when stepping through */
};
66 | 


--------------------------------------------------------------------------------
/NanoUnitTests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # CMakeList.txt : CMake project for NanoVM, include source and define
 2 | # project specific logic here.
 3 | #
 4 | cmake_minimum_required (VERSION 3.8)
 5 | include_directories(../NanoVM)
 6 | # Add source to this project's executable.
 7 | # add_executable (NanoUnitTests "test.cpp" "../NanoAssembler/NanoAssembler.cpp" "../NanoAssembler/NanoAssembler.h" "../NanoVM/NanoVM.cpp" "../NanoVM/NanoVM.h" "NanoDebugger.h" "Instructions.cpp" "Instructions.h" "Debugger.cpp")
 8 | add_executable (NanoUnitTests "test.cpp" "../NanoAssembler/NanoAssembler.cpp" "../NanoAssembler/NanoAssembler.h" "../NanoAssembler/Mapper.h" "../NanoAssembler/Mapper.cpp" "../NanoAssembler/Types.h" "../NanoVM/NanoVM.cpp" "../NanoVM/NanoVM.h")
 9 | # TODO: Add tests and install targets if needed.
10 | 
11 | set_property(TARGET NanoUnitTests PROPERTY CXX_STANDARD 20)
12 | set_property(TARGET NanoUnitTests PROPERTY CXX_STANDARD_REQUIRED ON)
13 | 


--------------------------------------------------------------------------------
/NanoUnitTests/test.cpp:
--------------------------------------------------------------------------------
 1 | #include "../NanoAssembler/NanoAssembler.h"
 2 | #include "../NanoVM/NanoVM.h"
 3 | #include <fstream>
 4 | #include <iostream>
 5 | #include <filesystem>
 6 | namespace fs = std::filesystem;
 7 | 
 8 | /**
 9 |  * This file contains unit tests for NanoVM + NanoAssembler
10 |  * The tests load up Nano assembler files (.nano file extension), assemble those to binary files, run those
11 |  * and verify the results.
12 |  * This way we do not bind the unit tests to bytecode format but rather enforce that the NanoVM bytecode
13 |  * is executed properly and does not contain bugs while allowing us to modify the assembler
14 |  * without breaking the tests
15 | */
16 | 
17 | int runSingleTest(NanoAssembler& assembler, std::string& path) {
18 | 	unsigned char* bytecode;
19 | 	unsigned int length;
20 | 	AssemblerReturnValues ret = assembler.assembleToMemory(path, bytecode, length);
21 | 	// Assembling should succeed
22 | 	if (ret != AssemblerReturnValues::Success)
23 | 		return 1;
24 | 	// Read the assembly file
25 | 	std::string expectedReturnKey = "NANO_TEST_EXPECT_RETURN=";
26 | 	std::ifstream file(path, std::ios::in | std::ios::ate);
27 | 	int expectedValue;
28 | 	if (file.is_open())
29 | 	{
30 | 		unsigned long size = file.tellg();
31 | 		char *memblock = new char[size + 1];
32 | 		file.seekg(0, std::ios::beg);
33 | 		file.read(memblock, size);
34 | 		file.close();
35 | 		memblock[size] = '\0';
36 | 		std::string content(memblock);
37 | 		delete[] memblock;
38 | 		int index = content.find(expectedReturnKey);
39 | 		if (index == -1 || index == content.length() - expectedReturnKey.length()) {
40 | 			// Not a test file
41 | 			return 0;
42 | 		}
43 | 		std::string expectedReturnStr = content.substr(index + expectedReturnKey.length());
44 | 		try {
45 | 			expectedValue = std::stoi(expectedReturnStr);
46 | 		}
47 | 		catch (std::invalid_argument & e) {
48 | 			return 3;
49 | 		}
50 | 		catch (std::out_of_range & e) {
51 | 			return 3;
52 | 		}
53 | 	}
54 | 	else {
55 | 		return 4;
56 | 	}
57 | 	// Fire up the VM
58 | 	NanoVM vm(bytecode, length);
59 | 	int vmValue = vm.Run();
60 | 	if (vmValue == expectedValue) {
61 | 		std::cout << "Test passed: " << path.substr(path.find_last_of("/")) << std::endl;
62 | 		return 0;
63 | 	}
64 | 	std::cout << "Test failed: " << path.substr(path.find_last_of("/")) << " Expected value: " << expectedValue << " but was " << vmValue << std::endl;
65 | 	return 5;
66 | }
67 | 
68 | int runTests() {
69 | 	NanoAssembler assembler;
70 | 	std::string path = "../../../../examples";
71 | 	std::string ending = ".nano";
72 | 	int totalTests = 0;
73 | 	int failedTests = 0;
74 | 	for (const auto& entry : fs::directory_iterator(path)) {
75 | 		std::string path = entry.path().string();
76 | 		std::cout << path << std::endl;
77 | 		if (path.compare(path.length() - ending.length(), ending.length(), ending) == 0) {
78 | 			// Run only test for files with .nano ending
79 | 			int status = runSingleTest(assembler, path);
80 | 			if (status) {
81 | 				failedTests++;
82 | 			}
83 | 			totalTests++;
84 | 		}
85 | 	}
86 | 	if (!failedTests) {
87 | 		// All available tests passed
88 | 		std::cout << "All tests passed! " << totalTests << "/" << totalTests << std::endl;
89 | 		return 0;
90 | 	}
91 | 	std::cout << "Failed tests " << failedTests << " / " << totalTests << std::endl;
92 | 	return 1;
93 | }
94 | 
95 | // main
96 | int main(int argc, char* argv[]) {
97 | 	runTests();
98 | 	return 0;
99 | }


--------------------------------------------------------------------------------
/NanoVM/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | ﻿# CMakeList.txt : CMake project for NanoVM, include source and define
2 | # project specific logic here.
3 | #
4 | cmake_minimum_required (VERSION 3.8)
5 | 
6 | # Add source to this project's executable.
7 | add_executable (NanoVM "Nano.cpp" "NanoVM.cpp" "NanoVM.h")
8 | 
9 | # TODO: Add tests and install targets if needed.


--------------------------------------------------------------------------------
/NanoVM/Nano.cpp:
--------------------------------------------------------------------------------
 1 | #include "NanoVM.h"
 2 | 
 3 | int main(int argc, char* argv[])
 4 | {
 5 | 	if (argc <= 1) {
 6 | 		std::cout << "Usage NanoVM.exe [FILE]" << std::endl;
 7 | 		return 0;
 8 | 	}
 9 | 	NanoVM vm(argv[1]);
10 | 	// Return the VM's exit code
11 | 	return vm.Run();
12 | }


--------------------------------------------------------------------------------
/NanoVM/NanoVM.cpp:
--------------------------------------------------------------------------------
  1 | ﻿#include "NanoVM.h"
  2 | #include <inttypes.h>
  3 | 
  4 | NanoVM::NanoVM(unsigned char* code, uint64_t size) {
  5 | 	cpu.bytecodeSize = size;
  6 | 	// Initialize cpu
  7 | 	memset(&cpu, 0x00, sizeof(cpu));
  8 | 	// Zero out registers
  9 | 	memset(cpu.registers, 0x00, sizeof(cpu.registers));
 10 | 	cpu.codeSize = (NANOVM_PAGE_SIZE * (1 + (size / NANOVM_PAGE_SIZE)));
 11 | 	cpu.stackSize = NANOVM_PAGE_SIZE;
 12 | 	// allocate whole memory, code pages, stack, +10 bytes
 13 | 	// +10 bytes is for instruction fetching which might read more bytes than the instruction size
 14 | 	// This avoids reading memory out side of the VM
 15 | 	cpu.codeBase = (unsigned char*) malloc(cpu.stackSize + 10 + cpu.codeSize);
 16 | 	// Set stack base. Stack grows up instead of down like in x86
 17 | 	cpu.stackBase = cpu.codeBase + cpu.codeSize;
 18 | 	// Zero out stack + the last 10 bytes
 19 | 	memset(cpu.stackBase, 0x00, sizeof(cpu.stackSize) + 10);
 20 | 	// copy the bytecode to the vm
 21 | 	memcpy(cpu.codeBase, code, size);
 22 | 	// Set IP to the beginning of code
 23 | 	cpu.registers[ip] = 0;
 24 | 	cpu.registers[esp] = cpu.codeSize;
 25 | 	cpu.registers[bp] = cpu.codeSize;
 26 | }
 27 | 
 28 | NanoVM::NanoVM(std::string fileName) {
 29 | 	memset(&cpu, 0x00, sizeof(cpu));
 30 | 	// Zero out registers
 31 | 	memset(cpu.registers, 0x00, sizeof(cpu.registers));
 32 | 
 33 | 	std::streampos size;
 34 | 
 35 | 	std::ifstream file(fileName, std::ios::in | std::ios::binary | std::ios::ate);
 36 | 	if (file.is_open())
 37 | 	{
 38 | 		size = file.tellg();
 39 | 
 40 | 		cpu.codeSize = (NANOVM_PAGE_SIZE * (1 + (size / NANOVM_PAGE_SIZE)));
 41 | 		cpu.stackSize = NANOVM_PAGE_SIZE;
 42 | 
 43 | 		// allocate whole memory, code pages, stack, +10 bytes
 44 | 		// +10 bytes is for instruction fetching which might read more bytes than the instruction size
 45 | 		// This avoids reading memory out side of the VM
 46 | 		cpu.codeBase = (unsigned char*)malloc(cpu.stackSize + 10 + cpu.codeSize);
 47 | 
 48 | 		file.seekg(0, std::ios::beg);
 49 | 		file.read((char*)cpu.codeBase, size);
 50 | 		file.close();
 51 | 
 52 | 		cpu.bytecodeSize = size;
 53 | 		// Set stack base. Stack grows up instead of down like in x86
 54 | 		cpu.stackBase = cpu.codeBase + cpu.codeSize;
 55 | 		// Zero out stack + the last 10 bytes
 56 | 		memset(cpu.stackBase, 0x00, sizeof(cpu.stackSize) + 10);
 57 | 		cpu.registers[esp] = cpu.codeSize;
 58 | 		cpu.registers[bp] = cpu.codeSize;
 59 | 		// Set IP to the beginning of code
 60 | 		cpu.registers[ip] = 0;
 61 | 	}
 62 | 	else std::cout << "Unable to open file";
 63 | }
 64 | NanoVM::~NanoVM() {
 65 | 	free(cpu.codeBase);
 66 | }
 67 | 
 68 | template<class T> inline void NanoVM::push(T value) {
 69 | 	// Check bounds
 70 | 	if (sizeof(value) + cpu.registers[esp] >= reinterpret_cast<uint64_t>(cpu.stackBase) + cpu.stackSize) {
 71 | 		// No room in stack.
 72 | 		// Throw error or reallocate more pages
 73 | 		errorFlag = STACK_ERROR;
 74 | 		return;
 75 | 	}
 76 | 	// push to stack
 77 | 	*reinterpret_cast<T*>(cpu.codeBase + cpu.registers[esp]) = value;
 78 | 	// update stack pointer
 79 | 	cpu.registers[esp] += sizeof(value);
 80 | }
 81 | 
 82 | template<class T> inline T NanoVM::pop() {
 83 | 	// Check bounds
 84 | 	if (cpu.registers[esp] - sizeof(T) < cpu.codeSize) {
 85 | 		// Reached the bottom of stack
 86 | 		errorFlag = STACK_ERROR;
 87 | 		return 0;
 88 | 	}
 89 | 	// pop value from stack
 90 | 	T value = *reinterpret_cast<T*>(cpu.codeBase + cpu.registers[esp] - sizeof(T));
 91 | 	// update esp
 92 | 	cpu.registers[esp] -= sizeof(value);
 93 | 	return value;
 94 | }
 95 | 
 96 | uint64_t NanoVM::Run() {
 97 | 	while (true) {
 98 | 		Instruction inst;
 99 | 		if (fetch(inst)) {
100 | 			if (inst.opcode == Halt) {
101 | 				// Return value will be in reg0
102 | 				return cpu.registers[Reg0];
103 | 			}
104 | 			if (!execute(inst)) {
105 | 				// More error flags will be added
106 | 				switch (errorFlag) {
107 | 				case MEMORY_ACCESS:
108 | 					return 1;
109 | 					break;
110 | 				default:
111 | 					return 2;
112 | 				}
113 | 				return false;
114 | 			}
115 | 		}
116 | 		else {
117 | 			return 3;
118 | 		}
119 | 	}
120 | }
121 | 
122 | bool NanoVM::execute(Instruction &inst) {
123 | 	// set source and destination addresses
124 | 	void *dst, *src;
125 | 	bool isDstReg = false;
126 | 	dst = (inst.isDstMem) ? reinterpret_cast<void*>(cpu.codeBase + cpu.registers[inst.dstReg]) : reinterpret_cast<void*>(&cpu.registers[inst.dstReg]);
127 | 	if (inst.srcType == DataType::Reg) {
128 | 		src = (inst.isSrcMem) ? reinterpret_cast<void*>(cpu.codeBase + cpu.registers[inst.srcReg]) : reinterpret_cast<void*>(&cpu.registers[inst.srcReg]);
129 | 	}
130 | 	else {
131 | 		isDstReg = (inst.isDstMem) ? false : true;
132 | 		src = (inst.isSrcMem) ? reinterpret_cast<void*>(cpu.codeBase + inst.immediate) : reinterpret_cast<void*>(&inst.immediate);
133 | 	}
134 | 	// Do bounds check
135 | 	if ((src != &inst.immediate && src != &cpu.registers[inst.srcReg] && (src < cpu.codeBase || src >= cpu.codeBase + cpu.codeSize + cpu.stackSize)) || (dst != &cpu.registers[inst.dstReg] && (dst < cpu.codeBase || dst > cpu.codeBase + cpu.codeSize + cpu.stackSize))) {
136 | 		// Source or destination is out side of VM memory
137 | 		errorFlag = MEMORY_ACCESS;
138 | 		return false;
139 | 	}
140 | 
141 | 	#define MATHOP(INST, OP, SIZE, DSTSIZE) \
142 |     case INST: {         \
143 | 		*reinterpret_cast<DSTSIZE*>(dst) OP *reinterpret_cast<SIZE*>(src); \
144 | 		break; \
145 |     }
146 | 
147 | 	//USIZE is unsigned and SIZE is signed type => e.g. uint8_t and int8_t 
148 | 	#define BRANCH(USIZE, SIZE, DSTSIZE) \
149 | 	switch(inst.opcode) { \
150 | 		MATHOP(Opcodes::Add, +=, USIZE, DSTSIZE) \
151 | 		MATHOP(Opcodes::Mov, =, USIZE, DSTSIZE) \
152 | 		MATHOP(Opcodes::Sub, -=, USIZE, DSTSIZE) \
153 | 		MATHOP(Opcodes::Xor, ^=, USIZE, DSTSIZE) \
154 | 		MATHOP(Opcodes::And, &=, USIZE, DSTSIZE) \
155 | 		MATHOP(Opcodes::Or, |=, USIZE, DSTSIZE) \
156 | 		MATHOP(Opcodes::Sar, >>=, USIZE, DSTSIZE) \
157 | 		MATHOP(Opcodes::Sal, <<=, USIZE, DSTSIZE) \
158 | 		MATHOP(Opcodes::Div, /=, USIZE, DSTSIZE) \
159 | 		MATHOP(Opcodes::Mul, *=, USIZE, DSTSIZE) \
160 | 		MATHOP(Opcodes::Mod, %=, USIZE, DSTSIZE) \
161 | 	case Opcodes::Printi: \
162 | 		std::printf("%" PRIu64 "", *reinterpret_cast<USIZE*>(src)); \
163 | 		break; \
164 | 	case Opcodes::Prints: \
165 | 		std::printf("%s", src); \
166 | 		break; \
167 | 	case Opcodes::Printc: \
168 | 		std::printf("%c", *reinterpret_cast<USIZE*>(src)); \
169 | 		break; \
170 | 	case Opcodes::Inc: \
171 | 		*reinterpret_cast<USIZE*>(src) += 1; \
172 | 		break; \
173 | 	case Opcodes::Dec: \
174 | 		*reinterpret_cast<USIZE*>(src) -= 1; \
175 | 		break; \
176 | 	case Opcodes::Push: \
177 | 		push(*reinterpret_cast<USIZE*>(src)); \
178 | 		break; \
179 | 	case Opcodes::Pop: \
180 | 		*reinterpret_cast<USIZE*>(dst) = pop<USIZE>(); \
181 | 		break; \
182 | 	case Opcodes::Jz: \
183 | 		if (cpu.registers[flags] & ZERO_FLAG) { \
184 | 			cpu.registers[ip] += *reinterpret_cast<SIZE*>(src); \
185 | 			return true; \
186 | 		} \
187 | 		break; \
188 | 	case Opcodes::Jnz: \
189 | 		if (!(cpu.registers[flags] & ZERO_FLAG)) { \
190 | 			cpu.registers[ip] += *reinterpret_cast<SIZE*>(src); \
191 | 			return true; \
192 | 		} \
193 | 		break; \
194 | 	case Opcodes::Jg: \
195 | 		if (cpu.registers[flags] & GREATER_FLAG) { \
196 | 			cpu.registers[ip] += *reinterpret_cast<SIZE*>(src); \
197 | 			return true; \
198 | 		} \
199 | 		break; \
200 | 	case Opcodes::Js: \
201 | 		if (cpu.registers[flags] & SMALLER_FLAG) { \
202 | 			cpu.registers[ip] += *reinterpret_cast<SIZE*>(src); \
203 | 			return true; \
204 | 		} \
205 | 		break; \
206 | 	case Opcodes::Jmp: \
207 | 		cpu.registers[ip] += *reinterpret_cast<SIZE*>(src); \
208 | 		return true; \
209 | 	case Opcodes::Call: \
210 | 		push(cpu.registers[ip] + inst.instructionSize); \
211 | 		cpu.registers[ip] += *reinterpret_cast<SIZE*>(src); \
212 | 		return true; \
213 | 	case Opcodes::Ret: \
214 | 		cpu.registers[ip] = pop<uint64_t>(); \
215 | 		return true; \
216 | 	case Opcodes::Cmp: \
217 | 		if (*reinterpret_cast<USIZE*>(dst) == *reinterpret_cast<USIZE*>(src)) \
218 | 			cpu.registers[flags] = ZERO_FLAG; \
219 | 		else if (*reinterpret_cast<USIZE*>(dst) > *reinterpret_cast<USIZE*>(src)) \
220 | 			cpu.registers[flags] = GREATER_FLAG; \
221 | 		else \
222 | 			cpu.registers[flags] = SMALLER_FLAG; \
223 | 		break; \
224 | 	default: \
225 | 		return false; \
226 | 	}
227 | 
228 | 
229 | 	switch (inst.srcSize) {	
230 | 	case Size::Byte: 
231 | 		if (isDstReg) {
232 | 			BRANCH(uint8_t, int8_t, uint64_t);
233 | 		}
234 | 		else {
235 | 			BRANCH(uint8_t, int8_t, uint8_t);
236 | 		}
237 | 		break; 
238 | 	case Size::Short: 
239 | 		if (isDstReg) {
240 | 			BRANCH(uint16_t, int16_t, uint64_t);
241 | 		}
242 | 		else {
243 | 			BRANCH(uint16_t, int16_t, uint16_t);
244 | 		}
245 | 		break;
246 | 	case Size::Dword: 
247 | 		if (isDstReg) {
248 | 			BRANCH(uint32_t, int32_t, uint64_t);
249 | 		}
250 | 		else {
251 | 			BRANCH(uint32_t, int32_t, uint32_t);
252 | 		}
253 | 		break;
254 | 	default:
255 | 		BRANCH(uint64_t, int64_t, uint64_t);
256 | 		break;
257 | 	} 
258 | 	cpu.registers[ip] += inst.instructionSize;
259 | 	return true;
260 | }
261 | 
262 | bool NanoVM::fetch(Instruction &inst) const {
263 | 	// Read 64bit to try and minimize the required memory reading
264 | 	// This increases the performance
265 | 
266 | 	// Sanity check the ip that it is within code page
267 | 	if (cpu.registers[ip] >= cpu.codeSize) {
268 | 		std::cout << "IP out of bounds" << std::endl;
269 | 		return false;
270 | 	}
271 | 	// Parse the instruction
272 | 	unsigned char* rawIp = cpu.codeBase + cpu.registers[ip];
273 | 	uint64_t value = *reinterpret_cast<uint64_t*>(rawIp);
274 | 	inst.opcode   =  (value & (unsigned char)OPCODE_MASK);
275 | 	inst.dstReg   =  ((value & DST_REG_MASK) >> 5);
276 | 	inst.srcType  =  (value >> 8) & SRC_TYPE_MASK;
277 | 	inst.srcReg   =   (value >> 8) & SRC_REG_MASK;
278 | 	inst.srcSize  =  ((value >> 8) & SRC_SIZE_MASK) >> 5;
279 | 	inst.isDstMem =  ((value >> 8) & DST_MEM_MASK);
280 | 	inst.isSrcMem =  ((value >> 8) & SRC_MEM_MASK);
281 | 	// If source is immediate value, read it to the instruction struct
282 | 	if (inst.srcType) {
283 | 		// If the immediate value fit in the initial value. Parse it with bitshift. It is faster than reading memory again
284 | 		switch (inst.srcSize) {
285 | 		case Byte:
286 | 			inst.immediate = (uint8_t)(value >> 16);
287 | 			inst.instructionSize = 3;
288 | 			break;
289 | 		case Short:
290 | 			inst.immediate = (uint16_t)(value >> 16);
291 | 			inst.instructionSize = 4;
292 | 			break;
293 | 		case Dword:
294 | 			inst.immediate = (uint32_t)(value >> 16);
295 | 			inst.instructionSize = 6;
296 | 			break;
297 | 		case Qword:
298 | 			// In the case of qword we have to perform another read operations
299 | 			inst.immediate = *(uint64_t*)(rawIp + 2);
300 | 			inst.instructionSize = 10;
301 | 			break;
302 | 		}
303 | 	}
304 | 	else {
305 | 		inst.instructionSize = 2;
306 | 	}
307 | 	return true;
308 | 
309 | }


--------------------------------------------------------------------------------
/NanoVM/NanoVM.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <iostream>
  4 | #include <fstream>
  5 | #include <cstring>
  6 | #include <cstdint>
  7 | 
  8 | // VM masks and constants
  9 | constexpr uint32_t NANOVM_PAGE_SIZE	= 4096;
 10 | constexpr uint8_t OPCODE_MASK	= 0b00011111;
 11 | constexpr uint8_t DST_REG_MASK	= 0b11100000;
 12 | constexpr uint8_t SRC_TYPE_MASK	= 0b10000000;
 13 | constexpr uint8_t SRC_SIZE_MASK = 0b01100000;
 14 | constexpr uint8_t DST_MEM_MASK  = 0b00010000;
 15 | constexpr uint8_t SRC_MEM_MASK  = 0b00001000;
 16 | constexpr uint8_t SRC_REG_MASK  = 0b00000111;
 17 | 
 18 | // Error flags
 19 | constexpr uint8_t STACK_ERROR	= 0b10000000;
 20 | constexpr uint8_t IP_ERROR		= 0b01000000;
 21 | constexpr uint8_t MEMORY_ACCESS = 0b00100000;
 22 | 
 23 | // Comparison flags
 24 | constexpr uint8_t ZERO_FLAG		= 0b10000000;
 25 | constexpr uint8_t GREATER_FLAG	= 0b01000000;
 26 | constexpr uint8_t SMALLER_FLAG	= 0b00100000;
 27 | 
 28 | /**
 29 |  * Register enum defines all the CPU registers + flags and instruction pointer
 30 | */
 31 | enum Register {
 32 | 	Reg0,
 33 | 	Reg1,
 34 | 	Reg2,
 35 | 	Reg3,
 36 | 	Reg4,
 37 | 	Reg5,
 38 | 	bp, // base pointer for current stack frame
 39 | 	esp, 
 40 | 	ip,
 41 | 	flags
 42 | };
 43 | 
 44 | /**
 45 |  * Opcodes enum defines all the implemented opcodes for the NanoVM. Limited to 5 bits aka 32 opcodes
 46 | */
 47 | enum Opcodes {
 48 | 	Mov,
 49 | 	Add,
 50 | 	Sub,
 51 | 	And,
 52 | 	Or,
 53 | 	Xor,
 54 | 	Sar,
 55 | 	Sal,
 56 | 	Ror,
 57 | 	Rol,
 58 | 	Mul,
 59 | 	Div,
 60 | 	Mod,
 61 | 	Cmp,
 62 | 
 63 | 	Jz,
 64 | 	Jnz,
 65 | 	Jg,
 66 | 	Js,
 67 | 	Jmp,
 68 | 	Not,
 69 | 	Inc,
 70 | 	Dec,
 71 | 	Ret,
 72 | 
 73 | 	Call,
 74 | 	Push,
 75 | 	Pop,
 76 | 	Halt,
 77 | 	Printi,
 78 | 	Prints,
 79 | 	Printc,
 80 | 	Syscall,
 81 | 	Memcpy
 82 | };
 83 | 
 84 | #ifndef TYPE_H
 85 | #define TYPE_H
 86 | 
 87 | /**
 88 |  * Size enum defines used integer sizes which are 8, 16, 32, 64 bits
 89 | */
 90 | enum Size {
 91 | 	Byte,
 92 | 	Short,
 93 | 	Dword,
 94 | 	Qword
 95 | };
 96 | 
 97 | /**
 98 |  * DataType enum defines data types for the instructions which can be register or immediate value
 99 | */
100 | enum DataType {
101 | 	Reg,
102 | 	Immediate
103 | };
104 | 
105 | #endif // !TYPE_H
106 | 
/**
 * NanoVMCpu struct defines the CPU core which holds registers and pointers to code base, stack base and their respective sizes.
 *
 * Every member has a zero/null default so that a CPU whose memory was never allocated
 * (e.g. the bytecode file could not be opened) holds a null codeBase, which is safe to pass to free()
*/
struct NanoVMCpu{
	uint64_t registers[10] = {}; /**< CPU registers + IP and flags, indexed by the Register enum */
	unsigned char* codeBase = nullptr; /**< Pointer to the base of the VM memory (start of the code pages) */
	unsigned char* stackBase = nullptr; /**< Pointer to the base of the stack */
	uint64_t codeSize = 0; /**< Size of the code pages in bytes. The stack is not included, it starts at this offset */
	uint64_t stackSize = 0; /**< Size of the allocated stack memory */
	uint64_t bytecodeSize = 0; /**< Size of the loaded bytecode */
};
118 | 
/**
 * Instruction holds a single fetched instruction which the VM can run.
 *
 * All members default to zero/false so that fields which a decoder does not write
 * (e.g. the immediate value of an instruction with a register source) never hold indeterminate values
*/
struct Instruction {
	unsigned char opcode = 0; /**< Opcode of the instruction */
	unsigned char dstReg = 0; /**< Destination register which is defined in the 1st byte of instruction with opcode */
	unsigned char srcReg = 0; /**< Source register (optional) */
	unsigned char srcType = 0; /**< Source value type reg/immediate (optional). Zero means register */
	bool isDstMem = false; /**< Is destination register pointer to memory */
	bool isSrcMem = false; /**< Is source value pointer to memory */
	unsigned char srcSize = 0; /**< Size of the source value (optional) */
	uint64_t immediate = 0; /**< Immediate value aka source value (optional) */
	unsigned char instructionSize = 0; /**< Size of this instruction. This allows the vm to adjust the IP accordingly */
};

typedef struct NanoVMCpu NanoVMCpu;
typedef struct Instruction Instruction;
136 | 
/**
 * \brief NanoVM is the VM core which will load and run nano bytecode
 *
 * NanoVM is the VM core which will load the bytecode, allocate and initialize memory and the CPU.
 * It implements fetching and executing of instructions, stack memory handling and running of the bytecode.
 *
 * NOTE(review): the destructor frees cpu.codeBase and no copy constructor/assignment is declared,
 * so copying a NanoVM instance looks like it would free the same memory twice - confirm and avoid copies
*/
class NanoVM {
public:
	/**
	 * \brief Initializes the NanoVM from bytecode
	 * @param code Points to the bytecode to be loaded
	 * @param size Holds the size of the bytecode to be loaded
	*/
	NanoVM(unsigned char* code, uint64_t size);

	/**
	 * Initializes the NanoVM from bytecode file
	 * @param file Name of the file to load the bytecode from
	*/
	NanoVM(std::string file);

	/**
	 * NanoVM destructor. Frees the VM memory
	*/
	~NanoVM();

	/**
	 * Runs the whole loaded bytecode program
	 * @return Return value of the bytecode program (Reg0 at the time of Halt), or a small error code if fetching or executing an instruction failed
	*/
	uint64_t Run();
protected:
	/**
	 * Pops a value from the stack and adjusts the stack pointer
	 * @return Single value from the stack. On a stack error errorFlag is set to STACK_ERROR and 0 is returned
	*/
	template<class T> T pop();

	/**
	 * Pushes a value to the stack and adjusts the stack pointer
	 * @param value Value to push to the stack
	*/
	template<class T> void push(T value);

	/**
	 * Fetches the next instruction pointed by the instruction pointer (IP). Note that fetch does not check if the instruction is valid
	 * @param[out] instruction Reference to instruction struct to be updated
	 * @return True if instruction was fetched successfully, false if failed (e.g IP out of bounds)
	*/
	bool fetch(Instruction &instruction) const;

	/**
	 * Executes a single instruction and updates the internal state of the VM including IP
	 * @param instruction Instruction to be executed
	 * @return True if the instruction was executed successfully, false if the instruction was not valid or an error occurred
	*/
	bool execute(Instruction &instruction);

	unsigned char errorFlag; /**< 8 bit flag that will be set with error masks if an error occurs */
	NanoVMCpu cpu; /**< Holds the internal state of the CPU */
};
198 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # NanoVM
  2 | PoC lightweight x64 VM implementation
  3 | 
  4 | ### Table of contents
  5 | 
  6 | - [NanoVM](#nanovm)
  7 |   * [General](#general)
  8 |   * [How to build](#how-to-build)
  9 |     + [Windows (Visual Studio 2019)](#windows--visual-studio-2019-)
 10 |     + [Debian](#debian)
 11 |   * [VM architecture](#vm-architecture)
 12 |     + [Registers](#registers)
 13 |     + [Instructions](#instructions)
 14 | - [NanoAssembler](#nanoassembler)
 15 | - [NanoDebugger](#nanodebugger)
 16 | 
 17 | ## General 
 18 | 
 19 | NanoVM is a cross-platform, register-based, Turing-complete VM with stack memory. The project also includes an assembler and a debugger with a syntax similar to x86 assembly in Intel syntax. 
 20 | Note that the project is still in very early development and many things, including the instruction set and format, are subject to change, so bytecode from previous versions might not work in the future. 
 21 | The documentation will be updated when changes happen.
 22 | 
 23 | The longer term goal of the project is to be embeddable VM with a small bytecode format while maintaining reasonable performance speed. 
 24 | A syscall instruction will be added eventually, once the bytecode format has been finalized. It will provide some built-in functions such as IO, and the user will be able to register custom functions as callbacks for different syscall function values. 
 25 | This allows one to implement more "outside of the VM" functionality. Performance comparison tests against other languages will be added later. 
 26 | Longer term goal is to eventually actually program the Compiler/Assembler in NanoVM bytecode. 
 27 | 
 28 | Note that even though the VM does do bounds checking for read write and execute operations on memory these checks are more for catching bugs in the code + avoiding VM crashing, and not so much about hardening the VM. 
 29 | Escaping the VM sandbox is likely very trivial. 
 30 | However, if you notice a way to read, write or execute memory outside of the VM I'll gladly fix those. 
 31 | That being said **!this VM should not be used to run unknown and potentially hostile code!**. 
 32 | Also stuff like executing stack memory is currently possible and this is made on purpose to allow dynamic code generation or encryption. 
 33 | Read/write/execute permissions to memory pages might be added in future.
 34 | 
 35 | ## How to build
 36 | 
 37 | Build instructions have been tested on Windows and Debian based linux distros
 38 | 
 39 | ### Windows (Visual Studio 2019)
 40 | 
 41 | You need to have Visual Studio 2019 and cmake installed on your system.\
 42 | Visual Studio 2019 is compatible with cmake projects so you can build the project by opening the project in visual studio, right click the root CMakeLists.txt -> "Generate Cache for NanoVM". This will generate the cmake cache for you and now you can build the project by selecting 
 43 | from the menu bar: Build -> Build all.\
 44 | If you rather wish to generate visual studio specific build files you can do that by running the following command in the project root with cmd/powershell:
 45 | 
 46 | ```
 47 | cmake . -B ./build
 48 | ```
 49 | 
 50 | This will generate new Visual Studio build files under build/
 51 | 
 52 | ### Debian
 53 | 
 54 | You need to have build tools and cmake available. You can install those by running the following commands in terminal 
 55 | ```
 56 | sudo apt install build-essential
 57 | sudo apt install cmake
 58 | ```
 59 | Now to build the project run the following commands
 60 | ```
 61 | git clone https://github.com/etsubu/NanoVM.git
 62 | cd NanoVM
 63 | cmake .
 64 | make
 65 | ```
 66 | This will build all the binaries in their own folders along the source files.
 67 | 
 68 | ## VM architecture
 69 | 
 70 | The VM memory is divided into pages which by default are 4096 bytes each. When initialized, the VM bytecode is placed at the bottom of the allocated memory, followed by the stack memory whose base is on the next page. While the VM is similar to x86, the stack grows up, unlike in x86. This can be utilized to dynamically increase the stack memory if required with minimal effort.
 71 | 
 72 | ### Registers
 73 | The VM is register based, so the instructions utilize different registers. Registers are encoded with 3 bits, so there are 8 registers in total (the names will change in the future):
 74 | 
 75 | | Register        | Number        | Description                                  |
 76 | | -------------   |:-------------:| --------------------------------------------:|
 77 | | Reg0            | 0             | General purpose. Used to store return values |
 78 | | Reg1            | 1             | General purpose.                             |
 79 | | Reg2            | 2             | General purpose.                             |
 80 | | Reg3            | 3             | General purpose.                             |
 81 | | Reg4            | 4             | General purpose.                             |
 82 | | Reg5            | 5             | General purpose.                             |
 83 | | Bp              | 6             | Base pointer for the current stack frame     |
 84 | | Esp             | 7             | Stack pointer. Points to the top of the stack|
 85 | 
 86 | ### Instructions
 87 | Instructions have always an opcode and 0-2 operands. Below is the instruction encoding defined from LSB to MSB
 88 | 
 89 | | 5 bits           | 3 bits                | 1 bit             | 2 bits                      | 1 bits        | 1 bit         | 3 bits        |
 90 | | -------------    |:---------------------:|:-----------------:|:---------------------------:|:-------------:|:-------------:|:-------------:|
 91 | | Opcode           | Destination register  | Source type       | Source size                 | Is_Dst_pointer| Is_Src_pointer|Source register|
 92 | | What instruction | Update this register  | Reg=0, Immediate=1| Byte, short, dword, qword   | True,false    | True, false   | Source register if src type is reg|
 93 | 
 94 | So most of the instructions are encoded in 2 bytes + immediate value if used. Instructions that use zero operands effectively being only 1 byte are:
 95 | ```assembly
 96 | Halt ; Stops the execution and exits the VM execution
 97 | ret ; Pops value from the top of the stack and performs absolute jump to that address. Updates stack pointer
 98 | ```
 99 | Instructions that use 1 operand do not use either source register or immediate value. They do not use destination register even though it is always defined. Opcodes that use 1 operand:
100 | ```assembly
101 | 	Jz; Jump if zero flag is set. Example: jz reg0
102 | 	Jnz; Jump if zero flag is not set. Example: jnz reg0
103 | 	Jg;  Jump if greater flag is set. Example: jg reg0
104 | 	Js;  Jump if smaller flag is set. Example: js reg0
105 | 	Jmp; Jump ("goto") instruction. Example: jmp reg0
106 | 	Not; Flip the bits in value. Example: not reg0
107 | 	Inc; Increases the value by one: Example inc reg0
108 | 	Dec; Decreases the value by one: Example dec reg0
109 | 	Call; Pushes the next instructions absolute memory address to the stack and performs relative jump to the given address. Updates stack pointer Example: call reg0
110 | 	Push; Pushes value to the top of the stack. Example: push reg0
111 | 	Pop; Pops value from the top of the stack and moves the value to given address. Example: pop reg0
112 | 	Printi; prints given integer. Example: printi reg0
113 | 	Prints; prints given null terminated string. Example: prints @reg0 | Note that @reg0 uses reg0 as pointer to the string not as an absolute value
114 | 	Printc; prints given ASCII char to the console. Example printc reg0
115 | ```
116 | Instructions with 2 operands:
117 | ```assembly
118 | 	Mov; mov reg0, reg0 <=> reg0 = reg0
119 | 	Add; add reg0, reg0 <=> reg0 += reg0
120 | 	Sub; sub reg0, reg0 <=> reg0 -= reg0
121 | 	And; and reg0, reg0 <=> reg0 &= reg0
122 | 	Or;  or reg0, reg0 <=> reg0 |= reg0
123 | 	Xor; xor reg0, reg0 <=> reg0 ^= reg0
124 | 	Sar; sar reg0, reg0 <=> reg0 >>= reg0
125 | 	Sal; sal reg0, reg0 <=> reg0 <<= reg0
126 | 	Ror; ror reg0, reg0 <=> performs circular shift to the right on reg0, by reg0 times
127 | 	Rol; rol reg0, reg0 <=> performs circular shift to the left on reg0, by reg0 times
128 | 	Mul; mul reg0, reg0 <=> reg0 *= reg0
129 | 	Div; div reg0, reg0 <=> reg0 /= reg0
130 | 	Mod; mod reg0, reg0 <=> reg0 %= reg0
131 | 	Cmp; cmp reg0, reg1 | Compares the 2 values and sets flags depending on the comparison.
132 | ```
133 | ToDo:
134 | * Remove print instructions and move them under the syscall instruction to operate with stream pointers. This allows the printing to support console IO and for example file IO
135 | * Implement syscall instruction
136 | 
137 | # NanoAssembler
138 | NanoAssembler is currently a minimalistic assembler for NanoVM. The assembler was made to aid in making simple programs and tests. This project is not so much about making a "programming language" but rather the core VM which could be used as the base which some programming language is compiled to. When more advanced features will be introduced I'll consider creating a new compiler project and leave the assembler for the low level operations.
139 | Currently the assembler supports comments with the prefix ';' and uses regex to filter out multiple whitespaces to help in processing the input. The assembler also supports labels, which are defined with the ':' prefix. A label is mapped to the memory address of the next instruction after the label. Example:
140 | ```assembly
141 | ; The assembler supports comments
142 | ; The assembler strips multiple whitespaces
143 | ;          xor        reg0,     reg1 
144 | ; The above line would be translated to the one below. So the assembler is not sensitive with whitespaces
145 | xor reg0, reg0 ; zero out reg0
146 | :label
147 | printi reg0 ; Label points here
148 | printc '\n' ; The assembler can map characters defined with '' and special characters like \n \r \t to their ascii values
149 | ; The above line is the same as printc 10
150 | inc reg0 ; reg0++
151 | cmp reg0, 0x10 ; compare reg0 to 0x10 in hex which is the same as cmp reg0, 16
152 | ; The assembler understands base10 and base16 values
153 | jnz label    ; if reg0 != 16 jump to label
154 | ; The above code will print the numbers from 0 to 15, each on its own line
155 | ```
156 | ToDo:
157 | * Add macros. These would help to reduce the amount of code that needs to be written.
158 | * Add include tags which would allow to write "standard libraries" which could be included to the project
159 | * Size definitions for registers
160 | * ...
161 | 
162 | The assembler project's code is currently not clean. Its development will most likely be stopped eventually and a new compiler project will be started, probably with an external library for parsing the programming language. I will try to keep the assembler simple.
163 | 
164 | # NanoDebugger
165 | 
166 | The project contains also a simple command line debugger + disassembler. The debugger inherits the NanoVM core and is capable of stepping through the programs. It also supports:
167 | * Breakpoints
168 | * Goto. This allows you to change the current instruction pointer
169 | * print registers. This will print the current register values and flags set by cmp
170 | * Print stack. This will print the stack memory up to the stack pointer. Each line of the dump will be 8 hex values followed by the same values in ascii separated by |. This allows to easily look at potential ASCII strings in stack as well as 64bit integers.
171 | Todo:
172 | * Add commands for modifying the stack and registers
173 | * Add whole memory dump which will dump all the memory pages including code and stack to the disk.
174 | * Add option to disassemble the whole code and dump to the disk with memory offsets
175 | 


--------------------------------------------------------------------------------
/examples/HelloWorld.nano:
--------------------------------------------------------------------------------
; Builds the null terminated string "hello world" on the stack and prints it.
; The stack grows upwards, so the characters are stored in memory in the order they are pushed.
; NOTE(review): assumes every character immediate is pushed as a single byte - confirm against the assembler
mov reg0, esp ; remember the current top of the stack, this is where the string will start
push 'h'
push 'e'
push 'l'
push 'l'
push 'o'
push 32 ; ASCII code of the space character
push 'w'
push 'o'
push 'r'
push 'l'
push 'd'
push 0x00 ; string terminator
prints @reg0 ; @ makes reg0 a pointer to the string instead of a value
; prints hello world
halt


--------------------------------------------------------------------------------
/examples/SieveOfEratosthenes.nano:
--------------------------------------------------------------------------------
; This program will calculate prime numbers with sieve of eratosthenes and prints them to the console
; The candidate numbers are stored on the stack as an array starting at bp; a zeroed entry marks a non-prime

mov reg2, 300 ;print prime numbers under This value
mov bp, esp ; set base pointer
; initialize numbers
:init
push reg0
inc reg0
cmp reg0, reg2
js init
; stack is now allocated
; iterate through array
mov reg3, bp ; set to beginning of array
add reg3, 16 ; skip 2 values (the entries for 0 and 1, 8 bytes each)
:loop
cmp @reg3, 0
jz waszero
mov reg4, @reg3 ;reg4 = value stored at @reg3
mul reg4, 8 ; reg4 is now the distance in bytes between multiples of that value
mov reg2, reg4
add reg2, bp ; reg2 points to the entry of the value itself
:subloop
add reg2, reg4 ; advance to the next multiple
cmp reg2, esp
jg waszero ; past the end of the array
jz waszero
mov reg5, @reg2
mod reg5, @reg3 ; modulo
cmp reg5, 0 ;was divided evenly => zero out
jnz nozero
xor @reg2, @reg2

:nozero
jmp subloop
; loop through multiples of reg3

:waszero
;printi reg3
;printc '\n'
add reg3, 8 ; next array entry
cmp reg3, esp
jnz loop
;print prime numbers
mov reg3, bp
add reg3, 16

:printloop
mov reg0, @reg3
cmp reg0, 0
jz notprime
printi reg0
mov reg1, reg0 ; save last prime
printc '\n'
:notprime
add reg3, 8
cmp reg3, esp
js printloop
mov reg0, reg1 ; return value is the last prime
halt
; NANO_TEST_EXPECT_RETURN=293


--------------------------------------------------------------------------------
/examples/arithmetic.nano:
--------------------------------------------------------------------------------
; Demonstrates the basic arithmetic instructions. The result is returned in reg0
; The comment on "add reg1, reg0" relies on reg1 being 0 when the program starts
mov reg0, 5 	;reg0 is now 5
add reg1, reg0 	;reg1 is now 5
mov reg3, 3
sub reg0, reg3  ;reg0 is now 2
mul reg0, reg1  ;reg0 is now 10
div reg0, 2		;reg0 is now 5
halt
; NANO_TEST_EXPECT_RETURN=5


--------------------------------------------------------------------------------
/examples/fibonacciSequence.nano:
--------------------------------------------------------------------------------
;This code will print fibonacci sequence and return the last number

;use reg0 and reg1 for storing numbers
;set reg3 as counter for how many pairs should be printed
mov reg3, 10
;set reg1 to 1 (relies on reg1 being 0 when the program starts)
inc reg1
;label could be placed here
:loop
printi reg0
printc '\n'
add reg0, reg1
printi reg1
mov reg4, reg1 ; remember the number that was printed last
printc '\n'
add reg1, reg0
dec reg3 ;subtract 1 from reg3
cmp reg3, reg5 ; compare to 0 (reg5 is initialized as 0)
jnz loop
mov reg0, reg4 ; return the last printed number
halt
; NANO_TEST_EXPECT_RETURN=4181


--------------------------------------------------------------------------------
/examples/labels.nano:
--------------------------------------------------------------------------------
; Counts reg0 from 0 up to 5 using a backward jump to a label and prints 0-4 on the way
; NOTE(review): there is no halt at the end - presumably the tooling terminates the program, confirm
;define label
:loop
printi reg0
inc reg0
cmp reg0, 5
jnz loop ; repeat while reg0 != 5
; NANO_TEST_EXPECT_RETURN=5


--------------------------------------------------------------------------------
/examples/labels2.nano:
--------------------------------------------------------------------------------
; Demonstrates a forward jump to a label that is defined later in the file
; NOTE(review): there is no halt at the end - presumably the tooling terminates the program, confirm
cmp reg0, 5
jnz end ; taken when reg0 != 5, which skips the sub below
sub reg0, 1
:end
inc reg0
; NANO_TEST_EXPECT_RETURN=1


--------------------------------------------------------------------------------
/examples/labels3.nano:
--------------------------------------------------------------------------------
; Counts reg0 down from 10 to 0 and prints only the even values
mov reg0, 10
:loop1
dec reg0
mov reg1, reg0
mod reg1, 2 ; reg1 = reg0 % 2
cmp reg1, 0
jnz noprint ; odd value => skip printing
printi reg0
:noprint
cmp reg0, 0
jnz loop1 ; repeat until reg0 reaches 0
halt
; NANO_TEST_EXPECT_RETURN=0


--------------------------------------------------------------------------------
/examples/labels4.nano:
--------------------------------------------------------------------------------
; Same loop as labels3 but the first odd value jumps straight to the end label
; The first decrement gives 9 which is odd, so the program halts with reg0 = 9
mov reg0, 10
:loop1
dec reg0
mov reg1, reg0
mod reg1, 2 ; reg1 = reg0 % 2
cmp reg1, 0
jnz end ; odd value => leave the loop
printi reg0
:noprint
cmp reg0, 0
jnz loop1
:end
halt
; NANO_TEST_EXPECT_RETURN=9


--------------------------------------------------------------------------------
/examples/loop.nano:
--------------------------------------------------------------------------------
; Loop written with a raw relative jump instead of a label. Prints 0-4 and returns 5
printi reg0
add reg0, 1
cmp reg0, 5
; The jump offset is relative to the js instruction itself
; NOTE(review): -8 reaches the first instruction only if the three instructions above encode to 2 + 3 + 3 bytes - confirm against the assembler
js -8
halt
; NANO_TEST_EXPECT_RETURN=5


--------------------------------------------------------------------------------