├── .gitignore ├── CMakeLists.txt ├── README.md ├── src ├── code │ ├── binaryFileParser.cpp │ ├── binaryFileParser.hpp │ ├── codeObject.cpp │ └── codeObject.hpp ├── main.cpp ├── object │ ├── lyxInteger.cpp │ ├── lyxInteger.hpp │ ├── lyxObject.hpp │ ├── lyxString.cpp │ └── lyxString.hpp └── util │ ├── arrayList.cpp │ ├── arrayList.hpp │ └── bufferedInputStream.hpp └── test └── hello.py /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode 2 | /build/ 3 | /bin/ 4 | /test/*.pyc -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | 3 | project(LyxPythonVM) 4 | 5 | set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) 6 | 7 | if(CMAKE_BUILD_TYPE MATCHES Debug) 8 | SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -w -msse3 -funroll-loops -std=c++11 -O0 -pg" ) 9 | else() 10 | SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -w -std=c++11 -O2 -march=native" ) 11 | endif() 12 | 13 | include_directories(src/) 14 | 15 | set(SRCS src/main.cpp 16 | src/code/binaryFileParser.cpp 17 | src/code/codeObject.cpp 18 | src/object/lyxInteger.cpp 19 | src/object/lyxString.cpp 20 | src/util/arrayList.cpp) 21 | 22 | add_executable(vm ${SRCS}) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LyxPythonVM 2 | 🐍 Python Virtual Machine from Scratch 3 | 👨‍💻 Currently in development 4 | If you have any question about LyxPythonVM, feel free to post an issue or send me an email: -------------------------------------------------------------------------------- /src/code/binaryFileParser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "binaryFileParser.hpp" 5 | 6 | BinaryFileParser::BinaryFileParser(BufferedInputStream *buf_file_stream) { 7 | file_stream = buf_file_stream; 8 | } 9 | 10 | CodeObject *BinaryFileParser::parse() { 11 | int magic_number = file_stream->read_init(); 12 | printf("magic number is 0x%x\n", magic_number); 13 | int moddate = file_stream->read_init(); 14 | printf("moddate is 0x%x\n", moddate); 15 | 16 | char object_type = file_stream->read(); 17 | 18 | if (object_type == 'c') { 19 | CodeObject *result = get_code_object(); 20 | printf("parse OK!\n"); 21 | return result; 22 | } 23 | 24 | return NULL; 25 | } 26 | 27 | CodeObject *BinaryFileParser::get_code_object() { 28 | int argcount = file_stream->read_init(); 29 | int nlocals = file_stream->read_init(); 30 | int stacksize = file_stream->read_init(); 31 | int flags = file_stream->read_init(); 32 | printf("flags is 0x%x\n", flags); 33 | 34 | LyxString *byte_codes = get_byte_codes(); 35 | ArrayList *consts = get_consts(); 36 | ArrayList *names = get_names(); 37 | ArrayList *var_names = get_var_names(); 38 | ArrayList *free_vars = get_free_vars(); 39 | ArrayList *cell_vars = get_cell_vars(); 40 | 41 | LyxString *file_name = get_file_name(); 42 | LyxString *module_name = get_name(); 43 | int begin_line_no = file_stream->read_init(); 44 | LyxString *lnotab = get_no_table(); 45 | 46 | return new CodeObject(argcount, nlocals, stacksize, flags, byte_codes, consts, 47 | names, var_names, free_vars, cell_vars, file_name, 48 | module_name, begin_line_no, lnotab); 49 | } 50 | 51 | LyxString *BinaryFileParser::get_string() { 52 | int length = file_stream->read_init(); 53 | char *str_value = new char[length]; 54 | 55 | for (int i = 0; i < length; i++) { 56 | str_value[i] = file_stream->read(); 57 | } 58 | 59 | LyxString *s = new LyxString(str_value, length); 60 | delete[] str_value; 61 | 62 | return s; 63 | } 64 | 65 | LyxString *BinaryFileParser::get_name() { 66 | char ch = file_stream->read(); 67 | 68 | if (ch == 's') { 69 | return get_string(); 70 | } else if (ch == 't') { 71 | LyxString *str = get_string(); 72 | _string_table.add(str); 73 | return str; 74 | } else if (ch == 'R') { 75 | return _string_table.get(file_stream->read_init()); 76 | } 77 | 78 | return NULL; 79 | } 80 | 81 | LyxString *BinaryFileParser::get_file_name() { return get_name(); } 82 | 83 | LyxString *BinaryFileParser::get_byte_codes() { 84 | if (file_stream->read() != 's') { 85 | return NULL; 86 | } 87 | 88 | return get_string(); 89 | } 90 | 91 | LyxString *BinaryFileParser::get_no_table() { 92 | char ch = file_stream->read(); 93 | 94 | if (ch != 's' && ch != 't') { 95 | file_stream->unread(); 96 | return NULL; 97 | } 98 | 99 | return get_string(); 100 | } 101 | 102 | ArrayList *BinaryFileParser::get_consts() { 103 | if (file_stream->read() == '(') { 104 | return get_tuple(); 105 | } 106 | 107 | file_stream->unread(); 108 | return NULL; 109 | } 110 | 111 | ArrayList *BinaryFileParser::get_names() { 112 | if (file_stream->read() == '(') { 113 | return get_tuple(); 114 | } 115 | 116 | file_stream->unread(); 117 | return NULL; 118 | } 119 | 120 | ArrayList *BinaryFileParser::get_var_names() { 121 | if (file_stream->read() == '(') { 122 | return get_tuple(); 123 | } 124 | 125 | file_stream->unread(); 126 | return NULL; 127 | } 128 | 129 | ArrayList *BinaryFileParser::get_free_vars() { 130 | if (file_stream->read() == '(') { 131 | return get_tuple(); 132 | } 133 | 134 | file_stream->unread(); 135 | return NULL; 136 | } 137 | 138 | ArrayList *BinaryFileParser::get_cell_vars() { 139 | if (file_stream->read() == '(') { 140 | return get_tuple(); 141 | } 142 | 143 | file_stream->unread(); 144 | return NULL; 145 | } 146 | 147 | ArrayList *BinaryFileParser::get_tuple() { 148 | int length = file_stream->read_init(); 149 | LyxString *str; 150 | 151 | ArrayList *list = new ArrayList(length); 152 | for (int i = 0; i < length; i++) { 153 | char obj_type = file_stream->read(); 154 | 155 | switch (obj_type) { 156 | case 'c': 157 | printf("got a code object\n"); 158 | list->add(get_code_object()); 159 | break; 160 | case 'i': 161 | list->add(new LyxInteger(file_stream->read_init())); 162 | break; 163 | case 'N': 164 | list->add(NULL); 165 | break; 166 | case 't': 167 | str = get_string(); 168 | list->add(str); 169 | _string_table.add(str); 170 | break; 171 | case 's': 172 | list->add(get_string()); 173 | break; 174 | case 'R': 175 | list->add(_string_table.get(file_stream->read_init())); 176 | break; 177 | default: 178 | printf("parser, unrecognized type : %c\n", obj_type); 179 | } 180 | } 181 | 182 | return list; 183 | } -------------------------------------------------------------------------------- /src/code/binaryFileParser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BINARY_FILE_PARSER_HPP 2 | #define BINARY_FILE_PARSER_HPP 3 | 4 | #include "util/bufferedInputStream.hpp" 5 | #include "util/arrayList.hpp" 6 | #include "object/lyxObject.hpp" 7 | #include "object/lyxInteger.hpp" 8 | #include "object/lyxString.hpp" 9 | #include "code/codeObject.hpp" 10 | 11 | class BinaryFileParser { 12 | private: 13 | BufferedInputStream *file_stream; 14 | int cur; 15 | ArrayList _string_table; 16 | 17 | public: 18 | BinaryFileParser(BufferedInputStream *stream); 19 | 20 | public: 21 | CodeObject *parse(); 22 | CodeObject *get_code_object(); 23 | LyxString *get_byte_codes(); 24 | LyxString *get_no_table(); 25 | int get_int(); 26 | LyxString *get_string(); 27 | LyxString *get_name(); 28 | 29 | LyxString *get_file_name(); 30 | 31 | ArrayList *get_consts(); 32 | ArrayList *get_names(); 33 | ArrayList *get_var_names(); 34 | ArrayList *get_free_vars(); 35 | ArrayList *get_cell_vars(); 36 | ArrayList *get_tuple(); 37 | }; 38 | 39 | #endif -------------------------------------------------------------------------------- /src/code/codeObject.cpp: -------------------------------------------------------------------------------- 1 | #include "codeObject.hpp" 2 | 3 | CodeObject::CodeObject(int argcount, int nlocals, int stacksize, int flag, 4 | LyxString *bytecodes, ArrayList *consts, 5 | ArrayList *names, 6 | ArrayList *varnames, 7 | ArrayList *freevars, 8 | ArrayList *cellvars, LyxString *file_name, 9 | LyxString *co_name, int lineno, LyxString *notable) 10 | : _argcount(argcount), _nlocals(nlocals), _stack_size(stacksize), 11 | _flag(flag), _bytecodes(bytecodes), _names(names), _consts(consts), 12 | _var_names(varnames), _free_vars(freevars), _cell_vars(cellvars), 13 | _co_name(co_name), _file_name(file_name), _lineno(lineno), 14 | _notable(notable) {} -------------------------------------------------------------------------------- /src/code/codeObject.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CODE_OBJECT_HPP 2 | #define CODE_OBJECT_HPP 3 | 4 | #include "object/lyxObject.hpp" 5 | 6 | class LyxString; 7 | 8 | template class ArrayList; 9 | 10 | class CodeObject : public LyxObject { 11 | public: 12 | int _argcount; 13 | int _nlocals; 14 | int _stack_size; 15 | int _flag; 16 | 17 | LyxString *_bytecodes; 18 | ArrayList *_names; 19 | ArrayList *_consts; 20 | ArrayList *_var_names; 21 | 22 | ArrayList *_free_vars; 23 | ArrayList *_cell_vars; 24 | 25 | LyxString *_co_name; 26 | LyxString *_file_name; 27 | 28 | int _lineno; 29 | LyxString *_notable; 30 | CodeObject(int argcount, int nlocals, int stacksize, int flag, 31 | LyxString *bytecodes, ArrayList *consts, 32 | ArrayList *names, ArrayList *varnames, 33 | ArrayList *freevars, ArrayList *cellvars, 34 | LyxString *file_name, LyxString *co_name, int lineno, 35 | LyxString *notable); 36 | }; 37 | 38 | #endif -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "util/bufferedInputStream.hpp" 2 | #include "code/binaryFileParser.hpp" 3 | 4 | int main(int argc, char **argv) { 5 | if (argc <= 1) { 6 | printf("LyxPythonVM need a parameter : filename\n"); 7 | return 0; 8 | } 9 | 10 | BufferedInputStream stream(argv[1]); 11 | // printf("magic number is 0x%x\n", stream.read_init()); 12 | BinaryFileParser parser(&stream); 13 | CodeObject *main_code = parser.parse(); 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /src/object/lyxInteger.cpp: -------------------------------------------------------------------------------- 1 | #include "lyxInteger.hpp" 2 | 3 | LyxInteger::LyxInteger(int x) { _value = x; } -------------------------------------------------------------------------------- /src/object/lyxInteger.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _LYX_INTEGER_HPP 2 | #define _LYX_INTEGER_HPP 3 | 4 | #include "lyxObject.hpp" 5 | 6 | class LyxInteger : public LyxObject { 7 | private: 8 | int _value; 9 | 10 | public: 11 | LyxInteger(int x); 12 | int value() { return _value; } 13 | }; 14 | 15 | #endif -------------------------------------------------------------------------------- /src/object/lyxObject.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _LYX_OBJECT_HPP 2 | #define _LYX_OBJECT_HPP 3 | 4 | class LyxObject {}; 5 | 6 | #endif -------------------------------------------------------------------------------- /src/object/lyxString.cpp: -------------------------------------------------------------------------------- 1 | #include "lyxString.hpp" 2 | #include 3 | 4 | LyxString::LyxString(const char *x) { 5 | _length = strlen(x); 6 | _value = new char[_length]; 7 | strcpy(_value, x); 8 | } 9 | 10 | LyxString::LyxString(const char *x, const int length) { 11 | _length = length; 12 | _value = new char[length]; 13 | 14 | for (int i = 0; i < length; i++) { 15 | _value[i] = x[i]; 16 | } 17 | } -------------------------------------------------------------------------------- /src/object/lyxString.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _LYX_STRING_HPP 2 | #define _LYX_STRING_HPP 3 | 4 | #include "lyxObject.hpp" 5 | 6 | class LyxString : public LyxObject { 7 | private: 8 | char *_value; 9 | int _length; 10 | 11 | public: 12 | LyxString(const char *x); 13 | LyxString(const char *x, const int length); 14 | 15 | const char *value() { return _value; } 16 | int length() { return _length; } 17 | }; 18 | 19 | #endif -------------------------------------------------------------------------------- /src/util/arrayList.cpp: -------------------------------------------------------------------------------- 1 | #include "arrayList.hpp" 2 | #include 3 | 4 | template ArrayList::ArrayList(int n) { 5 | _length = n; 6 | _size = 0; 7 | _array = new T[n]; 8 | } 9 | 10 | template void ArrayList::add(T t) { 11 | if (_size >= _length) 12 | expand(); 13 | 14 | _array[_size++] = t; 15 | } 16 | 17 | template void ArrayList::insert(int index, T t) { 18 | add(NULL); 19 | 20 | for (int i = _size; i > index; i--) { 21 | _array[i] = _array[i - 1]; 22 | } 23 | 24 | _array[index] = t; 25 | } 26 | 27 | template void ArrayList::expand() { 28 | T *new_array = new T[_length << 1]; 29 | for (int i = 0; i < _length; i++) { 30 | new_array[i] = _array[i]; 31 | } 32 | _array = new_array; 33 | delete[] _array; 34 | 35 | _length <<= 1; 36 | printf("expand an array to %d, size is %d\n", _length, _size); 37 | } 38 | 39 | template int ArrayList::size() { return _size; } 40 | 41 | template int ArrayList::length() { return _length; } 42 | 43 | template T ArrayList::get(int index) { return _array[index]; } 44 | 45 | template void ArrayList::set(int index, T t) { 46 | if (_size <= index) 47 | _size = index + 1; 48 | 49 | while (_size > _length) 50 | expand(); 51 | 52 | _array[index] = t; 53 | } 54 | 55 | template T ArrayList::pop() { return _array[--_size]; } 56 | 57 | class LyxObject; 58 | template class ArrayList; 59 | 60 | class LyxString; 61 | template class ArrayList; -------------------------------------------------------------------------------- /src/util/arrayList.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ARRAY_LIST_HPP 2 | #define ARRAY_LIST_HPP 3 | 4 | #include 5 | 6 | template class ArrayList { 7 | private: 8 | int _length; 9 | T *_array; 10 | int _size; 11 | 12 | void expand(); 13 | 14 | public: 15 | ArrayList(int n = 8); 16 | 17 | void add(T t); 18 | void insert(int index, T t); 19 | T get(int index); 20 | void set(int index, T t); 21 | int size(); 22 | int length(); 23 | T pop(); 24 | }; 25 | 26 | #endif // ARRAY_LIST_HPP -------------------------------------------------------------------------------- /src/util/bufferedInputStream.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BUFFERED_INPUT_STREAM_HPP_ 2 | #define BUFFERED_INPUT_STREAM_HPP_ 3 | 4 | #include 5 | 6 | #define BUFFER_LEN 256 7 | 8 | class BufferedInputStream { 9 | private: 10 | FILE *fp; 11 | char szBuffer[BUFFER_LEN]; 12 | unsigned short index; 13 | 14 | public: 15 | BufferedInputStream(char const *filename) { 16 | fp = fopen(filename, "rb"); 17 | fread(szBuffer, BUFFER_LEN * sizeof(char), 1, fp); 18 | index = 0; 19 | } 20 | 21 | ~BufferedInputStream() { close(); } 22 | 23 | char read() { 24 | if (index < BUFFER_LEN) 25 | return szBuffer[index++]; 26 | else { 27 | index = 0; 28 | fread(szBuffer, BUFFER_LEN * sizeof(char), 1, fp); 29 | return szBuffer[index++]; 30 | } 31 | } 32 | 33 | int read_init() { 34 | int b1 = read() & 0xff; 35 | int b2 = read() & 0xff; 36 | int b3 = read() & 0xff; 37 | int b4 = read() & 0xff; 38 | 39 | return b4 << 24 | b3 << 16 | b2 << 8 | b1; 40 | } 41 | 42 | void unread() { index--; } 43 | 44 | void close() { 45 | if (fp != NULL) { 46 | fclose(fp); 47 | fp = NULL; 48 | } 49 | } 50 | }; 51 | #endif // BUFFERED_INPUT_STREAM_HPP_ -------------------------------------------------------------------------------- /test/hello.py: -------------------------------------------------------------------------------- 1 | print 1 + 2 * 3 --------------------------------------------------------------------------------