├── .gitignore
├── CMakeLists.txt
├── README.md
├── api
├── Array.jack
├── IO.jack
├── Input.jack
├── Math.jack
├── Memory.jack
├── Output.jack
├── String.jack
└── Sys.jack
├── folder
├── 1.png
├── 2.png
├── 3.png
├── 4.jpg
├── 5.png
└── 6.png
├── jack
├── CMakeLists.txt
├── include
│ └── VM.h
└── src
│ ├── VM.cpp
│ └── jack.cpp
└── jackc
├── CMakeLists.txt
├── driver
└── jackc.cpp
├── include
├── Analyzer.h
├── CodeGen.h
├── Error.h
├── Parser.h
├── Scanner.h
└── SymbolTable.h
└── src
├── Analyzer.cpp
├── CodeGen.cpp
├── Error.cpp
├── Parser.cpp
├── Scanner.cpp
└── SymbolTable.cpp
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.slo
3 | *.lo
4 | *.o
5 | *.obj
6 |
7 | # Precompiled Headers
8 | *.gch
9 | *.pch
10 |
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 |
16 | # Fortran module files
17 | *.mod
18 |
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 |
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 |
30 | # QtCreator .user files
31 | *.user
32 | *.user.*
33 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.8)
2 | project (jack)
3 | add_subdirectory (jack)
4 | add_subdirectory (jackc)
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # jack-Compiler
2 |
3 | ### 效果
4 | ##### Demo1:
5 | ```C++
6 | class Main
7 | {
8 | function void main()
9 | {
10 | String s;
11 |
12 | Output.printString("Hello, world!");
13 | Output.println();
14 |
15 | Output.printString("What's your name?");
16 | Output.println();
17 | s = Input.readLine();
18 | Output.printString("Your name is: ");
19 | Output.printString(s);
20 | Output.println();
21 |
22 | return;
23 | }
24 |
25 | }
26 | ```
27 | ##### 运行结果:
28 | 
29 |
30 |
31 | ##### Demo2
32 | ```C++
33 | class Main
34 | {
35 | function void main()
36 | {
37 | Array arr;
38 | String s;
39 | int i;
40 |
41 | arr = Array.new(5); // 创建一个大小为5的数组
42 | i = 0;
43 | while (i < 5)
44 | {
45 | s = Input.readLine();
46 | arr[i] = s.intValue();
47 | i = i + 1;
48 | }
49 |
50 | Main.bubble_sort(arr, 5);
51 |
52 | i = 0;
53 | while (i < 5)
54 | {
55 | Output.printInt(arr[i]);
56 | i = i + 1;
57 | }
58 | Output.println();
59 |
60 | return;
61 | }
62 |
63 | /* 冒泡排序 */
64 | function void bubble_sort(Array arr, int n)
65 | {
66 | int i, j, tmp;
67 | i = n - 1;
68 |
69 | while (i > 0 | i == 0) // 由于还没有加上 >= 运算符, 所以暂时用这个代替
70 | {
71 | j = 0;
72 | while (j < i)
73 | {
74 | if (arr[j] > arr[j + 1])
75 | {
76 | tmp = arr[j];
77 | arr[j] = arr[j + 1];
78 | arr[j + 1] = tmp;
79 | }
80 | j = j + 1;
81 | }
82 | i = i - 1;
83 | }
84 |
85 | return;
86 | }
87 | }
88 | ```
89 | ##### 运行结果:
90 | 
91 |
92 | ##### Demo3:
93 | ```C++
94 | class Main
95 | {
96 | function void main()
97 | {
98 | int a, b, c;
99 | String s;
100 |
101 | s = Input.readLine();
102 | a = s.intValue();
103 |
104 | s = Input.readLine();
105 | b = s.intValue();
106 |
107 | c = Main.gcd(a, b);
108 |
109 | Output.printInt(c);
110 | Output.println();
111 |
112 | return;
113 | }
114 |
115 | // 求最大公约数
116 | function int gcd(int a, int b)
117 | {
118 | if (b == 0)
119 | {
120 | return a;
121 | }
122 | else
123 | {
124 | return Main.gcd(b, a - a / b * b);
125 | /* a - a / b * b相当于 a % b */
126 | }
127 | }
128 |
129 | }
130 | ```
131 | ##### 运行结果:
132 | 
133 |
134 |
135 |
136 | [TOC]
137 |
138 | # 背景介绍
139 |
140 | 去年学了编译原理,但是这门课的理论太多了,而且很难,学得是云里雾里.网上很多大神说学了编译原理之后最好能够实际动手做一个编译器出来,这样对能力有很大的提升.于是就下定了决心,带着写一个编译器的目的来重新学习编译原理.然后开始找公开课,买书,就这样开始了.
141 |
142 | ## jack--语言介绍
143 | ### 语法要素
144 |
145 | 1, 保留字:
146 |
147 | class, constructor, method, function, int, boolean, char, void,
148 | static, field, if, else, while, return, true, false, null, this
149 | 2, 标识符:
150 |
151 | 由字母或下划线开头, 后接任意个字母或数字或下划线
152 | 3, 常量:
153 |
154 | int类型的常数规定都是正整数, 没有负整数, 但是可以在正整数前面加上负号, 这是对正整数取负值的一元运算表达式
155 | String类型的常量是把一个字符串用一对双引号括起来, 与java和C里面的字符串一样
156 | boolean类型的常量可以是true和false
157 | 常数null表示一个空引用, 实际上就是数值0
158 | 4, 合法的符号:
159 |
160 | ( ) [ ] , . ; = + - * / & | ~ < > <= >= ==
161 | 5, 注释:
162 |
163 | 与C语言和java一样, 支持两种注释形式, 单行注释// 和多行注释 /* */
164 |
165 | ### 程序结构
166 |
167 | 1, jack的基本编程单元是类, 每个类存在于独立的文件中, 可以单独编译, 下面是类的定义形式:
168 |
169 | class 类名
170 | {
171 | 成员变量(field)声明 和 静态变量(static)声明 // 必须放在子程序声明之前
172 | 子程序声明 // 子程序声明可以是构造函数声明(constructor), 静态函数声明(function)和方法声明(method)
173 | }
174 |
175 | 2, 子程序声明:
176 |
177 | subroutine 类型 名称 (参数列表)
178 | {
179 | 局部变量声明
180 | 语句
181 | }
182 | 3, jack必须至少包含一个Main类, 而且在Main类中必须包含一个function void main() 函数
183 |
184 | ### 变量
185 |
186 | 1, 变量分类
187 |
188 | jack中有四种变量类型: 成员变量, 静态变量, 局部变量和参数变量
189 | 成员变量通过field关键字来声明
190 | 静态变量通过static来声明
191 | 在函数体的开始声明的变量是局部变量
192 | 在函数声明中声明的变量是参数变量
193 |
194 | 2, 数据类型
195 |
196 | 基本数据类型和对象类型
197 |
198 | 3, 基本类型
199 |
200 | int, boolean, char
201 | 4, 对象类型
202 |
203 | 同java一样, 声明一个对象实际上只是创建一个指向该对象的引用
204 | 5, 数组
205 |
206 | 数组是通过内置类Array类声明的, 用Array声明的对象也是一个引用, 指向堆内存.
207 | 对数组的引用可以与传统的一样
208 | Array arr;
209 | arr[3] = 4;
210 | 不支持多维数组.
211 | 6, 字符串
212 |
213 | 字符串是通过内置类String类来声明的, 同样, 用String声明的对象也是一个引用, 指向堆内存, 例如:
214 | String s;
215 | char c;
216 | s = String.new("hello, world!\n");
217 | c = s.charAt(4);
218 | 7, 类型转换
219 |
220 | jack是弱类型语言, 没有禁止不同类型之间的转换
221 |
222 | ### 语句
223 |
224 | 1, 赋值语句
225 |
226 | 变量 = 表达式
227 | 变量[表达式] = 表达式
228 | 2, if语句
229 |
230 | if(表达式) // 不能省略大括号
231 | {
232 | 语句
233 | }
234 | else
235 | {
236 | 语句
237 | }
238 | 3, while语句
239 |
240 | while(表达式)
241 | {
242 | 语句
243 | }
244 | 4, 函数调用语句
245 |
246 | 方法名(表达式)
247 | 类名.函数名(表达式)
248 | 5, return语句
249 |
250 | return 表达式
251 | return ; // 即使子程序返回void, 也要有return语句
252 |
253 | ### 表达式
254 |
255 | jack--表达式必须是下列之一:
256 |
257 | * 常数
258 | * 在作用域内的变量名(变量可以是静态、局部、成员或参数类型)
259 | * 关键字this, 引用当前对象 (不能用于函数中)
260 | * 数组语法是: 数组名称[表达式], 其中数组名称是Array类型的变量名
261 | * 返回值为非空类型的子程序调用
262 | * 一元运算符 "-" 或 "~" 作前缀的表达式
263 | ** - 表达式: 算术求反
264 | ** ~ 表达式: 布尔求反
265 | * 形如 "表达式 运算符 表达式" 的表达式, 其中运算符可以是以下二元运算符中的一种;
266 | *** + - * / & | <= < >= > ==
267 | * (表达式): 位于圆括号内的表达式
268 |
269 |
270 | ### 标准库
271 |
272 | 标准库包括下面的类
273 |
274 | Math 提供基本的数学运算
275 | String 实现字符串String类型和字符串相关操作
276 | Array 实现数组Array类型和数组相关操作
277 | Output 处理屏幕上的文字输出
278 | Input 处理键盘的输入
279 | Memory 处理内存操作
280 | Sys 提供与程序执行相关的服务
281 |
282 |
283 | #### Math类
284 |
285 | 该类实现各种数学运算操作
286 |
287 | #### String类
288 |
289 | 该类实现String数据类型以及与字符串相关的操作
290 |
291 | #### Array类
292 | 该类构造和清除数组
293 |
294 | #### Output类
295 |
296 | 该类提供在屏幕上打印文本的服务
297 |
298 | #### Input类
299 |
300 | 该类提供从标准键盘上读取输入的服务
301 |
302 | #### Memory类
303 |
304 | 该类允许直接访问宿主平台的主内存的服务
305 |
306 | #### Sys类
307 |
308 | 该类提供与程序执行相关的服务
309 |
310 | ### Demo
311 |
312 | ## 项目介绍
313 |
314 | ## 使用说明
315 | 在linux下运行compiler.sh或者make就可以编译出jackc.exe和jack.exe了
316 |
317 | ## 模块介绍
318 |
319 | jack编译器主要有词法分析器,语法分析器,语义分析器,vm代码生成 和 虚拟机
320 |
321 | ### 词法分析器
322 |
323 | 词法分析器的源代码为Scanner.cpp 使用的手工编码的方法实现的
324 | 词法分析器的主要任务是识别源程序中的单词(Token),假如有下面的C代码:
325 | ```C++
326 | int main()
327 | {
328 | printf("Hello, world!\n");
329 | return 0;
330 | }
331 | ```
332 | 通过词法分析器的扫描之后,返回的是一个一个单词(Token):
333 |
334 | 关键字 int
335 | 标识符 main
336 | 左圆括号 '('
337 | 左花括号 '{'
338 | 标识符 printf
339 | 左圆括号 '('
340 | 字符串 "Hello, world!\n"
341 | 右圆括号 ')'
342 | 分号 ';'
343 | 关键字 return
344 | 数字 0
345 | 右花括号 '}'
346 |
347 | #### 词法规则
348 |
349 | 首先定义一些词法规则,即这门语言能够识别出来的单词,词法规则是用正则表达式来定义的
350 |
351 | #### 转移图
352 |
353 | 根据上面的词法规则可以画出状态转移图(FA),以方便编程
354 |
355 | 1, 简单的转移图示例:
356 |
357 |
358 | 2, 标识符,整型和浮点型的转移图:
359 |
360 | 
361 |
362 | 3, 字符串的转移图:
363 |
364 | 
365 |
366 | 4, 字符的转移图
367 | 
368 |
369 | ### 语法分析器
370 |
371 | 语法分析器的源代码文件是Parser.cpp 使用的是递归下降的方法实现的
372 | 语法分析器有两个任务:
373 | 1, 判断源程序是否符合语法规则
374 | 2, 生成抽象语法树
375 |
376 | #### jack语言的语法
377 |
378 | jack语言的语法由如下的上下文无关文法(BNF)定义.
379 | 非粗体字表示非终结符, 粗体字表示终结符
380 |
381 |
382 | program -> classlist
383 | classlist -> classlist class
384 | | class
385 | class -> class ID { classVarDecList subroutineDecList }
386 | classVarDecList -> classVarDecList classVarDec
387 | |
388 | classVarDec -> static type varNameList ;
389 | | field type varNameList ;
390 | varNameList -> varNameList , ID
391 | | ID
392 | type -> int
393 | | float
394 | | char
395 | | boolean
396 | | void
397 | | ID
398 | subroutineDecList -> subroutineDecList subroutineDec
399 | |
400 | subroutineDec -> constructor type ID ( params ) subroutineBody
401 | | function type ID ( params ) subroutineBody
402 | | method type ID (params ) subroutineBody
403 | params -> paramList
404 | |
405 | paramList -> paramList , param
406 | | param
407 | param -> type ID
408 | subroutineBody -> { varDecList statements }
409 | varDecList -> varDecList varDec
410 | |
411 | varDec -> type varNameList ;
412 | statements -> statements statement
413 | |
414 | statement -> assign_statement
415 | | if_statement
416 | | while_statement
417 | | return_statement
418 | | call_statement ;
419 | assign_statement -> leftValue = expression ;
420 | leftValue -> ID
421 | | ID [ expression ]
422 | if_statement -> if ( expression ) statement
423 | | if ( expression ) statement else statement
424 | while_statement -> while ( expression ) { statement }
425 | return_statement -> return ;
426 | | return expression ;
427 | call_statement -> ID ( expressions )
428 | | ID . ID ( expressions )
429 | expressions -> expression_list
430 | |
431 | expression_list -> expression_list , expression
432 | | expression
433 | expression -> expression & boolExpression
434 | | expression | boolExpression
435 | | boolExpression
436 | boolExpression -> additive_expression relational_operator additive_expression
437 | | additive_expression
438 | relational_operator -> <=
439 | | >=
440 | | ==
441 | | <
442 | | >
443 | | !=
444 | additive_expression -> additive_expression + term
445 | | additive_expression - term
446 | | term
447 | term -> term * factor
448 | | term / factor
449 | | factor
450 | factor -> - positive_factor
451 | | positive_factor
452 | positive_factor -> ~ not_factor
453 | | not_factor
454 | not_factor -> INT_CONST
455 | | CHAR_CONST
456 | | STRING_CONST
457 | | keywordConstant
458 | | ID
459 | | ID [ expression ]
460 | | call_expression
461 | | ( expression )
462 | keywordConstant -> true
463 | | false
464 | | null
465 | | this
466 | call_expression -> ID ( expressions )
467 | | ID . ID ( expressions )
468 |
469 |
470 | #### 语法树
471 | 树的节点类型:
472 |
473 | ### 语义分析器
474 |
475 | #### 语义规则
476 |
477 | #### 符号表
478 |
479 | #### 虚拟机
480 |
481 |
--------------------------------------------------------------------------------
/api/Array.jack:
--------------------------------------------------------------------------------
1 | class Array
2 | {
3 | /** Create a new array of the given size; the storage comes from Memory.alloc. */
4 | function Array new(int size)
5 | {
6 | Array a;
7 | a = Memory.alloc(size);
8 | return a;
9 | }
10 |
11 | /** Dispose of this array by handing it back to Memory.deAlloc. */
12 | method void dispose()
13 | {
14 | Memory.deAlloc(this);
15 | return;
16 | }
17 | }
--------------------------------------------------------------------------------
/api/IO.jack:
--------------------------------------------------------------------------------
1 | class IO // Placeholder I/O class: the jack VM handles calls to IO.putchar and IO.getchar natively (see executeCall in VM.cpp).
2 | {
3 | function void putchar(char ch) // stub body: returns without doing anything
4 | {
5 | return;
6 | }
7 | function char getchar() // stub body: always returns the character '0'
8 | {
9 | return '0';
10 | }
11 | }
--------------------------------------------------------------------------------
/api/Input.jack:
--------------------------------------------------------------------------------
1 | class Input // Keyboard input helpers built on IO.getchar.
2 | {
3 | function char readChar() // Read and return a single character.
4 | {
5 | char ch;
6 | ch = IO.getchar();
7 | return ch;
8 | }
9 |
10 | function String readLine() // Read characters until character code 10 and return them as a String (the code-10 character is not appended).
11 | {
12 | char ch;
13 | String s;
14 | s = String.new(100); // presumably a capacity of 100 characters - String.jack is not shown here, confirm
15 | while (true)
16 | {
17 | ch = IO.getchar();
18 | if (ch == 10) // 10 is the line-feed character code
19 | {
20 | return s;
21 | }
22 | s = s.appendChar(ch);
23 | }
24 | return s; // not reached: the loop above only exits through its own return
25 | }
26 | }
--------------------------------------------------------------------------------
/api/Math.jack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/api/Math.jack
--------------------------------------------------------------------------------
/api/Memory.jack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/api/Memory.jack
--------------------------------------------------------------------------------
/api/Output.jack:
--------------------------------------------------------------------------------
1 | class Output // Text output helpers built on IO.putchar.
2 | {
3 | function void printChar(char ch) // Write one character.
4 | {
5 | IO.putchar(ch);
6 | return;
7 | }
8 |
9 | function void printString(String s) // Write every character of s, in order.
10 | {
11 | int strLength;
12 | char temp;
13 | int i;
14 | i = 0;
15 | strLength = s.length(); // length is read once, before the loop
16 | while(i < strLength)
17 | {
18 | temp = s.charAt(i);
19 | Output.printChar(temp);
20 | i = i + 1;
21 | }
22 | return;
23 | }
24 |
25 | function void printInt(int i) // Write i as text by converting it through a temporary String.
26 | {
27 | String str;
28 | str = String.new(6); // presumably room for 6 characters - String.jack is not shown here, confirm
29 | str.setInt(i);
30 | Output.printString(str);
31 | return;
32 | }
33 |
34 | function void println() // Write a line feed (character code 10).
35 | {
36 | IO.putchar(10);
37 | return;
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/api/String.jack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/api/String.jack
--------------------------------------------------------------------------------
/api/Sys.jack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/api/Sys.jack
--------------------------------------------------------------------------------
/folder/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/folder/1.png
--------------------------------------------------------------------------------
/folder/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/folder/2.png
--------------------------------------------------------------------------------
/folder/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/folder/3.png
--------------------------------------------------------------------------------
/folder/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/folder/4.jpg
--------------------------------------------------------------------------------
/folder/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/folder/5.png
--------------------------------------------------------------------------------
/folder/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shellphy/jack-compiler/5ddc42d40abc6c479506496d530cfdae90de19d0/folder/6.png
--------------------------------------------------------------------------------
/jack/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.8)
2 |
3 | project (jack)
4 |
5 | set (CMAKE_CXX_STANDARD 11) # the VM sources are built as C++11
6 | if (CMAKE_CXX_COMPILER_ID MATCHES "GCC" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") # NOTE(review): CMake reports GNU compilers as "GNU", so the "GCC" pattern looks like it never matches - confirm
7 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror") # warnings become errors for the matched compilers
8 | endif()
9 |
10 | include_directories (include)
11 | add_executable(${PROJECT_NAME} src/jack.cpp src/VM.cpp) # builds the "jack" VM executable
12 |
--------------------------------------------------------------------------------
/jack/include/VM.h:
--------------------------------------------------------------------------------
1 | #ifndef _VM_H
2 | #define _VM_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | using namespace std;
9 |
10 | extern vector filenames;
11 |
12 | void executeArithmetic(string const& command); // execute an arithmetic/logic command
13 | void executePush(string const& segment, int index); // execute a push command
14 | void executePop(string const& segment, int index); // execute a pop command
15 | void executeLabel(string const& label); // execute a label command
16 | void executeGoto(string const& label); // execute a goto command
17 | void executeIf(string const& label); // execute an if-goto command
18 | void executeCall(string const& functionName, int numArgs); // execute a call command
19 | void executeReturn(); // execute a return command
20 | void executeFunction(string const& functionName, int numLocals); // execute a function command
21 | void executeEnd(); // end of program
22 |
23 | void init(); // initialise ip after the CPU powers on
24 | void instructionFetch(); // CPU instruction fetch
25 | void execute(); // CPU instruction execute
26 |
27 | void setKeyboardValue(short val); // NOTE(review): no definition appears in VM.cpp as shown here
28 | void loadProgram(); // load the program into instruction memory
29 | void run(); // power on the CPU and start running
30 |
31 | #endif
32 |
--------------------------------------------------------------------------------
/jack/src/VM.cpp:
--------------------------------------------------------------------------------
1 | #include "VM.h"
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | static int sp; // register, stack pointer: index of the next free slot above the top of the stack
8 | static int local; // register: base address of the current VM function's local segment
9 | static int argument; // register: base address of the current VM function's argument segment
10 | static int _this; // register: base address of the current this segment (in the heap)
11 | static int that; // register: base address of the current that segment (in the heap)
12 | static int ip; // register: index of the next instruction to execute
13 | static int temp[7]; // registers: temporary values
14 | /*
15 | * RAM address purpose
16 | * 0~15 reserved, unused
17 | * 16~155 static variables of all VM functions in the VM program
18 | * 256~2047 stack (NOTE(review): init() starts sp at 50, outside this range - confirm)
19 | * 2048~16383 heap (holds objects and arrays)
20 | **/
21 | static short ram[266385]; // data memory
22 | static vector> instructions_ram; // instruction memory
23 | static int staticCount; // number of static variables allocated so far
24 | static unordered_map staticVarNames; // position of each static variable in memory
25 | static unordered_map
26 | instruction_address; // addresses of label and function instructions in instruction memory
27 | static vector currentInstruction; // the instruction currently being executed
28 | static string currentClassName; // name of the class containing the instruction currently being executed
29 | static bool arriveEnd = false; // set once the end of the program has been reached
30 |
31 | void executeArithmetic(const string &command) { // Apply one arithmetic/logic VM command to the values on top of the operand stack.
32 | if (command == "add") { // binary ops: the result overwrites ram[sp - 2] and sp drops by one
33 | ram[sp - 2] = ram[sp - 2] + ram[sp - 1];
34 | sp--;
35 | } else if (command == "sub") {
36 | ram[sp - 2] = ram[sp - 2] - ram[sp - 1];
37 | sp--;
38 | } else if (command == "neg") { // unary ops rewrite ram[sp - 1] in place; sp is unchanged
39 | ram[sp - 1] = -ram[sp - 1];
40 | } else if (command == "eq") { // comparisons store -1 for true and 0 for false
41 | if (ram[sp - 2] == ram[sp - 1])
42 | ram[sp - 2] = -1;
43 | else
44 | ram[sp - 2] = 0;
45 | sp--;
46 | } else if (command == "gt") {
47 | if (ram[sp - 2] > ram[sp - 1])
48 | ram[sp - 2] = -1;
49 | else
50 | ram[sp - 2] = 0;
51 | sp--;
52 | } else if (command == "lt") {
53 | if (ram[sp - 2] < ram[sp - 1])
54 | ram[sp - 2] = -1;
55 | else
56 | ram[sp - 2] = 0;
57 | sp--;
58 | } else if (command == "and") { // and/or are bitwise
59 | ram[sp - 2] = ram[sp - 2] & ram[sp - 1];
60 | sp--;
61 | } else if (command == "or") {
62 | ram[sp - 2] = ram[sp - 2] | ram[sp - 1];
63 | sp--;
64 | } else if (command == "not") { // bitwise complement
65 | ram[sp - 1] = ~ram[sp - 1];
66 | } // any other command string falls through and does nothing
67 | }
68 |
69 | // Push segment[index] onto the operand stack (ram[sp++]). Unrecognised
70 | // segments and pointer indices other than 0/1 push nothing.
71 | void executePush(const string &segment, int index) {
72 |   if (segment == "static") {
73 |     // Statics are keyed "<class>.<index>" and get RAM slots from 16 upward.
74 |     string staticVarName = currentClassName + "." + std::to_string(index);
75 |     auto result = staticVarNames.find(staticVarName);
76 |     if (result == staticVarNames.end()) {
77 |       // First touch: allocate the next slot (ram starts zero-initialised).
78 |       result = staticVarNames.insert({staticVarName, staticCount}).first;
79 |       staticCount++;
80 |     }
81 |     // Always push, even on first touch, so the stack stays balanced.
82 |     ram[sp++] = ram[16 + result->second];
83 |   } else if (segment == "argument")
84 |     ram[sp++] = ram[argument + index];
85 |   else if (segment == "local")
86 |     ram[sp++] = ram[local + index];
87 |   else if (segment == "constant")
88 |     ram[sp++] = index;
89 |   else if (segment == "this")
90 |     ram[sp++] = ram[_this + index];
91 |   else if (segment == "that")
92 |     ram[sp++] = ram[that + index];
93 |   else if (segment == "pointer") {
94 |     if (index == 0)
95 |       ram[sp++] = _this;
96 |     else if (index == 1)
97 |       ram[sp++] = that;
98 |   } else if (segment == "temp")
99 |     ram[sp++] = temp[index];
100 | }
101 |
102 | // Pop the top of the operand stack (ram[--sp]) into segment[index].
103 | // Unrecognised segments and pointer indices other than 0/1 pop nothing.
104 | void executePop(const string &segment, int index) {
105 |   if (segment == "static") {
106 |     // Statics are keyed "<class>.<index>" and get RAM slots from 16 upward.
107 |     string staticVarName = currentClassName + "." + std::to_string(index);
108 |     auto result = staticVarNames.find(staticVarName);
109 |     if (result == staticVarNames.end()) {
110 |       // First touch: allocate the next slot before storing into it.
111 |       result = staticVarNames.insert({staticVarName, staticCount}).first;
112 |       staticCount++;
113 |     }
114 |     // Always consume the value; the first store used to be dropped.
115 |     ram[16 + result->second] = ram[--sp];
116 |   } else if (segment == "argument")
117 |     ram[argument + index] = ram[--sp];
118 |   else if (segment == "local")
119 |     ram[local + index] = ram[--sp];
120 |   else if (segment == "this")
121 |     ram[_this + index] = ram[--sp];
122 |   else if (segment == "that")
123 |     ram[that + index] = ram[--sp];
124 |   else if (segment == "pointer") {
125 |     // pointer 0 / 1 rebind the this / that base registers.
126 |     if (index == 0)
127 |       _this = ram[--sp];
128 |     else if (index == 1)
129 |       that = ram[--sp];
130 |   } else if (segment == "temp")
131 |     temp[index] = ram[--sp];
132 | }
133 | void executeLabel(const string &label) { // Labels need no runtime action: loadProgram() already recorded their addresses.
134 | // do nothing
135 | }
136 |
137 | void executeGoto(const string &label) { ip = instruction_address.find(label)->second; } // Unconditional jump to label. NOTE(review): unchecked find(); an unknown label dereferences end().
138 |
139 | void executeIf(const string &label) { // if-goto: pop the top of the stack and jump to label when the popped value is non-zero.
140 | int temp = ram[--sp]; // this local shadows the file-level temp[] register array
141 | if (temp != 0)
142 | ip = instruction_address.find(label)->second; // NOTE(review): unchecked find(); an unknown label dereferences end()
143 | }
144 |
145 | void executeCall(const string &functionName, int numArgs) { // Call functionName; its numArgs arguments are already on the stack.
146 | if (functionName == "IO.putchar") { // handled natively instead of jumping into VM code
147 | putchar(ram[sp - 1]); // the argument is left on the stack; sp is not changed
148 | return;
149 | } else if (functionName == "IO.getchar") {
150 | ram[sp++] = getchar(); // push the value returned by getchar()
151 | return;
152 | }
153 | ram[sp++] = ip; // save the caller's frame, return address first (run() has already advanced ip past this call)
154 | ram[sp++] = local;
155 | ram[sp++] = argument;
156 | ram[sp++] = _this;
157 | ram[sp++] = that;
158 | argument = sp - numArgs - 5; // the first argument sits below the five words just saved
159 | local = sp; // the callee's locals start right after the saved frame
160 | ip = instruction_address.find(functionName)->second; // NOTE(review): unchecked find(); an unknown function dereferences end()
161 | }
162 |
163 | void executeReturn() { // Return to the caller: restore its saved registers and leave the return value on top of its stack.
164 | int temp = local; // frame base; executeCall() saved five words at temp - 5 .. temp - 1
165 | ip = ram[temp - 5]; // return address; read first because the next line overwrites this slot when the callee took no arguments
166 | ram[argument] = ram[--sp]; // pop the return value into the first argument slot, where the caller will find it
167 | sp = argument + 1; // the caller's stack top is just above the return value
168 | that = ram[temp - 1];
169 | _this = ram[temp - 2];
170 | argument = ram[temp - 3];
171 | local = ram[temp - 4];
172 | }
173 |
174 | void executeFunction(const string &functionName, int numLocals) { // Enter a function: record its class name and push numLocals zeroed locals.
175 | auto iter = functionName.cbegin();
176 | while (iter != functionName.cend()) // advance iter to one past the first '.'
177 | if (*iter++ == '.')
178 | break;
179 | currentClassName = string(functionName.cbegin(), --iter); // the text before the '.'; NOTE(review): with no '.' in the name this drops its last character
180 | for (int i = 0; i < numLocals; i++)
181 | ram[sp++] = 0;
182 | }
183 |
184 | // Load every file in `filenames` into instruction memory and index label/function addresses.
185 | void loadProgram() {
186 |   std::ifstream fin;
187 |   int count = 0;  // index the next stored instruction will get in instructions_ram
188 |   for (auto iter = filenames.cbegin(); iter != filenames.cend(); ++iter) {
189 |     fin.open(*iter);
190 |     if (fin.fail()) {
191 |       std::cerr << "Error: file " << *iter << " does not exist!\n";
192 |       exit(-1);
193 |     }
194 |     string temp;
195 |     while (std::getline(fin, temp)) {
196 |       vector<string> instruction;  // the line split on whitespace
197 |       std::istringstream iss(temp);
198 |       for (string word; iss >> word;)
199 |         instruction.push_back(word);
200 |       if (instruction.empty())
201 |         continue;  // blank line: instruction[0] would be out of range here and in execute()
202 |       if (instruction.size() > 1 &&
203 |           (instruction[0] == "label" || instruction[0] == "function"))
204 |         instruction_address.insert({instruction[1], count});
205 |       instructions_ram.push_back(instruction);
206 |       count++;
207 |     }
208 |     fin.close();
209 |   }
210 |   // Trailing sentinel: execute() sets arriveEnd when it reaches "end".
211 |   instructions_ram.push_back(vector<string>(1, "end"));
212 | }
213 |
214 | void init() { // Set up the registers for the first instruction.
215 | ip = instruction_address.find("Sys.init")->second; // execution starts at Sys.init; NOTE(review): unchecked find()
216 | sp = 50; // NOTE(review): the RAM layout comment near the top of this file puts the stack at 256~2047 - confirm
217 | local = sp;
218 | ram[local - 5] = instructions_ram.size() - 1; // return-address slot read by executeReturn(): index of the last instruction, the "end" entry appended by loadProgram()
219 | }
220 |
221 | void run() { // Fetch/execute loop: runs until execute() sets arriveEnd.
222 | init();
223 | while (true) {
224 | instructionFetch();
225 | if (ip == 1) { // no observable effect: the block only assigns a local
226 | int temp;
227 | temp = 0;
228 | /// @todo find out what this makes
229 | // sp;
230 | // local;
231 | // argument;
232 | // _this;
233 | // that;
234 | // currentInstruction;
235 | }
236 | ++ip; // advance before executing, so goto/if-goto/call/return can overwrite ip
237 | execute();
238 | if (arriveEnd == true)
239 | break;
240 | }
241 | }
242 |
243 | void instructionFetch() { currentInstruction = instructions_ram[ip]; } // Copy the instruction at ip into currentInstruction; ip is not range-checked.
244 |
245 | void execute() { // Dispatch on currentInstruction[0] to the matching execute* handler.
246 | string command = currentInstruction[0];
247 | if (command == "add" || command == "sub" || command == "neg" ||
248 | command == "eq" || command == "gt" || command == "lt" ||
249 | command == "and" || command == "or" || command == "not")
250 | executeArithmetic(currentInstruction[0]);
251 | else if (command == "push")
252 | executePush(currentInstruction[1], atoi(currentInstruction[2].c_str())); // operands are read by position; their presence is not checked
253 | else if (command == "pop")
254 | executePop(currentInstruction[1], atoi(currentInstruction[2].c_str()));
255 | else if (command == "label")
256 | executeLabel(currentInstruction[1]);
257 | else if (command == "goto")
258 | executeGoto(currentInstruction[1]);
259 | else if (command == "if-goto")
260 | executeIf(currentInstruction[1]);
261 | else if (command == "call")
262 | executeCall(currentInstruction[1], atoi(currentInstruction[2].c_str()));
263 | else if (command == "return")
264 | executeReturn();
265 | else if (command == "function")
266 | executeFunction(currentInstruction[1], atoi(currentInstruction[2].c_str()));
267 | else if (command == "end")
268 | arriveEnd = true; // makes run() leave its loop
269 | }
270 |
--------------------------------------------------------------------------------
/jack/src/jack.cpp:
--------------------------------------------------------------------------------
1 | #include "VM.h"
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | using namespace std;
8 |
9 | vector filenames;
10 |
11 | int main(int argc, char *argv[]) { // VM entry point: collect the .vm files named on the command line plus the library files, then load and run.
12 | if (argc == 1) {
13 | cout << "Usage: vm sourcefiles\n";
14 | exit(-1);
15 | }
16 | ifstream fin;
17 | for (int i = 1; i < argc; i++) { // each user-supplied file must be openable before it is queued
18 | fin.open(argv[i]);
19 | if (fin.fail()) {
20 | cerr << "file '" << argv[i] << "' not exist!";
21 | exit(-1);
22 | }
23 | filenames.push_back(argv[i]);
24 | fin.close();
25 | }
26 | // existence of the library files below is not checked here
27 | filenames.push_back("./api/Array.vm");
28 | filenames.push_back("./api/Memory.vm");
29 | filenames.push_back("./api/Math.vm");
30 | filenames.push_back("./api/Input.vm");
31 | filenames.push_back("./api/Output.vm");
32 | filenames.push_back("./api/Sys.vm");
33 | filenames.push_back("./api/String.vm");
34 | filenames.push_back("./api/IO.vm");
35 |
36 | loadProgram(); // reads every queued file; exits if one cannot be opened
37 | run();
38 |
39 | return 0;
40 | }
41 |
--------------------------------------------------------------------------------
/jackc/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required (VERSION 2.8)
2 | project(jackc)
3 |
4 | set (CMAKE_CXX_STANDARD 11) # the compiler sources are built as C++11
5 | if (CMAKE_CXX_COMPILER_ID MATCHES "GCC" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") # NOTE(review): CMake reports GNU compilers as "GNU", so the "GCC" pattern looks like it never matches - confirm
6 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror") # warnings become errors for the matched compilers
7 | endif()
8 |
9 | include_directories (include)
10 | aux_source_directory (src JACKC_LIB_SRC) # collect every source file under src/
11 | add_library (jackclib "${JACKC_LIB_SRC}") # compiler stages are built as a library
12 | add_executable (${PROJECT_NAME} driver/jackc.cpp) # the "jackc" driver executable
13 | target_link_libraries (${PROJECT_NAME} jackclib)
14 |
--------------------------------------------------------------------------------
/jackc/driver/jackc.cpp:
--------------------------------------------------------------------------------
1 | #include "Analyzer.h"
2 | #include "CodeGen.h"
3 | #include "Parser.h"
4 | #include "Scanner.h"
5 | #include
6 | #include
7 | #include
8 |
9 | using namespace std;
10 |
11 | int main(int argc, char *argv[]) { // Compiler driver: parse the given files plus the library sources, analyze, then generate code.
12 | if (argc < 2) {
13 | cerr << "usage: " << argv[0] << " " << endl;
14 | exit(-1);
15 | }
16 | ifstream fin;
17 | vector filenames;
18 | for (int i = 1; i < argc; i++) { // each user-supplied file must be openable before it is queued
19 | fin.open(argv[i]);
20 | if (fin.fail()) {
21 | cerr << "file '" << argv[i] << "' not exist!";
22 | exit(-1);
23 | }
24 | filenames.push_back(argv[i]);
25 | fin.close();
26 | }
27 | filenames.push_back("./api/Sys.jack"); // library sources are appended without an existence check here
28 | filenames.push_back("./api/Math.jack");
29 | filenames.push_back("./api/Array.jack");
30 | filenames.push_back("./api/Memory.jack");
31 | filenames.push_back("./api/String.jack");
32 | filenames.push_back("./api/Output.jack");
33 | filenames.push_back("./api/Input.jack");
34 | filenames.push_back("./api/IO.jack");
35 | Parser parser(filenames);
36 | parser.parse_program();
37 | if (!hasError()) { // each later stage runs only if no error has been reported so far
38 | Analyzer analyzer(parser.getSyntaxTree());
39 | analyzer.check();
40 | }
41 | if (!hasError()) {
42 | CodeGen cgen;
43 | cgen.write(parser.getSyntaxTree());
44 | }
45 |
46 | return 0; // NOTE(review): 0 is returned even when errors were reported - confirm this is intended
47 | }
48 |
--------------------------------------------------------------------------------
/jackc/include/Analyzer.h:
--------------------------------------------------------------------------------
1 | #ifndef _ANALYZER_H
2 | #define _ANALYZER_H
3 |
4 | #include "Error.h"
5 | #include "Parser.h"
6 | #include "SymbolTable.h"
7 | #include
8 |
9 | class Analyzer { // Checker that works on the syntax tree produced by Parser (see check()).
10 | private:
11 | Parser::TreeNode *tree; // the tree handed to the constructor - presumably; the constructor body is not shown here
12 | SymbolTable *symbolTable;
13 | string currentClassName; // name of the current class while traversing the tree
14 | string currentFunctionName; // name of the current function while traversing the tree
15 | void buildClassesTable(Parser::TreeNode *t);
16 | void checkStatements(Parser::TreeNode *t);
17 | void checkStatement(Parser::TreeNode *t);
18 | void checkExpression(Parser::TreeNode *t);
19 | void checkArguments(Parser::TreeNode *t, vector const& parameter,
20 | string const& functionName);
21 | void checkMain();
22 |
23 | public:
24 | Analyzer(Parser::TreeNode *t);
25 | void check(); // entry point called by the jackc driver after parsing
26 | };
27 |
28 | #endif
29 |
--------------------------------------------------------------------------------
/jackc/include/CodeGen.h:
--------------------------------------------------------------------------------
1 | #ifndef _CODE_GEN_H
2 | #define _CODE_GEN_H
3 |
4 | #include "Parser.h"
5 | #include "SymbolTable.h"
6 | #include
7 | #include
8 |
9 | using namespace std;
10 |
11 | class CodeGen { // Code generator driven from a Parser syntax tree (see write()).
12 | private:
13 | enum Segment { CONST, ARG, LOCAL, STATIC, THIS, THAT, POINTER, TEMP }; // operand segments for writePush / writePop
14 | enum Command { ADD, SUB, NEG, EQ, GT, LT, AND, OR, NOT }; // commands for writeArithmetic
15 | SymbolTable *symbolTable;
16 | Parser::TreeNode *tree;
17 | string currentClassName;
18 | string currentFunctionName;
19 | int ifLabelCount; // presumably used to number generated if labels - confirm in CodeGen.cpp
20 | int whileLabelCount; // presumably used to number generated while labels - confirm in CodeGen.cpp
21 | ofstream fout; // output stream for the generated code
22 | bool isMethod;
23 | void writePush(Segment segment, int index);
24 | void writePop(Segment segment, int index);
25 | void writeArithmetic(Command command);
26 | void writeLabel(string const& label);
27 | void writeGoto(string const& label);
28 | void writeIf(string const& label);
29 | void writeCall(string const& name, int nArgs);
30 | void writeFunction(string const& name, int nArgs);
31 | void writeReturn();
32 | void writeExpression(Parser::TreeNode *t);
33 | void writeCallExpression(Parser::TreeNode *t);
34 | void translateCall(Parser::TreeNode *t);
35 |
36 | public:
37 | CodeGen();
38 | void translate(Parser::TreeNode *t);
39 | void write(Parser::TreeNode *t); // entry point called by the jackc driver with the parser's syntax tree
40 | };
41 |
42 | #endif
43 |
--------------------------------------------------------------------------------
/jackc/include/Error.h:
--------------------------------------------------------------------------------
1 | #ifndef _ERROR_H
2 | #define _ERROR_H
3 |
4 | #include "Scanner.h"
5 |
6 | extern int errorNum;
7 |
8 | bool hasError(); // checked by the jackc driver between compilation stages
9 | void syntaxError(string currentParserFilename, string expected,
10 | Scanner::Token token);
11 |
12 | // class name and function name do not match (NOTE(review): the parameter is a file name, so "file name" may be meant - confirm)
13 | void error1(string currentParserFilename);
14 |
15 | // variable redefinition
16 | void error2(string currentClass, int row, string type, string name);
17 |
18 | // function redefinition
19 | void error3(string currentClass, int row, string type, string name);
20 |
21 | // undefined type
22 | void error4(string currentClassName, int row, string type);
23 |
24 | // undefined variable
25 | void error5(string currentClassName, int row, string varName);
26 |
27 | // type mismatch
28 | void error6(string currentClassName, int row, string type);
29 |
30 | // undefined function
31 | void error7(string currentClassName, string callerName, int row,
32 | string functionName);
33 |
34 | // function kinds differ
35 | void error8(string currentClassName, int row, string functionName);
36 |
37 | // wrong function kind
38 | void error9(string currentClassName, string callerName, int row,
39 | string functionName);
40 |
41 | // wrong function kind
42 | void error10(string currentClassName, string callerName, int row,
43 | string functionName);
44 |
45 | // wrong return value
46 | void error11(string currentClassName, string type, int row);
47 |
48 | // wrong return value
49 | void error12(string currentClassName, int row);
50 |
51 | // wrong return value
52 | void error13(string currentClassName, int row);
53 |
54 | // too few arguments
55 | void error14(string currentClassName, string functionName, int row);
56 |
57 | // too many arguments
58 | void error15(string currentClassName, string functionName, int row);
59 |
60 | void error16();
61 |
62 | void error17();
63 |
64 | void error18();
65 |
66 | void error19();
67 |
68 | void error20();
69 |
70 | #endif
71 |
--------------------------------------------------------------------------------
/jackc/include/Parser.h:
--------------------------------------------------------------------------------
1 | #ifndef _PARSER_H
2 | #define _PARSER_H
3 |
4 | #include "Error.h"
5 | #include "Scanner.h"
6 | #include
7 | #include
8 | #include
9 |
10 | using namespace std;
11 |
12 | class Parser { // Parser interface: the parse* members return TreeNode pointers; definitions are not in this header.
13 | public:
14 |   enum NodeKind { // tag stored in TreeNode::nodeKind
15 |     None,
16 |     CLASS_K,
17 |     CLASS_VAR_DEC_K,
18 |     SUBROUTINE_DEC_K,
19 |     BASIC_TYPE_K,
20 |     CLASS_TYPE_K,
21 |     NULL_K,
22 |     PARAM_K,
23 |     VAR_DEC_K,
24 |     ARRAY_K,
25 |     VAR_K,
26 |     IF_STATEMENT_K,
27 |     WHILE_STATEMENT_K,
28 |     CALL_EXPRESSION_K,
29 |     RETURN_STATEMENT_K,
30 |     CALL_STATEMENT_K,
31 |     BOOL_EXPRESSION_K,
32 |     FUNCTION_CALL_K,
33 |     CONSTRUCTOR_CALL_K,
34 |     COMPARE_K,
35 |     OPERATION_K,
36 |     BOOL_K,
37 |     ASSIGN_K,
38 |     SUBROUTINE_BODY_K,
39 |     BOOL_CONST_K,
40 |     NEGATIVE_K,
41 |     METHOD_CALL_K,
42 |     INT_CONST_K,
43 |     CHAR_CONST_K,
44 |     STRING_CONST_K,
45 |     KEY_WORD_CONST,
46 |     THIS_K
47 |   };
48 |   class TreeNode { // one syntax-tree node
49 |   public:
50 |     Scanner::Token token;
51 |     TreeNode *child[5]; // up to five child links, all null after construction
52 |     TreeNode *next;     // link to another node; presumably the next sibling in a list -- confirm in Parser.cpp
53 |     NodeKind nodeKind;
54 |     TreeNode() { // starts as kind None with every link null
55 |       nodeKind = None;
56 |       child[0] = child[1] = child[2] = child[3] = child[4] = nullptr;
57 |       next = nullptr;
58 |     }
59 |   };
60 |
61 | private:
62 |   vector<string> filenames; // element type restored; the dump had dropped the template argument
63 |   string currentParserFilename;
64 |   TreeNode *syntaxTree;
65 |   Scanner scanner;
66 |   bool haveReturnStatement; // every function must contain a return statement, even when it returns void
67 |
68 |   Scanner::Token getToken();          // take one token out of the buffer
69 |   Scanner::Token ungetToken();        // put the most recently taken token back into the buffer
70 |   deque<Scanner::Token> tokenBuffer1; // left buffer (element type restored; see getToken/ungetToken)
71 |   deque<Scanner::Token> tokenBuffer2; // right buffer
72 |   string getFullName(string name);    // returns a full name for `name`; original comment was truncated -- confirm in Parser.cpp
73 |
74 |   TreeNode *parseClassList();
75 |   TreeNode *parseClass();
76 |   TreeNode *parseClassVarDecList();
77 |   TreeNode *parseClassVarDec();
78 |   TreeNode *parseVarNameList();
79 |   TreeNode *parseType();
80 |   TreeNode *parseSubroutineDecList();
81 |   TreeNode *parseSubroutinDec(); // spelling kept as-is: the definition elsewhere must match this name
82 |   TreeNode *parseParams();
83 |   TreeNode *parseParamList();
84 |   TreeNode *parseParam();
85 |   TreeNode *parseSubroutineBody();
86 |   TreeNode *parseVarDecList();
87 |   TreeNode *parseVarDec();
88 |   TreeNode *parseStatements();
89 |   TreeNode *parseStatement();
90 |   TreeNode *parseAssignStatement();
91 |   TreeNode *parseLeftValue();
92 |   TreeNode *parseIfStatement();
93 |   TreeNode *parseWhileStatement();
94 |   TreeNode *parseReturnStatement();
95 |   TreeNode *parseCallStatement();
96 |   TreeNode *parseExpressions();
97 |   TreeNode *parseExpressionList();
98 |   TreeNode *parseExpression();
99 |   TreeNode *parseBoolExpression();
100 |   TreeNode *parseAdditiveExpression();
101 |   TreeNode *parseTerm();
102 |   TreeNode *parseFactor();
103 |   TreeNode *parsePositiveFactor();
104 |   TreeNode *parseNotFactor();
105 |   TreeNode *parseCallExpression();
106 |
107 |   void printSyntaxTree(TreeNode *tree);
108 |
109 | public:
110 |   Parser(vector<string> &filenames); // takes the file-name list by non-const reference
111 |   bool hasError();
112 |   TreeNode *getSyntaxTree();
113 |   void print();
114 |   void parse_program();
115 |   static string getCallerName(const string &fullName);
116 |   static string getFunctionName(const string &fullName);
117 | };
118 |
119 | #endif
120 |
--------------------------------------------------------------------------------
/jackc/include/Scanner.h:
--------------------------------------------------------------------------------
1 | #ifndef _Scanner_H_
2 | #define _Scanner_H_
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | using namespace std;
10 |
11 | class Scanner { // Tokenizer interface: nextToken() hands out Token values; definitions are not in this header.
12 | private:
13 |   enum State // states of the transition diagram
14 |   { START_STATE,       // start state
15 |     ID_STATE,          // identifier state
16 |     INT_STATE,         // integer state
17 |     CHAR_STATE,        // character state
18 |     CHAR_STATE_A,
19 |     CHAR_STATE_B,
20 |     CHAR_STATE_C,
21 |     FLOAT_STATE,       // floating-point state
22 |     D_FLOAT_STATE,     // approaching a float with a decimal point
23 |     E_FLOAT_STATE,     // approaching a float in scientific notation
24 |     STRING_STATE,      // string state
25 |     S_STRING_STATE,    // string containing an escape character
26 |     SYMBOL_STATE,
27 |     INCOMMENT_STATE,   // inside a comment
28 |     P_INCOMMENT_STATE, // comment is about to end
29 |     DONE_STATE,        // finished state
30 |     ERROR_STATE        // error state
31 |   };
32 |
33 | public:
34 |   set<string> keyWords; // element type restored (the dump had dropped it); inferred from searchReserved(const string &)
35 |   set<string> symbols;  // element type restored; presumably symbol spellings -- confirm in Scanner.cpp
36 |   enum TokenType {
37 |     KEY_WORD,
38 |     ID,       // identifier
39 |     INT,      // integer number
40 |     BOOL,     // boolean
41 |     CHAR,     // character
42 |     STRING,   // string
43 |     SYMBOL,   // legal symbol
44 |     NONE,     // no type
45 |     ERROR,    // error
46 |     ENDOFFILE // end of file
47 |   };
48 |   struct Token {
49 |     TokenType kind; // type of the token
50 |     string lexeme;  // value (text) of the token
51 |     unsigned row;   // current row
52 |   };
53 |   void initKeyWords();
54 |   void initSymbols();
55 |
56 | private:
57 |   string lineBuffer;  // line buffer: holds one line of the source program
58 |   unsigned bufferPos; // position within the line buffer
59 |   unsigned row;       // line number, in the source program, of the current buffered line
60 |   ifstream fin;       // input stream for the source file
61 |   char nextChar();    // returns the next character in the buffer
62 |   void rollBack();    // rolls the buffer back
63 |   TokenType searchReserved(const string &s); // looks up keywords
64 | public:
65 |   Scanner();
66 |   void openFile(const string &filename);
67 |   void closeFile();
68 |   Token nextToken(); // returns the next Token
69 |   void resetRow();
70 | };
71 |
72 | #endif
73 |
--------------------------------------------------------------------------------
/jackc/include/SymbolTable.h:
--------------------------------------------------------------------------------
1 | #ifndef _SYMBOL_TABLE_H
2 | #define _SYMBOL_TABLE_H
3 |
4 | #include "Parser.h"
5 | #include