├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cast.cpp
├── cast.h
├── cexception.cpp
├── cexception.h
├── cgui.cpp
├── cgui.h
├── clexer.cpp
├── clexer.h
├── cparser.cpp
├── cparser.h
├── csub.cpp
├── csub.h
├── cunit.cpp
├── cunit.h
├── cvm.cpp
├── cvm.h
├── main.cpp
├── memory.h
├── memory_gc.h
├── screenshots
└── 1.png
├── test.cpp
├── types.cpp
└── types.h
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | cmake-build-*
3 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.12)
project(cliblisp)

set(CMAKE_CXX_STANDARD 14)

# Directory-scoped link dependencies: they apply to every target declared below.
link_libraries(freeglut opengl32 glu32)

# Sources and headers shared by the interpreter and the test runner.
# Only the entry-point file differs between the two executables.
set(CLIBLISP_COMMON_SOURCES
        memory.h
        memory_gc.h
        types.h
        types.cpp
        clexer.h
        clexer.cpp
        cparser.h
        cparser.cpp
        cunit.h
        cunit.cpp
        cexception.h
        cexception.cpp
        cast.h
        cast.cpp
        cvm.cpp
        cvm.h
        csub.cpp
        csub.h
        cgui.cpp
        cgui.h)

# Interpreter executable.
add_executable(cliblisp
        main.cpp
        ${CLIBLISP_COMMON_SOURCES})

# Test runner executable.
add_executable(cliblisp-test
        test.cpp
        ${CLIBLISP_COMMON_SOURCES})
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cliblisp(C++ 简易LISP解释器 + 通用LR语法分析)
2 |
3 | 借鉴[CMiniLang](https://github.com/bajdcc/CMiniLang)的部分代码。
4 |
5 | 主要借鉴了CMiniLang的类型系统、词法分析、语法分析、AST、内存管理等代码(均为原创)。
6 |
7 | 事实证明CMiniLang的框架还是非常经典耐用的(再次强调)。
8 |
9 | **语法分析采用LR分析。项目见:[clibparser](https://github.com/bajdcc/clibparser)。**
10 |
11 | - 文法书写方式:以C++重载为基础的Parser Generator。
12 | - 识别方式:**以下推自动机为基础,向前查看一个字符、带回溯的LR分析**。
13 | - 内存管理:自制内存池。
14 |
15 | 本说明完善中,**末尾有测试用例**。
16 |
17 | 注:经[Qlib2d](https://github.com/bajdcc/Qlib2d)项目测试,本项目于**x64**环境下也可编译成功。
18 |
19 | ## 截图
20 |
21 | ![截图](screenshots/1.png)
22 |
23 | ## 文章
24 |
25 | - [【Lisp系列】开篇](http://zhuanlan.zhihu.com/p/45897626)
26 | - [【Lisp系列】实现四则运算](http://zhuanlan.zhihu.com/p/46723048)
27 | - [【Lisp系列】实现GC](http://zhuanlan.zhihu.com/p/46993463)
28 | - [【Lisp系列】实现Lambda](http://zhuanlan.zhihu.com/p/47309037)
29 | - [【Lisp系列】大功告成](http://zhuanlan.zhihu.com/p/47569910)
30 | - [【Lisp系列】手动递归](http://zhuanlan.zhihu.com/p/47869195)
31 |
32 | - [【Parser系列】实现LR分析——开篇](https://zhuanlan.zhihu.com/p/52478414)
33 | - [【Parser系列】实现LR分析——生成AST](https://zhuanlan.zhihu.com/p/52528516)
34 | - [【Parser系列】实现LR分析——支持C语言文法](https://zhuanlan.zhihu.com/p/52812144)
35 | - [【Parser系列】实现LR分析——完成编译器前端!](https://zhuanlan.zhihu.com/p/53070412)
36 |
37 | ## 功能
38 |
39 | 当前完成了四则运算和常用函数,采用解释器求值。
40 |
41 | **运行时所有对象采用标识回收GC,采用不可变值,传递拷贝。**
42 |
43 | 已实现:引用,变量,函数,四则,比较,递归,闭包,if,测试用例。
44 |
45 | 已实现**Y-combinator**,见测试用例#47-#49,由于递归运算会大量消耗内存,因此必要时需更改cvm.h中的**VM_MEM**宏的值为更大值。
46 |
47 | **改进:将eval调用转化为手动递归,使得递归可以人工控制,后续可能将出错机制从throw方式转变为手动递归跳出方式。测试:除大数溢出外,其余均通过。**
48 |
49 | - [x] 词法分析
50 | - [x] 语法分析
51 | - [x] 内存管理
52 | - [x] 序列化
53 | - [x] 识别数字
54 | - [x] 识别S-表达式
55 | - [x] 识别Q-表达式
56 | - [x] GC
57 | - [x] 运行时环境
58 | - [x] 异常恢复
59 | - [x] 简单的内建四则运算
60 | - [x] Subroutine和Symbol
61 | - [x] nil
62 | - [x] 常用内建函数
63 | - [ ] 输入
64 | - [x] 输出
65 | - [x] 字符串处理
66 | - [x] 识别变量,设置变量
67 | - [x] 识别函数Lambda
68 | - [x] 支持递归
69 | - [ ] 完善控制流:if
70 | - [ ] 更多测试用例
71 | - [ ] 添加更多功能
72 |
73 | 内建函数:
74 |
75 | - 四则运算
76 | - 比较运算
77 | - lambda
78 | - eval
79 | - quote
80 | - list
81 | - cons
82 | - car
83 | - cdr
84 | - def
85 | - if
86 | - len
87 | - append
88 |
89 | ## 调试信息
90 |
91 | - cval结点内存申请情况
92 | - GC释放情况
93 | - 内存池结点情况
94 | - GC中的栈对象引用树
95 |
96 | 生成NGA图,去EPSILON化,生成PDA表,生成AST。
97 |
98 | 以下为下推自动机的识别过程(**太长,略**),如需查看,请修改cparser.cpp中的:
99 |
100 | ```cpp
101 | #define TRACE_PARSING 0
102 | #define DUMP_PDA 0
103 | #define DEBUG_AST 0
104 | #define CHECK_AST 0
105 | ```
106 |
107 | 将值改为1即可。
108 |
109 | ## 使用
110 |
111 | ```cpp
112 | int main(int argc, char *argv[]) {
113 | clib::cvm vm;
114 | std::string input;
115 | while (true) {
116 | std::cout << "lisp> ";
117 | std::getline(std::cin, input);
118 | if (input == "exit")
119 | break;
120 | if (input.empty())
121 | continue;
122 | try {
123 | vm.save();
124 | clib::cparser p(input);
125 | auto root = p.parse();
126 | //clib::cast::print(root, 0, std::cout);
127 | auto val = vm.run(root);
128 | clib::cvm::print(val, std::cout);
129 | std::cout << std::endl;
130 | vm.gc();
131 | } catch (const std::exception &e) {
132 | printf("RUNTIME ERROR: %s\n", e.what());
133 | vm.restore();
134 | vm.gc();
135 | }
136 | }
137 | return 0;
138 | }
139 | ```
140 |
141 | ## 例子
142 |
143 | ```lisp
144 | lisp> + 1 2
145 | 3
146 | lisp> * 1 2 3 4 5 6
147 | 720
148 | lisp> - 8 4 2 9 8
149 | -15
150 | lisp> + a 5
151 | + a 5
152 | COMPILER ERROR: unsupported calc op
153 | RUNTIME ERROR: std::exception
154 | lisp> + "Hello" " " "world!"
155 | "Hello world!"
156 | lisp> eval 5
157 | 5
158 | lisp> eval `(+ 1 2)
159 | 3
160 | lisp> eval (+ 1 2)
161 | 3
162 | lisp> `a
163 | `a
164 | lisp> `(a b c)
165 | `(a b c)
166 | ```
167 |
168 | ### Subroutine
169 |
170 | ```lisp
171 | lisp> + "Project: " __project__ ", author: " __author__
172 | "Project: cliblisp, author: bajdcc"
173 | lisp> +
174 |
175 | ```
176 |
177 | ### List
178 |
179 | ```lisp
180 | lisp> list
181 |
182 | lisp> car (list 1 2 3)
183 | 1
184 | lisp> cdr (list 1 2 3)
185 | `(2 3)
186 | lisp> (eval (car (list + - * /))) 1 1
187 | 2
188 | ```
189 |
190 | ### Builtin
191 |
192 | ```lisp
193 | lisp> def `(a b c d) 1 2 3 4
194 | nil
195 | lisp> + a b c d
196 | 10
197 | ```
198 |
199 | ### Lambda
200 |
201 | ```lisp
202 | lisp> def `a (\ `(x y) `(+ x y))
203 | nil
204 | lisp> a
205 |
206 | lisp> a 1 2 3
207 | 6
208 | ```
209 |
210 | ### comparison
211 |
212 | ```lisp
213 | lisp> def `a (\ `(x y) `(+ x y))
214 | nil
215 | lisp> == (a 1 2) (a 2 1)
216 | 1
217 | lisp> < (a 1 2) (a 1 1)
218 | 0
219 | ```
220 |
221 | ### recursion
222 |
223 | ```lisp
224 | lisp> def `sum (\ `(n) `(if (> n 0) `(+ n (sum (- n 1))) `0))
225 | nil
226 | lisp> sum 100
227 | 5050
228 | lisp> sum (- 0 5)
229 | 0
230 | lisp> def `len (\ `l `(if (== l nil) `0 `(+ 1 (len (cdr l)))))
231 | nil
232 | lisp> len (list 1 2 3 )
233 | 3
234 | ```
235 |
236 | ## 测试用例
237 |
238 | 在目录下的test.cpp中。
239 |
240 | ```cpp
241 | TEST #1> [PASSED] (+ 1 2) => 3
242 | TEST #2> [PASSED] (* 1 2 3 4 5 6) => 720
243 | TEST #3> [PASSED] (- 8 4 2 9 8) => -15
244 | TEST #4> [PASSED] (+ "Hello" " " "world!") => "Hello world!"
245 | TEST #5> [PASSED] (eval 5) => 5
246 | TEST #6> [PASSED] (eval `(+ 1 2)) => 3
247 | TEST #7> [PASSED] (eval (+ 1 2)) => 3
248 | TEST #8> [PASSED] `a => `a
249 | TEST #9> [PASSED] `(a b c) => `(a b c)
250 | TEST #10> [PASSED] (+ "Project: " __project__ ", author: " __author__) => "Project: cliblisp, author: bajdcc"
251 | TEST #11> [PASSED] + =>
252 | TEST #12> [PASSED] (quote (testing 1 2 -3.14e+159)) => `(testing 1 2 -3.14e+159)
253 | TEST #13> [PASSED] (+ 2 2) => 4
254 | TEST #14> [PASSED] (+ (* 2 100) (* 1 10)) => 210
255 | TEST #15> [PASSED] (if (> 6 5) `(+ 1 1) `(+ 2 2)) => 2
256 | TEST #16> [PASSED] (if (< 6 5) `(+ 1 1) `(+ 2 2)) => 4
257 | TEST #17> [PASSED] (def `x 3) => 3
258 | TEST #18> [PASSED] x => 3
259 | TEST #19> [PASSED] (+ x x) => 6
260 | TEST #20> [PASSED] (begin (def `x 1) (def `x (+ x 1)) (+ x 1)) => 3
261 | TEST #21> [PASSED] ((\ `x `(+ x x)) 5) => 10
262 | TEST #22> [PASSED] (def `twice (\ `x `(* 2 x))) =>
263 | TEST #23> [PASSED] (twice 5) => 10
264 | TEST #24> [PASSED] (def `compose (\ `(f g) `(\ `x `(f (g x))))) =>
265 | TEST #25> [PASSED] ((compose list twice) 5) => `10
266 | TEST #26> [PASSED] (def `repeat (\ `f `(compose f f))) =>
267 | TEST #27> [PASSED] ((repeat twice) 5) => 20
268 | TEST #28> [PASSED] ((repeat (repeat twice)) 5) => 80
269 | TEST #29> [PASSED] (def `fact (\ `n `(if (<= n 1) `1 `(* n (fact (- n 1)))))) =>
271 | TEST #30> [PASSED] (fact 3) => 6
272 | TEST #31> [ERROR ] (fact 50) => 0 REQUIRE: 30414093201713378043612608166064768844377641568960512000000000000
273 | TEST #32> [PASSED] (fact 12) => 479001600
274 | TEST #33> [PASSED] (def `abs (\ `n `((if (> n 0) `+ `-) 0 n))) => <lambda `n `((if (> n 0) `+ `-) 0 n)>
275 | TEST #34> [PASSED] (abs -3) => 3
276 | TEST #35> [PASSED] (list (abs -3) (abs 0) (abs 3)) => `(3 0 3)
277 | TEST #36> [PASSED] (def `combine (\ `f `(\ `(x y) `(if (null? x) `nil `(f (list (car x) (car y)) ((combine f) (cdr x) (c
278 | dr y))))))) =>
279 | TEST #37> [PASSED] (def `zip (combine cons)) =>
281 | TEST #38> [PASSED] (zip (list 1 2 3 4) (list 5 6 7 8)) => `(`(1 5) `(2 6) `(3 7) `(4 8))
282 | TEST #39> [PASSED] (def `riff-shuffle (\ `deck `(begin (def `take (\ `(n seq) `(if (<= n 0) `nil `(cons (car seq) (take
283 | (- n 1) (cdr seq)))))) (def `drop (\ `(n seq) `(if (<= n 0) `seq `(drop (- n 1) (cdr seq))))) (def `mid (\ `seq `(/ (len
284 | seq) 2))) ((combine append) (take (mid deck) deck) (drop (mid deck) deck))))) =>
288 | TEST #40> [PASSED] (riff-shuffle (list 1 2 3 4 5 6 7 8)) => `(1 5 2 6 3 7 4 8)
289 | TEST #41> [PASSED] ((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8)) => `(1 3 5 7 2 4 6 8)
290 | TEST #42> [PASSED] (riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8)))) => `(1 2 3 4 5 6 7 8)
291 | TEST #43> [PASSED] (def `apply (\ `(item L) `(eval (cons item L)))) =>
292 | TEST #44> [PASSED] (apply + `(1 2 3)) => 6
293 | TEST #45> [PASSED] (def `sum (\ `n `(if (< n 2) `1 `(+ n (sum (- n 1)))))) =>
295 | TEST #46> [PASSED] (sum 10) => 55
296 | TEST #47> [PASSED] (def `Y (\ `f `((\ `self `(f (\ `x `((self self) x)))) (\ `self `(f (\ `x `((self self) x))))))) =>
297 |
298 | TEST #48> [PASSED] (def `Y_fib (\ `f `(\ `n `(if (<= n 2) `1 `(+ (f (- n 1)) (f (- n 2))))))) =>
300 | TEST #49> [PASSED] ((Y Y_fib) 5) => 5
301 | TEST #50> [PASSED] (def `range (\ `(a b) `(if (== a b) `nil `(cons a (range (+ a 1) b))))) =>
303 | TEST #51> [PASSED] (range 1 10) => `(1 2 3 4 5 6 7 8 9)
304 | TEST #52> [PASSED] (apply + (range 1 10)) => 45
305 | ==== ALL TEST PASSED [51/52] ====
306 | ```
307 |
308 | ## 目标
309 |
310 | 1. [ ] 对内存使用进行优化,减少不必要的拷贝操作。
311 | 2. [ ] 添加更多测试用例,确保GC工作的可靠性,避免循环引用与僵尸引用。
312 | 3. [ ] 用LISP语言实现高阶函数。
313 |
314 | ## 目标
315 |
316 | 当前进展:
317 |
318 | - [x] 生成文法表达式
319 | - [x] 序列
320 | - [x] 分支
321 | - [x] 可选
322 | - [x] 跳过单词
323 | - [x] 生成LR项目
324 | - [x] 生成非确定性文法自动机
325 | - [x] 闭包求解
326 | - [x] 去Epsilon
327 | - [x] 打印NGA结构
328 | - [x] 生成下推自动机
329 | - [x] 求First集合,并输出
330 | - [x] 检查文法有效性(如不产生Epsilon)
331 | - [x] 检查纯左递归
332 | - [x] 生成PDA
333 | - [x] 打印PDA结构(独立于内存池)
334 | - [x] 生成抽象语法树
335 | - [x] 自动生成AST结构
336 | - [x] 美化AST结构
337 | - [ ] 语义动作
338 | - [x] 设计语言
339 | - [x] 使用[C语言文法](https://github.com/antlr/grammars-v4/blob/master/c/C.g4)
340 | - [x] 实现回溯,解决移进/归约冲突问题,解决回溯的诸多BUG
341 | - [x] 实现LISP的循环
342 | - [ ] LISP虚拟机
343 | - [x] 创建窗口
344 | - [ ] 更多内置指令
345 | - [ ] 控制台交互
346 | - [x] 图形交互
347 | - [ ] 模拟操作系统
348 |
349 | 1. 将文法树转换为表(完成)
350 | 2. 根据PDA表生成AST(完成)
351 |
352 | ## 改进
353 |
354 | 1. [x] ~~修改了Lexer识别数字的问题~~
355 | 2. [x] ~~优化了内存池合并块算法,当没有元素被使用时将重置~~
356 | 3. [x] ~~添加错误恢复功能,异常时恢复GC的存储栈大小~~
357 | 4. [x] ~~更改了字符串管理方式,设为不可变~~
358 | 5. [x] ~~GC申请内存后自动清零~~
359 | 6. [x] ~~内存池算法可能存在问题,导致不定时崩溃~~
360 | 7. [x] ~~解决了数字溢出的问题~~
361 | 8. [x] ~~解决函数闭包问题~~
362 | 9. [x] ~~改进cons的实现~~
363 | 10. [x] ~~当使用函数不存在时发生崩溃,原因为内存池容量不够~~
364 | 11. [x] ~~调用递归时,环境变量env被free的问题,已修正~~
365 | 12. [x] ~~修正了类似double后缀的数字识别问题~~
366 | 13. [x] ~~修正了def函数无法修改外界环境的问题~~
367 | 14. [x] ~~修正了内存池申请的bug:删去为头时的情况;改正申请时块大小为size+1~~
368 | 15. [x] ~~修正了GC中unlink的bug~~
369 |
370 | - [ ] 生成LR项目集时将@符号提到集合的外面,减少状态
371 | - [x] PDA表的生成时使用了内存池来保存结点,当生成PDA表后,内存池可以全部回收
372 | - [x] 生成AST时减少嵌套结点
373 | - [ ] 优化回溯时产生的数据结构,减少拷贝
374 | - [ ] 解析成功时释放结点内存
375 | - [x] 将集合结点的标记修改成枚举
376 | - [ ] 设置的终结符可以不添加到语法树中
377 |
378 | ## 参考
379 |
380 | 1. [CMiniLang](https://github.com/bajdcc/CMiniLang)
381 | 2. [lysp](http://piumarta.com/software/lysp/lysp-1.1/lysp.c)
382 | 3. [MyScript](https://github.com/bajdcc/MyScript)
383 | 4. [Build Your Own Lisp](http://buildyourownlisp.com)
384 | 5. [Lisp interpreter in 90 lines of C++](http://howtowriteaprogram.blogspot.com/2010/11/lisp-interpreter-in-90-lines-of-c.html)
385 | 6. [CParser](https://github.com/bajdcc/CParser)
386 | 7. [vczh GLR Parser](https://github.com/vczh-libraries/Vlpp/tree/master/Source/Parsing)
--------------------------------------------------------------------------------
/cast.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Project: cliblisp
3 | // Created by bajdcc
4 | //
5 |
6 | #include
7 | #include
8 | #include "cast.h"
9 |
10 | namespace clib {
11 |
// Constructs the AST container; all setup is delegated to init().
cast::cast() {
    init();
}

// Allocates the root node (type ast_root) and makes it the current node.
void cast::init() {
    root = new_node(ast_root);
    current = root;
}

// Returns the root node created by init().
ast_node *cast::get_root() const {
    return root;
}
24 |
25 | ast_node *cast::new_node(ast_t type) {
26 | if (nodes.available() < 64) {
27 | printf("AST ERROR: 'nodes' out of memory\n");
28 | throw std::exception();
29 | }
30 | auto node = nodes.alloc();
31 | memset(node, 0, sizeof(ast_node));
32 | node->flag = type;
33 | return node;
34 | }
35 |
36 | ast_node *cast::set_child(ast_node *node, ast_node *child) {
37 | child->parent = node;
38 | if (node->child == nullptr) { // 没有孩子
39 | node->child = child;
40 | child->prev = child->next = child;
41 | } else { // 有孩子,添加到末尾
42 | child->prev = node->child->prev;
43 | child->next = node->child;
44 | node->child->prev->next = child;
45 | node->child->prev = child;
46 | }
47 | return node;
48 | }
49 |
50 | ast_node *cast::set_sibling(ast_node *node, ast_node *sibling) {
51 | sibling->parent = node->parent;
52 | sibling->prev = node;
53 | sibling->next = node->next;
54 | node->next = sibling;
55 | return sibling;
56 | }
57 |
58 | int cast::children_size(ast_node *node) {
59 | if (!node || !node->child)
60 | return 0;
61 | node = node->child;
62 | auto i = node;
63 | auto n = 0;
64 | do {
65 | n++;
66 | i = i->next;
67 | assert(i);
68 | } while (i != node);
69 | return n;
70 | }
71 |
// Appends an existing node under the current node; returns the current node
// (the value set_child returns).
ast_node *cast::add_child(ast_node *node) {
    return set_child(current, node);
}

// Creates a node of `type` as the last child of the current node.
// When `step` is true the new node becomes the current node. Returns the new node.
ast_node *cast::new_child(ast_t type, bool step) {
    auto node = new_node(type);
    set_child(current, node);
    if (step)
        current = node;
    return node;
}

// Creates a node of `type` directly after the current node.
// When `step` is true the new node becomes the current node. Returns the new node.
ast_node *cast::new_sibling(ast_t type, bool step) {
    auto node = new_node(type);
    set_sibling(current, node);
    if (step)
        current = node;
    return node;
}
91 |
92 | void cast::remove(ast_node *node) {
93 | if (node->parent && node->parent->child == node) {
94 | if (node->next == node) {
95 | node->parent->child = nullptr;
96 | } else {
97 | node->parent->child = node->next;
98 | }
99 | }
100 | if (node->prev && node->prev != node) {
101 | node->prev->next = node->next;
102 | }
103 | if (node->next && node->next != node) {
104 | node->next->prev = node->prev;
105 | }
106 | if (node->child) {
107 | auto f = node->child;
108 | auto i = f;
109 | i->parent = nullptr;
110 | if (i->next != f) {
111 | i = i->next;
112 | do {
113 | i->parent = nullptr;
114 | i = i->next;
115 | } while (i != f);
116 | }
117 | }
118 | nodes.free(node);
119 | }
120 |
121 | void cast::to(ast_to_t type) {
122 | switch (type) {
123 | case to_parent:
124 | current = current->parent;
125 | break;
126 | case to_prev:
127 | current = current->prev;
128 | break;
129 | case to_next:
130 | current = current->next;
131 | break;
132 | case to_child:
133 | current = current->child;
134 | break;
135 | }
136 | }
137 |
138 | void cast::set_str(ast_node *node, const string_t &str) {
139 | if (strings.available() < 64) {
140 | printf("AST ERROR: 'strings' out of memory\n");
141 | throw std::exception();
142 | }
143 | auto len = str.length();
144 | auto s = strings.alloc_array(len + 1);
145 | memcpy(s, str.c_str(), len);
146 | s[len] = 0;
147 | node->data._string = s;
148 | }
149 |
150 | std::string cast::display_str(const char *str) {
151 | std::stringstream ss;
152 | for (auto c = str; *c != 0; c++) {
153 | if (*c < 0) {
154 | ss << *c;
155 | } else if (isprint(*c)) {
156 | ss << *c;
157 | } else {
158 | if (*c == '\n')
159 | ss << "\\n";
160 | else
161 | ss << ".";
162 | }
163 | }
164 | return ss.str();
165 | }
166 |
// Clears both pools and rebuilds an empty tree via init().
// Node and string pointers obtained before the call must not be used afterwards.
void cast::reset() {
    nodes.clear();
    strings.clear();
    init();
}
172 |
173 | template
174 | static void ast_recursion(ast_node *node, int level, std::ostream &os, T f) {
175 | if (node == nullptr)
176 | return;
177 | auto i = node;
178 | if (i->next == i) {
179 | f(i, level, os);
180 | return;
181 | }
182 | f(i, level, os);
183 | i = i->next;
184 | while (i != node) {
185 | f(i, level, os);
186 | i = i->next;
187 | }
188 | }
189 |
190 | void cast::print(ast_node *node, int level, std::ostream &os) {
191 | if (node == nullptr)
192 | return;
193 | auto rec = [&](auto n, auto l, auto &os) { cast::print(n, l, os); };
194 | auto type = (ast_t) node->flag;
195 | switch (type) {
196 | case ast_root: // 根结点,全局声明
197 | ast_recursion(node->child, level, os, rec);
198 | break;
199 | case ast_env:
200 | case ast_sub:
201 | case ast_lambda:
202 | break;
203 | case ast_sexpr:
204 | os << '(';
205 | ast_recursion(node->child, level + 1, os, rec);
206 | os << ')';
207 | break;
208 | case ast_qexpr:
209 | os << '`';
210 | if (node->child && node->child == node->child->next) {
211 | ast_recursion(node->child, level + 1, os, rec);
212 | } else {
213 | os << '(';
214 | ast_recursion(node->child, level + 1, os, rec);
215 | os << ')';
216 | }
217 | break;
218 | case ast_literal:
219 | os << node->data._string;
220 | break;
221 | case ast_string:
222 | os << '"' << display_str(node->data._string) << '"';
223 | break;
224 | case ast_char:
225 | if (isprint(node->data._char))
226 | os << '\'' << node->data._char << '\'';
227 | else if (node->data._char == '\n')
228 | os << "'\\n'";
229 | else
230 | os << "'\\x" << std::setiosflags(std::ios::uppercase) << std::hex
231 | << std::setfill('0') << std::setw(2)
232 | << (unsigned int) node->data._char << '\'';
233 | break;
234 | case ast_uchar:
235 | os << (unsigned int) node->data._uchar;
236 | break;
237 | case ast_short:
238 | os << node->data._short;
239 | break;
240 | case ast_ushort:
241 | os << node->data._ushort;
242 | break;
243 | case ast_int:
244 | os << node->data._int;
245 | break;
246 | case ast_uint:
247 | os << node->data._uint;
248 | break;
249 | case ast_long:
250 | os << node->data._long;
251 | break;
252 | case ast_ulong:
253 | os << node->data._ulong;
254 | break;
255 | case ast_float:
256 | os << node->data._float;
257 | break;
258 | case ast_double:
259 | os << node->data._double;
260 | break;
261 | }
262 | if (node->parent) {
263 | if ((node->parent->flag == ast_qexpr || node->parent->flag == ast_sexpr) &&
264 | node->next != node->parent->child) {
265 | os << ' ';
266 | }
267 | }
268 | }
269 |
270 | void cast::print2(ast_node *node, int level, std::ostream &os) {
271 | if (node == nullptr)
272 | return;
273 | auto rec = [&](auto n, auto l, auto &os) { cast::print2(n, l, os); };
274 | auto type = (ast_t) node->flag;
275 | os << std::setfill(' ') << std::setw(level) << "";
276 | switch (type) {
277 | case ast_root: // 根结点,全局声明
278 | ast_recursion(node->child, level, os, rec);
279 | break;
280 | case ast_collection:
281 | os << COLL_STRING(node->data._coll) << std::endl;
282 | ast_recursion(node->child, level + 1, os, rec);
283 | break;
284 | case ast_keyword:
285 | os << "keyword: " << KEYWORD_STRING(node->data._keyword) << std::endl;
286 | break;
287 | case ast_operator:
288 | os << "operator: " << OP_STRING(node->data._op) << std::endl;
289 | break;
290 | case ast_literal:
291 | os << "id: " << node->data._string << std::endl;
292 | break;
293 | case ast_string:
294 | os << "string: " << '"' << display_str(node->data._string) << '"' << std::endl;
295 | break;
296 | case ast_char:
297 | os << "char: ";
298 | if (isprint(node->data._char))
299 | os << '\'' << node->data._char << '\'';
300 | else if (node->data._char == '\n')
301 | os << "'\\n'";
302 | else
303 | os << "'\\x" << std::setiosflags(std::ios::uppercase) << std::hex
304 | << std::setfill('0') << std::setw(2)
305 | << (unsigned int) node->data._char << '\'';
306 | os << std::endl;
307 | break;
308 | case ast_uchar:
309 | os << "uchar: " << (unsigned int) node->data._uchar << std::endl;
310 | break;
311 | case ast_short:
312 | os << "short: " << node->data._short << std::endl;
313 | break;
314 | case ast_ushort:
315 | os << "ushort: " << node->data._ushort << std::endl;
316 | break;
317 | case ast_int:
318 | os << "int: " << node->data._int << std::endl;
319 | break;
320 | case ast_uint:
321 | os << "uint: " << node->data._uint << std::endl;
322 | break;
323 | case ast_long:
324 | os << "long: " << node->data._long << std::endl;
325 | break;
326 | case ast_ulong:
327 | os << "ulong: " << node->data._ulong << std::endl;
328 | break;
329 | case ast_float:
330 | os << "float: " << node->data._float << std::endl;
331 | break;
332 | case ast_double:
333 | os << "double: " << node->data._double << std::endl;
334 | break;
335 | }
336 | }
337 |
338 | ast_node *cast::index(ast_node *node, int index) {
339 | auto child = node->child;
340 | if (child) {
341 | if (child->next == child) {
342 | return index == 0 ? child : nullptr;
343 | }
344 | auto head = child;
345 | for (auto i = 0; i < index; ++i) {
346 | child = child->next;
347 | if (child == head)
348 | return nullptr;
349 | }
350 | return child;
351 | }
352 | return nullptr;
353 | }
354 |
355 | ast_node *cast::index(ast_node *node, const string_t &index) {
356 | auto child = node->child;
357 | if (child) {
358 | if (child->next == child) {
359 | return index == child->child->data._string ? child : nullptr;
360 | }
361 | auto head = child;
362 | auto i = head;
363 | do {
364 | if (index == i->child->data._string)
365 | return i->child->next;
366 | i = i->next;
367 | } while (i != head);
368 | }
369 | return nullptr;
370 | }
371 |
372 | std::tuple ast_list[] = {
373 | std::make_tuple(ast_root, "root", l_none, 0),
374 | std::make_tuple(ast_collection, "coll", l_none, 0),
375 | std::make_tuple(ast_keyword, "keyword", l_none, 0),
376 | std::make_tuple(ast_operator, "operator", l_operator , 0),
377 | std::make_tuple(ast_literal, "literal", l_identifier, 0),
378 | std::make_tuple(ast_string, "string", l_string, 0),
379 | std::make_tuple(ast_char, "char", l_char, 1),
380 | std::make_tuple(ast_uchar, "uchar", l_uchar, 2),
381 | std::make_tuple(ast_short, "short", l_short, 3),
382 | std::make_tuple(ast_ushort, "ushort", l_ushort, 4),
383 | std::make_tuple(ast_int, "int", l_int, 5),
384 | std::make_tuple(ast_uint, "uint", l_uint, 6),
385 | std::make_tuple(ast_long, "long", l_long, 7),
386 | std::make_tuple(ast_ulong, "ulong", l_ulong, 8),
387 | std::make_tuple(ast_float, "float", l_float, 9),
388 | std::make_tuple(ast_double, "double", l_double, 10),
389 | std::make_tuple(ast_env, "env", l_none, 0),
390 | std::make_tuple(ast_sub, "sub", l_none, 0),
391 | std::make_tuple(ast_lambda, "lambda", l_none, 0),
392 | std::make_tuple(ast_sexpr, "sexpr", l_none, 0),
393 | std::make_tuple(ast_qexpr, "qexpr", l_none, 0),
394 | };
395 |
396 | const string_t &cast::ast_str(ast_t type) {
397 | return std::get<1>(ast_list[type]);
398 | }
399 |
400 | bool cast::ast_equal(ast_t type, lexer_t lex) {
401 | return std::get<2>(ast_list[type]) == lex;
402 | }
403 |
404 | int cast::ast_prior(ast_t type) {
405 | return std::get<3>(ast_list[type]);
406 | }
407 |
408 | void cast::unlink(ast_node *node) {
409 | if (node->parent) {
410 | auto &parent = node->parent;
411 | auto &ptr = node;
412 | auto i = parent->child;
413 | if (i->next == i) {
414 | assert(i->prev == i);
415 | assert(parent->child == node);
416 | parent->child = nullptr;
417 | node->parent = nullptr;
418 | node->prev = node->next = node;
419 | return;
420 | }
421 | if (ptr == parent->child) {
422 | parent->child = i->next;
423 | i->prev->next = parent->child;
424 | parent->child->prev = i->prev;
425 | node->parent = nullptr;
426 | node->prev = node->next = node;
427 | return;
428 | }
429 | do {
430 | if (i->next == ptr) {
431 | if (i->next->next == parent->child) {
432 | i->next = parent->child;
433 | parent->child->prev = i;
434 | } else {
435 | i->next->next->prev = i;
436 | i->next = i->next->next;
437 | }
438 | break;
439 | } else {
440 | i = i->next;
441 | }
442 | } while (i != parent->child);
443 | node->parent = nullptr;
444 | node->prev = node->next = node;
445 | }
446 | }
447 | }
448 |
--------------------------------------------------------------------------------
/cast.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: cliblisp
3 | // Created by bajdcc
4 | //
5 |
6 | #ifndef CLIBLISP_CAST_H
7 | #define CLIBLISP_CAST_H
8 |
9 | #include "types.h"
10 | #include "memory.h"
11 |
12 | #define AST_NODE_MEM (8 * 1024)
13 | #define AST_STR_MEM (8 * 1024)
14 |
15 | namespace clib {
16 |
// Kinds of AST node. The numeric values index the ast_list table in cast.cpp,
// so the order here must match the order of that table.
enum ast_t {
    ast_root,
    ast_collection,
    ast_keyword,
    ast_operator,
    ast_literal,
    ast_string,
    ast_char,
    ast_uchar,
    ast_short,
    ast_ushort,
    ast_int,
    ast_uint,
    ast_long,
    ast_ulong,
    ast_float,
    ast_double,
    ast_env,
    ast_sub,
    ast_lambda,
    ast_sexpr,
    ast_qexpr,
};
40 |
// Directions accepted by cast::to() for moving the current-node cursor.
enum ast_to_t {
    to_parent,
    to_prev,
    to_next,
    to_child,
};
47 |
// AST node
struct ast_node {
    // Node type (an ast_t value)
    uint32_t flag;

    union {
#define DEFINE_NODE_DATA(t) LEX_T(t) _##t;
        DEFINE_NODE_DATA(char)
        DEFINE_NODE_DATA(uchar)
        DEFINE_NODE_DATA(short)
        DEFINE_NODE_DATA(ushort)
        DEFINE_NODE_DATA(int)
        DEFINE_NODE_DATA(uint)
        DEFINE_NODE_DATA(long)
        DEFINE_NODE_DATA(ulong)
        DEFINE_NODE_DATA(float)
        DEFINE_NODE_DATA(double)
#undef DEFINE_NODE_DATA
        const char *_string;
        const char *_identifier;
        keyword_t _keyword;
        operator_t _op;
        coll_t _coll;
    } data; // Payload; which member is meaningful depends on `flag`

    // Tree structure (generalized list)
    ast_node *parent; // Parent
    ast_node *prev; // Left sibling
    ast_node *next; // Right sibling
    ast_node *child; // Leftmost child
};
79 |
// Owner and builder of an abstract syntax tree.
// Nodes and strings are allocated from two pools owned by this object; the
// class also keeps a "current node" cursor used by the new_*/add_child/to helpers.
// Non-copyable.
class cast {
public:
    cast();
    ~cast() = default;

    cast(const cast &) = delete;
    cast &operator=(const cast &) = delete;

    // Root node of the tree.
    ast_node *get_root() const;

    // Node creation: detached, as last child of the cursor, or right after the cursor.
    ast_node *new_node(ast_t type);
    ast_node *new_child(ast_t type, bool step = true);
    ast_node *new_sibling(ast_t type, bool step = true);

    // Detaches a node and returns it to the pool.
    void remove(ast_node*);

    ast_node *add_child(ast_node*);
    static ast_node *set_child(ast_node*, ast_node*);
    static ast_node *set_sibling(ast_node*, ast_node*);
    static int children_size(ast_node*);

    // Stores a pooled copy of `str` in the node / renders a string printably.
    void set_str(ast_node *node, const string_t &str);
    static std::string display_str(const char *str);

    // Moves the cursor.
    void to(ast_to_t type);

    static void print(ast_node *node, int level, std::ostream &os);
    static void print2(ast_node *node, int level, std::ostream &os);
    static const string_t &ast_str(ast_t type);
    static bool ast_equal(ast_t type, lexer_t lex);
    static int ast_prior(ast_t type);

    // Detaches a node from its parent without freeing it.
    static void unlink(ast_node *node);

    static ast_node *index(ast_node *node, int index);
    static ast_node *index(ast_node *node, const string_t &index);

    // Clears both pools and starts over with a fresh root.
    void reset();
private:
    void init();

private:
    // NOTE(review): the memory_pool template arguments appear to have been lost
    // during text extraction (AST_NODE_MEM / AST_STR_MEM are defined above) — confirm.
    memory_pool nodes; // Pool for all AST nodes
    memory_pool strings; // Pool for all strings
    ast_node *root; // Root node
    ast_node *current; // Current node (cursor)
};
127 | }
128 |
129 | #endif //CLIBLISP_CAST_H
130 |
--------------------------------------------------------------------------------
/cexception.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Project: clibparser
3 | // Author: bajdcc
4 | //
5 | #include "cexception.h"
6 |
7 | namespace clib {
// Stores a copy of `msg` in the public `msg` member.
cexception::cexception(const string_t &msg) noexcept : msg(msg) {

}
11 | }
12 |
--------------------------------------------------------------------------------
/cexception.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: clibparser
3 | // Author: bajdcc
4 | //
5 | #ifndef CLIBVM_EXCEPTION_H
6 | #define CLIBVM_EXCEPTION_H
7 |
8 | #include
9 | #include "types.h"
10 |
11 | namespace clib {
12 | class cexception : public std::exception {
13 | public:
14 | explicit cexception(const string_t &msg) noexcept;
15 | ~cexception() = default;
16 |
17 | cexception(const cexception& e) = default;
18 | cexception &operator = (const cexception& e) = default;
19 |
20 | string_t msg;
21 | };
22 | }
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/cgui.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Project: cliblisp
3 | // Created by bajdcc
4 | //
5 |
6 | #include
7 | #include "cgui.h"
8 |
9 | namespace clib {
10 |
11 | cgui::cgui() {
12 | auto cs = std::vector{
13 | R"(def `put-str (\ `s `(map ui-put (word s))))",
14 | R"(def `ui-put-delay (\ `(c t) `(begin (ui-put c) (conf `(record)) (conf (attr `wait t)))))",
15 | R"(def `put-str-delay (\ `(s t) `(map (\ `c `(ui-put-delay c t)) (word s))))",
16 | R"(def `(i S) 0 (word __logo__))",
17 | R"(conf `(ticks 1000))",
18 | R"(if (< i (len S)) `(begin (ui-put (index S i)) (def `i (+ i)) (conf `continue)) `(conf `break))",
19 | R"(conf `(ticks 5))",
20 | R"(put-str-delay "Hello world!\n" 0.4d)",
21 | R"(put-str "Welcome to cliblisp by bajdcc!\n")",
22 | };
23 | std::copy(cs.begin(), cs.end(), std::back_inserter(codes));
24 | }
25 |
// Returns the single cgui instance, constructed on first use.
cgui &cgui::singleton() {
    static clib::cgui gui;
    return gui;
}

// Per-frame entry point: runs `ticks` interpreter steps, then renders the text buffer.
void cgui::draw() {
    for (int i = 0; i < ticks; ++i) {
        tick();
    }
    draw_text();
}
37 |
// Renders the GUI_ROWS x GUI_COLS character buffer as bitmap text, centred in the window.
void cgui::draw_text() {
    // Save the projection matrix and switch to window-pixel coordinates
    // (gluOrtho2D with bottom = h, top = 0, so y grows downwards).
    glMatrixMode(GL_PROJECTION);
    glPushMatrix();
    glLoadIdentity();
    int w = glutGet(GLUT_WINDOW_WIDTH);
    int h = glutGet(GLUT_WINDOW_HEIGHT);
    int width = GUI_COLS * GUI_FONT_W;
    int height = GUI_ROWS * GUI_FONT_H;
    gluOrtho2D(0, w, h, 0);
    glMatrixMode(GL_MODELVIEW);
    glPushMatrix();
    glLoadIdentity();

    glColor3f(0.9f, 0.9f, 0.9f);
    // Top-left corner of the text area; clamped to 0 when the window is smaller than the text.
    int x = std::max((w - width) / 2, 0);
    int y = std::max((h - height) / 2, 0);

    // One raster position per row; glutBitmapCharacter is called once per cell of the row.
    for (auto i = 0; i < GUI_ROWS; ++i) {
        glRasterPos2i(x, y);
        for (auto j = 0; j < GUI_COLS; ++j) {
            glutBitmapCharacter(GUI_FONT, buffer[i][j]);
        }
        y += GUI_FONT_H;
    }

    // Restore both matrices saved above.
    glPopMatrix();
    glMatrixMode(GL_PROJECTION);
    glPopMatrix();
}
67 |
// Advances the interpreter by one step.
//  - While a program is running, resumes it with a budget of `cycle`.
//  - Otherwise pops the next queued snippet, parses it and starts it with GUI_CYCLES.
// A non-null result from vm.run() is treated as completion (gc + dump); a null
// result means the program needs further ticks.
// Any std::exception is reported on stdout and the VM is restored and collected.
// While `continues` is positive the current snippet is re-queued at the front
// so it runs again; an error cancels that repetition.
void cgui::tick() {
    auto c = 0; // passed by name to vm.run(); its meaning is defined by cvm (not visible here)
    auto error = false;
    if (running) {
        try {
            auto val = vm.run(cycle, c);
            if (val != nullptr) {
                vm.gc();
                vm.dump();
                running = false;
            }
        } catch (const std::exception &e) {
            error = true;
            printf("RUNTIME ERROR: %s\n", e.what());
            vm.restore();
            vm.gc();
            running = false;
        }
    } else {
        if (!codes.empty()) {
            current_code = codes.front();
            codes.pop_front();
            try {
                auto root = p.parse(current_code);
                vm.prepare(root);
                auto val = vm.run(GUI_CYCLES, c);
                if (val != nullptr) {
                    vm.gc();
                    vm.dump();
                } else {
                    running = true;
                }
            } catch (const std::exception &e) {
                error = true;
                printf("RUNTIME ERROR: %s\n", e.what());
                vm.restore();
                vm.gc();
            }
        }
    }
    if (continues > 0) {
        if (error) {
            // Stop repeating a snippet that failed.
            continues = 0;
            current_code = "";
        } else {
            codes.push_front(current_code);
        }
    }
}
117 |
118 | void cgui::put_char(char c) {
119 | if (c == '\n') {
120 | if (ptr_y == GUI_ROWS - 1) {
121 | new_line();
122 | } else {
123 | ptr_x = 0;
124 | ptr_y++;
125 | }
126 | } else if (c == '\b') {
127 | if (ptr_mx + ptr_my * GUI_COLS < ptr_x + ptr_y * GUI_COLS) {
128 | if (ptr_y == 0) {
129 | if (ptr_x != 0) {
130 | draw_char('\u0000');
131 | ptr_x--;
132 | }
133 | } else {
134 | if (ptr_x != 0) {
135 | draw_char('\u0000');
136 | ptr_x--;
137 | } else {
138 | draw_char('\u0000');
139 | ptr_x = GUI_COLS - 1;
140 | ptr_y--;
141 | }
142 | }
143 | }
144 | } else if (c == '\u0002') {
145 | ptr_x--;
146 | while (ptr_x >= 0) {
147 | draw_char('\u0000');
148 | ptr_x--;
149 | }
150 | ptr_x = 0;
151 | } else if (c == '\r') {
152 | ptr_x = 0;
153 | } else if (ptr_x == GUI_COLS - 1) {
154 | if (ptr_y == GUI_ROWS - 1) {
155 | draw_char(c);
156 | new_line();
157 | } else {
158 | draw_char(c);
159 | ptr_x = 0;
160 | ptr_y++;
161 | }
162 | } else {
163 | draw_char(c);
164 | ptr_x++;
165 | }
166 | }
167 |
168 | void cgui::new_line() {
169 | ptr_x = 0;
170 | for (int i = 0; i < GUI_ROWS - 1; ++i) {
171 | std::copy(buffer[i + 1].begin(), buffer[i + 1].end(), buffer[i].begin());
172 | }
173 | std::fill(buffer[GUI_ROWS - 1].begin(), buffer[GUI_ROWS - 1].end(), 0);
174 | }
175 |
// Stores `c` in the buffer cell under the cursor; the cursor is not moved.
void cgui::draw_char(const char &c) {
    buffer[ptr_y][ptr_x] = c;
}

// Sets the budget passed to vm.run() when tick() resumes a running program.
void cgui::set_cycle(int cycle) {
    this->cycle = cycle;
}

// Sets how many tick() calls draw() performs per frame.
void cgui::set_ticks(int ticks) {
    this->ticks = ticks;
}
187 |
188 | void cgui::record() {
189 | record_now = std::chrono::high_resolution_clock::now();
190 | }
191 |
192 | bool cgui::reach(const decimal &d) {
193 | auto now = std::chrono::high_resolution_clock::now();
194 | return std::chrono::duration_cast>(now - record_now).count() > d;
195 | }
196 |
197 | void cgui::control(int type) {
198 | if (type == 0) { // continue
199 | continues++;
200 | } else if (type == 1) { // break
201 | continues = 0;
202 | }
203 | }
204 | }
205 |
--------------------------------------------------------------------------------
/cgui.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: cliblisp
3 | // Created by bajdcc
4 | //
5 |
6 | #ifndef CLIBLISP_CGUI_H
7 | #define CLIBLISP_CGUI_H
8 |
9 | #include
10 | #include
11 | #include
12 | #include "types.h"
13 | #include "cparser.h"
14 | #include "cvm.h"
15 |
16 | #define GUI_FONT GLUT_BITMAP_9_BY_15
17 | #define GUI_FONT_W 9
18 | #define GUI_FONT_H 15
19 | #define GUI_ROWS 30
20 | #define GUI_COLS 84
21 | #define GUI_SIZE (GUI_ROWS * GUI_COLS)
22 | #define GUI_CYCLES 50
23 | #define GUI_TICKS 5
24 |
25 | namespace clib {
26 |
27 | class cgui {
28 | public:
29 | cgui();
30 | ~cgui() = default;
31 |
32 | cgui(const cgui &) = delete;
33 | cgui &operator=(const cgui &) = delete;
34 |
35 | void draw();
36 |
37 | void put_char(char c);
38 |
39 | void set_cycle(int cycle);
40 | void set_ticks(int ticks);
41 |
42 | void record();
43 | bool reach(const decimal &d);
44 | void control(int type);
45 |
46 | private:
47 | void tick();
48 | void draw_text();
49 |
50 | void new_line();
51 | inline void draw_char(const char &c);
52 |
53 | public:
54 | static cgui &singleton();
55 |
56 | private:
57 | std::array, GUI_ROWS> buffer;
58 | std::deque codes;
59 | cvm vm;
60 | cparser p;
61 | bool running{false};
62 | int cycle{ GUI_CYCLES };
63 | int ticks{ GUI_TICKS };
64 | int ptr_x{0};
65 | int ptr_y{0};
66 | int ptr_mx{0};
67 | int ptr_my{0};
68 | int continues{0};
69 | string_t current_code;
70 | std::chrono::system_clock::time_point record_now;
71 | };
72 | }
73 |
74 | #endif //CLIBLISP_CGUI_H
75 |
--------------------------------------------------------------------------------
/clexer.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Project: CMiniLang
3 | // Author: bajdcc
4 | //
5 |
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include "clexer.h"
11 |
12 | namespace clib {
13 |
// Creates a lexer over `str`. The input must be non-empty (checked with assert).
// Inside the body `str` names the constructor parameter, which holds the same
// text as the member initialised from it.
clexer::clexer(string_t str) : str(str) {
    length = (uint) str.length();
    assert(length > 0);
    initMap();
}

clexer::~clexer() = default;
21 |
// Generates clexer::get_<t>() for each token kind: returns the value of the
// most recently scanned token of that kind, as stored in `bags`.
#define DEFINE_LEXER_GETTER(t) \
LEX_T(t) clexer::get_##t() const \
{ \
    return bags._##t; \
}

DEFINE_LEXER_GETTER(char)
DEFINE_LEXER_GETTER(uchar)
DEFINE_LEXER_GETTER(short)
DEFINE_LEXER_GETTER(ushort)
DEFINE_LEXER_GETTER(int)
DEFINE_LEXER_GETTER(uint)
DEFINE_LEXER_GETTER(long)
DEFINE_LEXER_GETTER(ulong)
DEFINE_LEXER_GETTER(float)
DEFINE_LEXER_GETTER(double)
DEFINE_LEXER_GETTER(operator)
DEFINE_LEXER_GETTER(keyword)
DEFINE_LEXER_GETTER(identifier)
DEFINE_LEXER_GETTER(string)
DEFINE_LEXER_GETTER(comment)
DEFINE_LEXER_GETTER(space)
DEFINE_LEXER_GETTER(newline)
DEFINE_LEXER_GETTER(error)
#undef DEFINE_LEXER_GETTER
47 |
// Generates clexer::get_store_<t>(index): returns element `index` of the
// per-kind `storage` container. No bounds check is performed here.
#define DEFINE_LEXER_GETTER(t) \
LEX_T(t) clexer::get_store_##t(int index) const \
{ \
    return storage._##t[index]; \
}

DEFINE_LEXER_GETTER(char)
DEFINE_LEXER_GETTER(uchar)
DEFINE_LEXER_GETTER(short)
DEFINE_LEXER_GETTER(ushort)
DEFINE_LEXER_GETTER(int)
DEFINE_LEXER_GETTER(uint)
DEFINE_LEXER_GETTER(long)
DEFINE_LEXER_GETTER(ulong)
DEFINE_LEXER_GETTER(float)
DEFINE_LEXER_GETTER(double)
DEFINE_LEXER_GETTER(identifier)
DEFINE_LEXER_GETTER(string)
#undef DEFINE_LEXER_GETTER
67 |
68 | // 记录错误
69 | lexer_t clexer::record_error(error_t error, uint skip) {
70 | err_record_t err{};
71 | err.line = line; // 起始行
72 | err.column = column; // 起始列
73 | err.start_idx = index; // 文本起始位置
74 | err.end_idx = index + skip; // 文本结束位置
75 | err.err = error; // 错误类型
76 | err.str = str.substr(err.start_idx, err.end_idx - err.start_idx); // 错误字符
77 | records.push_back(err);
78 | bags._error = error;
79 | move(skip); // 略过错误文本
80 | return l_error;
81 | }
82 |
// Returns the most recently recorded error.
// `records` must be non-empty: back() is called without a check.
const clexer::err_record_t &clexer::recent_error() const {
    return records.back();
}
86 |
87 | lexer_t clexer::next() {
88 | auto c = local();
89 | if (c == -1) {
90 | type = l_end;
91 | return l_end;
92 | }
93 | type = l_error;
94 | if (isalpha(c) || c == '_') { // 变量名或关键字
95 | type = next_alpha();
96 | } else if (isdigit(c) || (c == '-' && isdigit(local(1)))) { // 数字
97 | if (c == '-') {
98 | move(1);
99 | type = next_digit();
100 | if (type == l_error)
101 | return type;
102 | switch (type) {
103 | case l_char:
104 | bags._char = -bags._char;
105 | break;
106 | case l_short:
107 | bags._short = -bags._short;
108 | break;
109 | case l_int:
110 | bags._int = -bags._int;
111 | break;
112 | case l_long:
113 | bags._long = -bags._long;
114 | break;
115 | case l_float:
116 | bags._float = -bags._float;
117 | break;
118 | case l_double:
119 | bags._double = -bags._double;
120 | break;
121 | default:
122 | break;
123 | }
124 | return type;
125 | } else {
126 | type = next_digit();
127 | }
128 | } else if (isspace(c)) { // 空白字符
129 | type = next_space();
130 | } else if (c == '\'') { // 字符
131 | type = next_char();
132 | } else if (c == '\"') { // 字符串
133 | type = next_string();
134 | } else if (c == '/') { // 注释
135 | auto c2 = local(1);
136 | if (c2 == '/' || c2 == '*') { // 注释
137 | type = next_comment();
138 | } else {
139 | type = next_operator();
140 | bags._identifier = OP_STRING(bags._operator);
141 | type = l_identifier;
142 | }
143 | } else if (c == '`' || c == '(' || c == ')') { // 操作符
144 | type = next_operator();
145 | } else {
146 | type = next_operator();
147 | bags._identifier = OP_STRING(bags._operator);
148 | type = l_identifier;
149 | }
150 | return type;
151 | }
152 |
// Kind of the most recently scanned token.
lexer_t clexer::get_type() const {
    return type;
}

// Current line of the scan position.
int clexer::get_line() const {
    return line;
}

// Current column of the scan position.
int clexer::get_column() const {
    return column;
}

// Line recorded by the last move(), i.e. before that move was applied.
int clexer::get_last_line() const {
    return last_line;
}

// Column recorded by the last move(), i.e. before that move was applied.
int clexer::get_last_column() const {
    return last_column;
}
172 |
173 | string_t clexer::current() const {
174 | switch (type) {
175 | case l_operator:
176 | return str.substr(last_index, index - last_index) + "\t[" + OPERATOR_STRING(bags._operator) + "]";
177 | default:
178 | break;
179 | }
180 | return str.substr(last_index, index - last_index);
181 | }
182 |
// True when the current token is of kind `type`.
bool clexer::is_type(lexer_t type) const {
    return get_type() == type;
}

// True when the current token is the keyword `type`.
bool clexer::is_keyword(keyword_t type) const {
    return get_type() == l_keyword && get_keyword() == type;
}

// True when the current token is the operator `type`.
bool clexer::is_operator(operator_t type) const {
    return get_type() == l_operator && get_operator() == type;
}

// True when the current token is either of the two given operators.
bool clexer::is_operator(operator_t type1, operator_t type2) const {
    return get_type() == l_operator && (get_operator() == type1 || get_operator() == type2);
}

// True for any numeric token kind (relies on l_char..l_double being contiguous in lexer_t).
bool clexer::is_number() const {
    return get_type() >= l_char && get_type() <= l_double;
}

// True for any integral token kind (relies on l_char..l_ulong being contiguous in lexer_t).
bool clexer::is_integer() const {
    return get_type() >= l_char && get_type() <= l_ulong;
}
206 |
// Returns the current integral token converted to LEX_T(int).
// The token must be integral (asserted); values of wider kinds are converted
// implicitly by the return statement. Returns 0 if no case matches.
LEX_T(int) clexer::get_integer() const {
    assert(is_integer());
    switch (type) {
#define DEFINE_LEXER_CASE(t) case l_##t: return get_##t();
        DEFINE_LEXER_CASE(char)
        DEFINE_LEXER_CASE(uchar)
        DEFINE_LEXER_CASE(short)
        DEFINE_LEXER_CASE(ushort)
        DEFINE_LEXER_CASE(int)
        DEFINE_LEXER_CASE(uint)
        DEFINE_LEXER_CASE(long)
        DEFINE_LEXER_CASE(ulong)
#undef DEFINE_LEXER_CASE
        default:
            break;
    }
    return 0;
}
225 |
226 |
227 |
228 | void clexer::move(uint idx, int inc) {
229 | last_index = index;
230 | last_line = line;
231 | last_column = column;
232 | if (inc < 0) {
233 | column += idx;
234 | } else {
235 | column = 1;
236 | line += inc;
237 | }
238 | index += idx;
239 | }
240 |
// Scales `d` by a power of ten: multiplies by 10 `e` times when e > 0,
// divides by 10 `-e` times when e < 0, and returns `d` unchanged when e == 0.
template<class T>
static T calc_exp(T d, int e) {
    for (; e > 0; --e)
        d *= 10;
    for (; e < 0; ++e)
        d /= 10;
    return d;
}
254 |
255 | // 转无符号类型
256 | static lexer_t unsigned_type(lexer_t t) {
257 | switch (t) {
258 | case l_char:
259 | return l_uchar;
260 | case l_short:
261 | return l_ushort;
262 | case l_int:
263 | return l_uint;
264 | case l_long:
265 | return l_ulong;
266 | default:
267 | return t;
268 | }
269 | }
270 |
271 | // 数字类型后缀
272 | static lexer_t digit_type_postfix(char c) {
273 | switch (c) {
274 | case 'C':
275 | case 'c':
276 | return l_char;
277 | case 'S':
278 | case 's':
279 | return l_short;
280 | case 'I':
281 | case 'i':
282 | return l_int;
283 | case 'L':
284 | case 'l':
285 | return l_long;
286 | case 'F':
287 | case 'f':
288 | return l_float;
289 | case 'D':
290 | case 'd':
291 | return l_double;
292 | default:
293 | return l_error;
294 | }
295 | }
296 |
297 | // 数字类型后缀(带无符号)
298 | lexer_t clexer::digit_type(lexer_t t, uint &i) {
299 | if (i == length) {
300 | return l_error;
301 | }
302 | if (str[i] == 'U' || str[i] == 'u') {
303 | if (++i == length) {
304 | return unsigned_type(t);
305 | }
306 | if ((t = unsigned_type(digit_type_postfix(str[i]))) == l_error) {
307 | return l_error;
308 | }
309 | ++i;
310 | return t;
311 | } else {
312 | if ((t = digit_type_postfix(str[i])) == l_error) {
313 | return l_error;
314 | }
315 | ++i;
316 | return t;
317 | }
318 | }
319 |
// Stores the integer `n` in `bags` as token kind `t`, converting with a plain cast.
// Returns false (and stores nothing) when `t` is not a numeric kind.
bool clexer::digit_from_integer(lexer_t t, LEX_T(ulong) n) {
    switch (t) {
#define DEFINE_LEXER_CONV_INTEGER(t) case l_##t: bags._##t = (LEX_T(t)) n; break;
        DEFINE_LEXER_CONV_INTEGER(char)
        DEFINE_LEXER_CONV_INTEGER(uchar)
        DEFINE_LEXER_CONV_INTEGER(short)
        DEFINE_LEXER_CONV_INTEGER(ushort)
        DEFINE_LEXER_CONV_INTEGER(int)
        DEFINE_LEXER_CONV_INTEGER(uint)
        DEFINE_LEXER_CONV_INTEGER(long)
        DEFINE_LEXER_CONV_INTEGER(ulong)
        DEFINE_LEXER_CONV_INTEGER(float)
        DEFINE_LEXER_CONV_INTEGER(double)
#undef DEFINE_LEXER_CONV_INTEGER
        default:
            return false;
    }
    return true;
}
339 |
// Stores the floating-point value `d` in `bags` as token kind `t`, converting with a plain cast.
// Returns false (and stores nothing) when `t` is not a numeric kind.
bool clexer::digit_from_double(lexer_t t, LEX_T(double) d) {
    switch (t) {
#define DEFINE_LEXER_CONV_INTEGER(t) case l_##t: bags._##t = (LEX_T(t)) d; break;
        DEFINE_LEXER_CONV_INTEGER(char)
        DEFINE_LEXER_CONV_INTEGER(uchar)
        DEFINE_LEXER_CONV_INTEGER(short)
        DEFINE_LEXER_CONV_INTEGER(ushort)
        DEFINE_LEXER_CONV_INTEGER(int)
        DEFINE_LEXER_CONV_INTEGER(uint)
        DEFINE_LEXER_CONV_INTEGER(long)
        DEFINE_LEXER_CONV_INTEGER(ulong)
        DEFINE_LEXER_CONV_INTEGER(float)
        DEFINE_LEXER_CONV_INTEGER(double)
#undef DEFINE_LEXER_CONV_INTEGER
        default:
            return false;
    }
    return true;
}
359 |
360 | // 返回数字(依照目前识别的类型)
361 | lexer_t clexer::digit_return(lexer_t t, LEX_T(ulong) n, LEX_T(double) d, uint i) {
362 | if (t == l_int) {
363 | bags._int = (int) n;
364 | } else if (t == l_double) {
365 | bags._double = d;
366 | } else if (t == l_long) {
367 | bags._long = n;
368 | } else {
369 | bags._double = d;
370 | }
371 | move(i - index);
372 | return t;
373 | }
374 |
// Converts one hexadecimal digit character to its value.
// Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything else.
static int hex2dec(char c) {
    if ('0' <= c && c <= '9')
        return c - '0';
    // ASCII upper- and lower-case letters differ only in bit 0x20; clearing it
    // folds 'a'-'f' onto 'A'-'F' and maps no other character into that range.
    const char folded = static_cast<char>(c & ~0x20);
    if ('A' <= folded && folded <= 'F')
        return folded - 'A' + 10;
    return -1;
}
387 |
388 | // 参考自:https://github.com/bajdcc/CEval/blob/master/CEval/CEval.cpp#L105
389 | lexer_t clexer::next_digit() {
390 | // 假定这里的数字规则是以0-9开头
391 | // 正则:^((?:\d+(\.)?\d*)(?:[eE][+-]?\d+)?)([uU])?([fFdCcSsDiIlL])?$
392 | // 正则:^0[Xx][0-9A-Fa-f]+$
393 | // 手动实现atof/atoi,并类型转换
394 | // 其他功能:int溢出转double,e科学记数法
395 | // 注意:这里不考虑负数,因为估计到歧义(可能是减法呢?)
396 | auto _type = l_int; // 默认是整型
397 | auto _postfix = l_none;
398 | auto i = index;
399 | auto n = 0ULL, _n = 0ULL;
400 | auto d = 0.0;
401 | if (local() == '0' && (local(1) == 'x' || local(1) == 'x')) {
402 | auto cc = 0;
403 | // 预先判断十六进制
404 | for (i += 2; i < length && ((cc = hex2dec(str[i])) != -1); i++) { // 解析整数部分
405 | if (_type == l_double) { // 小数加位,溢出后自动转换
406 | d *= 16.0;
407 | d += cc;
408 | } else { // 整数加位
409 | _n = n;
410 | n <<= 4;
411 | n += cc;
412 | }
413 | if (_type == l_int) { // 超过int范围,转为long
414 | if (n > INT_MAX)
415 | _type = l_long;
416 | } else if (_type == l_long) { // 超过long范围,转为double
417 | if (n >> 4 != _n) {
418 | d = (double) _n;
419 | d *= 16.0;
420 | d += cc;
421 | _type = l_double;
422 | }
423 | }
424 | }
425 | return digit_return(_type, n, d, i);
426 | }
427 | // 判断整数部分
428 | for (; i < length && (isdigit(str[i])); i++) { // 解析整数部分
429 | if (_type == l_double) { // 小数加位,溢出后自动转换
430 | d *= 10.0;
431 | d += str[i] - '0';
432 | } else { // 整数加位
433 | _n = n;
434 | n *= 10;
435 | n += str[i] - '0';
436 | }
437 | if (_type == l_int) { // 超过int范围,转为long
438 | if (n > INT_MAX) {
439 | _type = l_long;
440 | }
441 | } else if (_type == l_long) { // 超过long范围,转为double
442 | if (n / 10 != _n) {
443 | d = (double) _n;
444 | d *= 10.0;
445 | d += str[i] - '0';
446 | _type = l_double;
447 | }
448 | }
449 | }
450 | if (i == length) { // 只有整数部分
451 | return digit_return(_type, n, d, i);
452 | }
453 | if ((_postfix = digit_type(_type, i)) != l_error) { // 判断有无后缀
454 | move(i - index);
455 | if (_type == l_int)
456 | return digit_from_integer(_postfix, n) ? _postfix : _type;
457 | else
458 | return digit_from_double(_postfix, d) ? _postfix : _type;
459 | }
460 | if (str[i] == '.') { // 解析小数部分
461 | sint l = ++i;
462 | for (; i < length && (isdigit(str[i])); i++) {
463 | d *= 10.0;
464 | d += str[i] - '0';
465 | }
466 | l = i - l;
467 | if (l > 0) {
468 | d = (double) n + calc_exp(d, -l);
469 | _type = l_double;
470 | }
471 | }
472 | if (i == length) { // 只有整数部分和小数部分
473 | return digit_return(_type, n, d, i);
474 | }
475 | if ((_postfix = digit_type(_type, i)) != l_error) { // 判断有无后缀
476 | move(i - index);
477 | if (_type == l_int)
478 | return digit_from_integer(_postfix, n) ? _postfix : _type;
479 | else
480 | return digit_from_double(_postfix, d) ? _postfix : _type;
481 | }
482 | if (str[i] == 'e' || str[i] == 'E') { // 科学计数法强制转成double
483 | auto neg = false;
484 | auto e = 0;
485 | if (_type != l_double) {
486 | _type = l_double;
487 | d = (double) n;
488 | }
489 | if (++i == length) {
490 | return digit_return(_type, n, d, i);
491 | }
492 | if (!isdigit(str[i])) {
493 | if (str[i] == '-') { // 1e-1
494 | if (++i == length)
495 | return digit_return(_type, n, d, i);
496 | neg = true;
497 | } else if (str[i] == '+') { // 1e+1
498 | if (++i == length)
499 | return digit_return(_type, n, d, i);
500 | } else {
501 | return digit_return(_type, n, d, i);
502 | }
503 | }
504 | for (; i < length && (isdigit(str[i])); i++) { // 解析指数部分
505 | e *= 10;
506 | e += str[i] - '0';
507 | }
508 | d = calc_exp(d, neg ? -e : e);
509 | }
510 | if ((_postfix = digit_type(_type, i)) != l_error) { // 判断有无后缀
511 | move(i - index);
512 | if (_type == l_int)
513 | return digit_from_integer(_postfix, n) ? _postfix : _type;
514 | else
515 | return digit_from_double(_postfix, d) ? _postfix : _type;
516 | }
517 | return digit_return(_type, n, d, i);
518 | }
519 |
520 | lexer_t clexer::next_alpha() {
521 | sint i;
522 | for (i = index + 1; i < length && (isalnum(str[i]) || bitIdOp.test(str[i])); i++);
523 | auto s = str.substr(index, i - index);
524 | /*auto kw = mapKeyword.find(s);
525 | if (kw != mapKeyword.end()) { // 哈希查找关键字
526 | bags._keyword = kw->second;
527 | move(s.length());
528 | return l_keyword;
529 | }*/
530 | // 普通变量名
531 | bags._identifier = s;
532 | move(s.length());
533 | return l_identifier;
534 | }
535 |
536 | lexer_t clexer::next_space() {
537 | uint i, j;
538 | switch (str[index]) {
539 | case ' ':
540 | case '\t':
541 | // 查找连续的空格或Tab
542 | for (i = index + 1; i < length && (str[i] == ' ' || str[i] == '\t'); i++);
543 | bags._space = i - index;
544 | move(bags._space);
545 | return l_space;
546 | case '\r':
547 | case '\n':
548 | // 查找连续的'\n'或'\r\n'
549 | for (i = index, j = 0; i < length &&
550 | (str[i] == '\r' || (str[i] == '\n' ? ++j > 0 : false)); i++);
551 | bags._newline = j;
552 | move(i - index, bags._newline);
553 | return l_newline;
554 | }
555 | assert(!"space not match"); // cannot reach
556 | move(1);
557 | return l_error;
558 | }
559 |
// Single-character escape.
// Translates the character that follows a backslash into the character it
// denotes: a digit '0'-'9' yields its numeric value, and b f n r t v ' " \
// yield the usual control/literal characters. Returns -1 for anything else.
static int escape(char c) {
    if ('0' <= c && c <= '9')
        return c - '0';
    // Parallel tables: kNames[k] is the escape letter, kValues[k] its meaning.
    static const char kNames[] = "bfnrtv'\"\\";
    static const char kValues[] = "\b\f\n\r\t\v'\"\\";
    for (int k = 0; kNames[k] != '\0'; ++k) {
        if (kNames[k] == c)
            return kValues[k];
    }
    return -1;
}
589 |
590 | lexer_t clexer::next_char() {
591 | // 提前判定 '\'' 及 '\\' 这两种特殊情况(向前看)
592 | if (local(1) == '\\' && local(3) == '\'') {
593 | auto c = local(2);
594 | auto esc = escape((char) c); // '\?'
595 | if (esc != -1) {
596 | bags._char = (char) esc;
597 | move(4);
598 | return l_char;
599 | }
600 | return record_error(e_invalid_char, 4);
601 | }
602 | uint i;
603 | // 寻找 '\'' 的右边界(限定)
604 | for (i = 1; index + i < length && str[index + i] != '\'' && i <= 4; i++);
605 | if (i == 1) { // ''
606 | return record_error(e_invalid_char, i + 1);
607 | }
608 | auto j = index + i;
609 | i++;
610 | if (j < length && str[j] == '\'') {
611 | if (str[index + 1] == '\\') {
612 | if (i == 3) { // '\'
613 | return record_error(e_invalid_char, i);
614 | }
615 | // i 不可能为 4
616 | if (i == 5) { // '\x?'
617 | if (str[index + 1] == '\\' && str[index + 2] == 'x') {
618 | auto esc = hex2dec(str[index + 3]);
619 | if (esc != -1) {
620 | bags._char = (char) esc;
621 | move(i);
622 | return l_char;
623 | }
624 | }
625 | return record_error(e_invalid_char, i);
626 | }
627 | // '\x??'
628 | if (str[index + 1] == '\\' && str[index + 2] == 'x') {
629 | auto esc = hex2dec(str[index + 3]); // '\x?_'
630 | if (esc != -1) {
631 | bags._char = (char) esc;
632 | esc = hex2dec(str[index + 4]); // '\x_?'
633 | if (esc != -1) {
634 | bags._char *= 0x10;
635 | bags._char += (char) esc;
636 | move(i);
637 | return l_char;
638 | }
639 | }
640 | }
641 | return record_error(e_invalid_char, i);
642 | } else if (i == 3) { // '?'
643 | bags._char = str[index + 1];
644 | move((uint) i);
645 | return l_char;
646 | }
647 | }
648 | return record_error(e_invalid_char, 1);
649 | }
650 |
651 | lexer_t clexer::next_string() {
652 | auto i = index;
653 | auto prev = str[i];
654 | // 寻找非'\"'的第一个'"'
655 | for (i++; i < length && (prev == '\\' || (str[i]) != '"'); prev = str[i++]);
656 | auto j = i;
657 | if (j == length) { // " EOF
658 | return record_error(e_invalid_string, i - index);
659 | }
660 | std::stringstream ss;
661 | auto status = 1; // 状态机
662 | char c = 0;
663 | for (i = index + 1; i < j;) {
664 | switch (status) {
665 | case 1: { // 处理字符
666 | if (str[i] == '\\') {
667 | status = 2;
668 | } else { // '?'
669 | ss << str[i];
670 | }
671 | i++;
672 | }
673 | break;
674 | case 2: { // 处理转义
675 | if (str[i] == 'x') {
676 | status = 3;
677 | i++;
678 | } else {
679 | auto esc = escape(str[i]);
680 | if (esc != -1) {
681 | ss << (char) esc;
682 | i++;
683 | status = 1;
684 | } else {
685 | status = 0; // 失败
686 | }
687 | }
688 | }
689 | break;
690 | case 3: { // 处理 '\x??' 前一位十六进制数字
691 | auto esc = hex2dec(str[i]);
692 | if (esc != -1) {
693 | c = (char) esc;
694 | status = 4;
695 | i++;
696 | } else {
697 | status = 0; // 失败
698 | }
699 | }
700 | break;
701 | case 4: { // 处理 '\x??' 后一位十六进制数字
702 | auto esc = hex2dec(str[i]);
703 | if (esc != -1) {
704 | c *= 10;
705 | c += (char) esc;
706 | ss << c;
707 | status = 1;
708 | i++;
709 | } else {
710 | ss << c;
711 | status = 1;
712 | }
713 | }
714 | break;
715 | default: // 失败
716 | bags._string = str.substr(index + 1, j - index - 1);
717 | move(j - index + 1);
718 | return l_string;
719 | }
720 | }
721 | if (status == 1) { // 为初态/终态
722 | bags._string = ss.str();
723 | move(j - index + 1);
724 | return l_string;
725 | }
726 | bags._string = str.substr(index + 1, j - index - 1);
727 | move(j - index + 1);
728 | return l_string;
729 | }
730 |
731 | lexer_t clexer::next_comment() {
732 | sint i = index;
733 | if (str[++i] == '/') { // '//'
734 | // 寻找第一个换行符
735 | for (++i; i < length && (str[i] != '\n' && str[i] != '\r'); i++);
736 | bags._comment = str.substr(index + 2, i - index - 2);
737 | move(i - index);
738 | return l_comment;
739 | } else { // '/* */'
740 | // 寻找第一个 '*/'
741 | char prev = 0;
742 | auto newline = 0;
743 | for (++i; i < length && (prev != '*' || (str[i]) != '/');
744 | prev = str[i++], prev == '\n' ? ++newline : 0);
745 | i++;
746 | bags._comment = str.substr(index + 2, i - index - 1);
747 | move(i - index, newline); // 检查换行
748 | return l_comment;
749 | }
750 | }
751 |
752 | lexer_t clexer::next_operator() {
753 | auto c = local();
754 | if (bitOp[0].test((uint) c)) { // 操作符第一个char判断非法
755 | auto c2 = local(1);
756 | if (c2 != -1 && bitOp[1].test((uint) c2)) { // 操作符第二个char判断非法,否则解析单字符操作符
757 | auto c3 = local(2);
758 | if (c3 != -1 && (c3 == '=' || c3 == '.')) { // 操作符第三个char判断非法,否则解析双字符操作符
759 | // 三字符操作符
760 | auto p = op__start;
761 | if (c3 == '=') { // 手动判断
762 | if (c == c2) {
763 | if (c == '<') {
764 | p = op_left_shift_assign;
765 | } else if (c == '>') {
766 | p = op_left_shift_assign;
767 | }
768 | }
769 | } else {
770 | if (c == '.' && c2 == '.') {
771 | p = op_ellipsis;
772 | }
773 | }
774 | if (p == op__start) {
775 | auto p2 = sinOp[c];
776 | if (p2 != 0) {
777 | bags._operator = p2;
778 | move(1);
779 | return l_operator;
780 | }
781 | return record_error(e_invalid_operator, 3);
782 | } else {
783 | bags._operator = (operator_t) p;
784 | move(3);
785 | return l_operator;
786 | }
787 | } else {
788 | // 双字符操作符
789 | if (c2 == '=') {
790 | auto p = sinOp[c];
791 | if (p == 0 || p > op_logical_not) {
792 | // 单字符操作符
793 | auto p = sinOp[c];
794 | bags._operator = (operator_t) p;
795 | move(1);
796 | return l_operator;
797 | }
798 | bags._operator = (operator_t) (p + 1); // 从 '?' 到 '?='
799 | move(2);
800 | return l_operator;
801 | }
802 | auto p = op__start;
803 | if (c == c2) { // 相同位的双字符操作符
804 | switch (c2) {
805 | case '+':
806 | p = op_plus_plus;
807 | break;
808 | case '-':
809 | p = op_minus_minus;
810 | break;
811 | case '&':
812 | p = op_logical_and;
813 | break;
814 | case '|':
815 | p = op_logical_or;
816 | break;
817 | case '<':
818 | p = op_left_shift;
819 | break;
820 | case '>':
821 | p = op_right_shift;
822 | break;
823 | default:
824 | break;
825 | }
826 | } else if (c == '-' && c2 == '>') { // '->'
827 | p = op_pointer;
828 | }
829 | if (p == op__start) { // 双字符非法,则回退到单字符
830 | auto p = sinOp[c];
831 | if (p == 0) {
832 | return record_error(e_invalid_operator, 1);
833 | }
834 | bags._operator = (operator_t) p;
835 | move(1);
836 | return l_operator;
837 | } else {
838 | bags._operator = (operator_t) p;
839 | move(2);
840 | return l_operator;
841 | }
842 | }
843 | } else {
844 | // 单字符操作符
845 | auto p = sinOp[c];
846 | if (p == 0) {
847 | return record_error(e_invalid_operator, 1);
848 | }
849 | bags._operator = (operator_t) p;
850 | move(1);
851 | return l_operator;
852 | }
853 | } else {
854 | return record_error(e_invalid_operator, 1);
855 | }
856 | }
857 |
858 | int clexer::local() {
859 | if (index < length)
860 | return str[index];
861 | return -1;
862 | }
863 |
864 | int clexer::local(int offset) {
865 | if (index + offset < length)
866 | return str[index + offset];
867 | return -1;
868 | }
869 |
870 | void clexer::initMap() {
871 | // Keyword
872 | for (auto i = k__start + 1; i < k__end; i++) {
873 | mapKeyword[KEYWORD_STRING((keyword_t) i)] = (keyword_t) i;
874 | }
875 | auto len = 0;
876 | for (auto i = op__start + 1; i < op__end; i++) {
877 | const auto &op = OP_STRING((operator_t) i);
878 | len = op.length();
879 | if (len == 1) {
880 | sinOp[op[0]] = (operator_t) i; // 操作符第一位char映射
881 | }
882 | len = std::min(len, 2);
883 | for (auto j = 0; j < len; j++) {
884 | bitOp[j].set((uint) op[j]); // 操作符第一/二位char二进制查找
885 | }
886 | }
887 | string_t enable_char = "_-?";
888 | for (auto &c : enable_char) {
889 | bitIdOp.set((uint) c);
890 | }
891 | }
892 |
893 | void clexer::reset() {
894 | index = 0;
895 | last_index = 0;
896 |
897 | type = l_none;
898 | line = 1;
899 | column = 1;
900 | last_line = 1;
901 | last_column = 1;
902 |
903 | records.clear();
904 | }
905 | }
906 |
--------------------------------------------------------------------------------
/clexer.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: CMiniLang
3 | // Author: bajdcc
4 | //
5 |
6 | #ifndef CMINILANG_LEXER_H
7 | #define CMINILANG_LEXER_H
8 |
9 | #include
10 | #include
11 | #include
12 | #include "types.h"
13 |
14 | namespace clib {
15 |
// Lexical analysis.
// Tokenizer over an in-memory string: next() advances by one token, the
// get_*/is_* members inspect the most recent token, and lexing errors are
// collected in `records`.
class clexer {
public:
    explicit clexer(string_t str);
    ~clexer();

    // Non-copyable.
    clexer(const clexer &) = delete;
    clexer &operator=(const clexer &) = delete;

    // External interface: one typed getter per kind of token value.
#define DEFINE_LEXER_GETTER(t) LEX_T(t) get_##t() const;
    DEFINE_LEXER_GETTER(char)
    DEFINE_LEXER_GETTER(uchar)
    DEFINE_LEXER_GETTER(short)
    DEFINE_LEXER_GETTER(ushort)
    DEFINE_LEXER_GETTER(int)
    DEFINE_LEXER_GETTER(uint)
    DEFINE_LEXER_GETTER(long)
    DEFINE_LEXER_GETTER(ulong)
    DEFINE_LEXER_GETTER(float)
    DEFINE_LEXER_GETTER(double)
    DEFINE_LEXER_GETTER(operator)
    DEFINE_LEXER_GETTER(keyword)
    DEFINE_LEXER_GETTER(identifier)
    DEFINE_LEXER_GETTER(string)
    DEFINE_LEXER_GETTER(comment)
    DEFINE_LEXER_GETTER(space)
    DEFINE_LEXER_GETTER(newline)
    DEFINE_LEXER_GETTER(error)
#undef DEFINE_LEXER_GETTER
    // Getters taking an int argument; presumably an index into `storage`
    // below — confirm against the definitions.
#define DEFINE_LEXER_GETTER(t) LEX_T(t) get_store_##t(int) const;
    DEFINE_LEXER_GETTER(char)
    DEFINE_LEXER_GETTER(uchar)
    DEFINE_LEXER_GETTER(short)
    DEFINE_LEXER_GETTER(ushort)
    DEFINE_LEXER_GETTER(int)
    DEFINE_LEXER_GETTER(uint)
    DEFINE_LEXER_GETTER(long)
    DEFINE_LEXER_GETTER(ulong)
    DEFINE_LEXER_GETTER(float)
    DEFINE_LEXER_GETTER(double)
    DEFINE_LEXER_GETTER(identifier)
    DEFINE_LEXER_GETTER(string)
#undef DEFINE_LEXER_GETTER

public:
    // One recorded lexing error.
    struct err_record_t {
        int line, column;
        uint start_idx, end_idx;
        error_t err;
        string_t str;
    };

private:
    // NOTE(review): the element type looks lost from this declaration —
    // presumably err_record_t (see recent_error()); confirm.
    std::vector records;

    lexer_t record_error(error_t error, uint skip);

public:
    // Lexes and returns the next token type.
    lexer_t next();

    lexer_t get_type() const;
    int get_line() const;
    int get_column() const;
    int get_last_line() const;
    int get_last_column() const;
    string_t current() const;

    const err_record_t& recent_error() const;

    // Helpers used by next_digit() for numeric suffixes and conversions.
    lexer_t digit_type(lexer_t t, uint &i);
    bool digit_from_integer(lexer_t t, LEX_T(ulong) n);
    bool digit_from_double(lexer_t t, LEX_T(double) n);
    lexer_t digit_return(lexer_t t, LEX_T(ulong) n, LEX_T(double) d, uint i);

private:
    void move(uint idx, int inc = -1);

    // Internal parsing: one routine per token family.
    lexer_t next_digit();
    lexer_t next_alpha();
    lexer_t next_space();
    lexer_t next_char();
    lexer_t next_string();
    lexer_t next_comment();
    lexer_t next_operator();

    // Character at the current position / at an offset from it; -1 past the end.
    int local();
    int local(int offset);

public:
    bool is_type(lexer_t) const;
    bool is_keyword(keyword_t) const;
    bool is_operator(operator_t) const;
    bool is_operator(operator_t, operator_t) const;
    bool is_number() const;
    bool is_integer() const;

    LEX_T(int) get_integer() const;

    void reset();

private:
    string_t str;        // input text
    uint index{0};       // current position in str
    uint last_index{0};
    uint length{0};      // presumably str.length(); set outside this header — confirm

    lexer_t type{l_none};
    uint line{1};
    uint column{1};
    uint last_line{1};
    uint last_column{1};

    // Value of the most recent token, one slot per token kind.
    struct {
#define DEFINE_LEXER_GETTER(t) LEX_T(t) _##t;
        DEFINE_LEXER_GETTER(char)
        DEFINE_LEXER_GETTER(uchar)
        DEFINE_LEXER_GETTER(short)
        DEFINE_LEXER_GETTER(ushort)
        DEFINE_LEXER_GETTER(int)
        DEFINE_LEXER_GETTER(uint)
        DEFINE_LEXER_GETTER(long)
        DEFINE_LEXER_GETTER(ulong)
        DEFINE_LEXER_GETTER(float)
        DEFINE_LEXER_GETTER(double)
        DEFINE_LEXER_GETTER(operator)
        DEFINE_LEXER_GETTER(keyword)
        DEFINE_LEXER_GETTER(identifier)
        DEFINE_LEXER_GETTER(string)
        DEFINE_LEXER_GETTER(comment)
        DEFINE_LEXER_GETTER(space)
        DEFINE_LEXER_GETTER(newline)
        DEFINE_LEXER_GETTER(error)
#undef DEFINE_LEXER_GETTER
    } bags;

    // One vector per token kind.
    // NOTE(review): the vector element type looks lost from the macro below —
    // presumably LEX_T(t); confirm.
    struct {
#define DEFINE_LEXER_STORAGE(t) std::vector _##t;
        DEFINE_LEXER_STORAGE(char)
        DEFINE_LEXER_STORAGE(uchar)
        DEFINE_LEXER_STORAGE(short)
        DEFINE_LEXER_STORAGE(ushort)
        DEFINE_LEXER_STORAGE(int)
        DEFINE_LEXER_STORAGE(uint)
        DEFINE_LEXER_STORAGE(long)
        DEFINE_LEXER_STORAGE(ulong)
        DEFINE_LEXER_STORAGE(float)
        DEFINE_LEXER_STORAGE(double)
        DEFINE_LEXER_STORAGE(operator)
        DEFINE_LEXER_STORAGE(keyword)
        DEFINE_LEXER_STORAGE(identifier)
        DEFINE_LEXER_STORAGE(string)
        DEFINE_LEXER_STORAGE(comment)
        DEFINE_LEXER_STORAGE(space)
        DEFINE_LEXER_STORAGE(newline)
        DEFINE_LEXER_STORAGE(error)
#undef DEFINE_LEXER_STORAGE
    } storage;

    // Dictionaries, filled by initMap().
    // NOTE(review): template arguments of map_t and std::array look lost
    // below; restore them from the original header.
    map_t mapKeyword;            // keyword text -> keyword_t
    std::bitset<128> bitOp[2];   // characters allowed first / second in an operator
    std::array sinOp;            // one-character operator lookup, indexed by character
    std::bitset<128> bitIdOp;    // extra characters allowed inside identifiers

    void initMap();
};
184 | }
185 |
186 | #endif //CMINILANG_LEXER_H
--------------------------------------------------------------------------------
/cparser.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Project: CMiniLang
3 | // Author: bajdcc
4 | //
5 |
6 | #include
7 | #include
8 | #include
9 | #include "cexception.h"
10 | #include "cparser.h"
11 | #include "clexer.h"
12 | #include "cast.h"
13 | #include "cunit.h"
14 |
15 | #define TRACE_PARSING 0
16 | #define DUMP_PDA 0
17 | #define DEBUG_AST 0
18 | #define CHECK_AST 0
19 |
20 | namespace clib {
21 |
22 | ast_node *cparser::parse(const string_t &str) {
23 | lexer = std::make_unique(str.empty() ? str : (str[0] == '`' ? str : ("(" + str + ")")));
24 | // 清空词法分析结果
25 | lexer->reset();
26 | // 清空AST
27 | ast.reset();
28 | // 产生式
29 | if (unit.get_pda().empty())
30 | gen();
31 | // 语法分析(LR)
32 | program();
33 | //cast::print2(ast.get_root(), 0, std::cout);
34 | simplify(ast.get_root());
35 | return ast.get_root();
36 | }
37 |
38 | ast_node *cparser::root() const {
39 | return ast.get_root();
40 | }
41 |
42 | void cparser::reset() {
43 | ast_cache_index = 0;
44 | state_stack.clear();
45 | ast_stack.clear();
46 | ast_cache.clear();
47 | ast_coll_cache.clear();
48 | ast_reduce_cache.clear();
49 | state_stack.push_back(0);
50 | }
51 |
52 | void cparser::next() {
53 | lexer_t token;
54 | do {
55 | token = lexer->next();
56 | if (token == l_error) {
57 | auto err = lexer->recent_error();
58 | printf("[%04d:%03d] %-12s - %s\n",
59 | err.line,
60 | err.column,
61 | ERROR_STRING(err.err).c_str(),
62 | err.str.c_str());
63 | }
64 | } while (token == l_newline || token == l_space || token == l_error || token == l_comment);
65 | #if 0
66 | if (token != l_end) {
67 | qDebug("[%04d:%03d] %-12s - %s\n",
68 | lexer->get_last_line(),
69 | lexer->get_last_column(),
70 | LEX_STRING(lexer->get_type()).c_str(),
71 | lexer->current().c_str());
72 | }
73 | #endif
74 | }
75 |
76 | ast_node *cparser::simplify(ast_node *node) {
77 | if (node == nullptr)
78 | return nullptr;
79 | auto type = (ast_t) node->flag;
80 | switch (type) {
81 | case ast_root: // 根结点,全局声明
82 | return node->child = simplify(node->child);
83 | case ast_collection: {
84 | switch (node->data._coll) {
85 | case c_program:
86 | if (node->child->data._coll == c_list && node->child->child == node->child->child->next)
87 | return simplify(node->child->child);
88 | return simplify(node->child);
89 | case c_list: {
90 | auto i = node->child;
91 | if (i) {
92 | std::vector children;
93 | children.push_back(i);
94 | i = i->next;
95 | while (i != node->child) {
96 | children.push_back(i);
97 | i = i->next;
98 | }
99 | node->child = nullptr;
100 | for (auto &child: children) {
101 | cast::set_child(node, simplify(child->child));
102 | }
103 | }
104 | node->flag = ast_sexpr;
105 | return node;
106 | }
107 | case c_sexpr:
108 | node->child = simplify(node->child);
109 | if (node->child && node->child == node->child->next) {
110 | node->child->flag = ast_sexpr;
111 | return node->child;
112 | }
113 | node->flag = ast_sexpr;
114 | return node;
115 | case c_qexpr: {
116 | auto q = node->child->child;
117 | if (q->data._coll == c_sexpr) {
118 | auto t = simplify(q);
119 | t->flag = ast_qexpr;
120 | return t;
121 | }
122 | q = simplify(node->child);
123 | node->child = q;
124 | node->flag = ast_qexpr;
125 | return node;
126 | }
127 | case c_object:
128 | return simplify(node->child);
129 | default:
130 | break;
131 | }
132 | }
133 | break;
134 | case ast_string:
135 | case ast_literal:
136 | case ast_char:
137 | case ast_uchar:
138 | case ast_short:
139 | case ast_ushort:
140 | case ast_int:
141 | case ast_uint:
142 | case ast_long:
143 | case ast_ulong:
144 | case ast_float:
145 | case ast_double:
146 | return node;
147 | default:
148 | break;
149 | }
150 | error("invalid val type");
151 | return nullptr;
152 | }
153 |
// Declares the grammar (tokens, rules and productions) on `unit` and asks it
// to generate the automaton, starting from the `program` rule.
void cparser::gen() {
    // Operator tokens, bound to references named _<name>_.
#define DEF_OP(name) auto &_##name##_ = unit.token(op_##name)
    DEF_OP(lparan);
    DEF_OP(rparan);
    DEF_OP(quote);
#undef DEF_OP
    // Lexer-type tokens, bound to references with readable names.
#define DEF_LEX(name, real) auto &real = unit.token(l_##name)
    DEF_LEX(char, Char);
    DEF_LEX(uchar, UnsignedChar);
    DEF_LEX(short, Short);
    DEF_LEX(ushort, UnsignedShort);
    DEF_LEX(int, Integer);
    DEF_LEX(uint, UnsignedInteger);
    DEF_LEX(long, Long);
    DEF_LEX(ulong, UnsignedLong);
    DEF_LEX(float, Float);
    DEF_LEX(double, Double);
    DEF_LEX(identifier, Identifier);
    DEF_LEX(string, String);
    DEF_LEX(comment, Comment);
    DEF_LEX(space, Space);
    DEF_LEX(newline, Newline);
#undef DEF_LEX
    // Non-terminals; each rule is registered under its own name and c_<name> id.
#define DEF_RULE(name) auto &name = unit.rule(#name, c_##name)
    DEF_RULE(program);
    DEF_RULE(list);
    DEF_RULE(sexpr);
    DEF_RULE(qexpr);
    DEF_RULE(object);
#undef DEF_RULE
    // Productions. NOTE(review): the operators are overloaded by the grammar
    // unit; presumably '|' is alternation, '+' sequence, unary '*' optional
    // and unary '~' "match but do not keep" — confirm in cunit.h.
    program = list | sexpr;
    list = *list + object;
    sexpr = ~_lparan_ + *list + ~_rparan_;
    qexpr = ~_quote_ + object;
    object = Char | UnsignedChar | Short | UnsignedShort | Integer | UnsignedInteger |
             Long | UnsignedLong | Float | Double | String | Identifier | sexpr | qexpr;
    unit.gen(&program);
#if DUMP_PDA
    unit.dump(std::cout);
#endif
}
195 |
196 | void check_ast(ast_node *node) {
197 | #if CHECK_AST
198 | if (node->child) {
199 | auto &c = node->child;
200 | auto i = c;
201 | assert(i->parent == node);
202 | check_ast(i);
203 | if (i->next != i) {
204 | assert(i->prev->next == i);
205 | assert(i->next->prev == i);
206 | i = i->next;
207 | do {
208 | assert(i->parent == node);
209 | assert(i->prev->next == i);
210 | assert(i->next->prev == i);
211 | check_ast(i);
212 | i = i->next;
213 | } while (i != c);
214 | } else {
215 | assert(i->prev == i);
216 | }
217 | }
218 | #endif
219 | }
220 |
221 | void cparser::program() {
222 | reset();
223 | next();
224 | auto &pdas = unit.get_pda();
225 | auto root = ast.new_node(ast_collection);
226 | root->data._coll = pdas[0].coll;
227 | cast::set_child(ast.get_root(), root);
228 | ast_stack.push_back(root);
229 | std::vector jumps;
230 | std::vector trans_ids;
231 | backtrace_t bk_tmp;
232 | bk_tmp.lexer_index = 0;
233 | bk_tmp.state_stack = state_stack;
234 | bk_tmp.ast_stack = ast_stack;
235 | bk_tmp.current_state = 0;
236 | bk_tmp.coll_index = 0;
237 | bk_tmp.reduce_index = 0;
238 | bk_tmp.direction = b_next;
239 | std::vector bks;
240 | bks.push_back(bk_tmp);
241 | auto trans_id = -1;
242 | while (!bks.empty()) {
243 | auto bk = &bks.back();
244 | if (bk->direction == b_success || bk->direction == b_fail) {
245 | break;
246 | }
247 | if (bk->direction == b_fallback) {
248 | if (bk->trans_ids.empty()) {
249 | if (bks.size() > 1) {
250 | bks.pop_back();
251 | bks.back().direction = b_error;
252 | bk = &bks.back();
253 | } else {
254 | bk->direction = b_fail;
255 | continue;
256 | }
257 | }
258 | }
259 | ast_cache_index = bk->lexer_index;
260 | state_stack = bk->state_stack;
261 | ast_stack = bk->ast_stack;
262 | auto state = bk->current_state;
263 | if (bk->direction != b_error)
264 | for (;;) {
265 | auto ¤t_state = pdas[state];
266 | if (lexer->is_type(l_end)) {
267 | if (current_state.final) {
268 | if (state_stack.empty()) {
269 | bk->direction = b_success;
270 | break;
271 | }
272 | }
273 | }
274 | auto &trans = current_state.trans;
275 | if (trans_id == -1 && !bk->trans_ids.empty()) {
276 | trans_id = bk->trans_ids.back() & ((1 << 16) - 1);
277 | bk->trans_ids.pop_back();
278 | } else {
279 | trans_ids.clear();
280 | for (auto i = 0; i < trans.size(); ++i) {
281 | auto &cs = trans[i];
282 | if (valid_trans(cs))
283 | trans_ids.push_back(i | pda_edge_priority(cs.type) << 16);
284 | }
285 | if (!trans_ids.empty()) {
286 | std::sort(trans_ids.begin(), trans_ids.end(), std::greater<>());
287 | if (trans_ids.size() > 1) {
288 | bk_tmp.lexer_index = ast_cache_index;
289 | bk_tmp.state_stack = state_stack;
290 | bk_tmp.ast_stack = ast_stack;
291 | bk_tmp.current_state = state;
292 | bk_tmp.trans_ids = trans_ids;
293 | bk_tmp.coll_index = ast_coll_cache.size();
294 | bk_tmp.reduce_index = ast_reduce_cache.size();
295 | bk_tmp.direction = b_next;
296 | #if DEBUG_AST
297 | for (auto i = 0; i < bks.size(); ++i) {
298 | auto &_bk = bks[i];
299 | printf("[DEBUG] Branch old: i=%d, LI=%d, SS=%d, AS=%d, S=%d, TS=%d, CI=%d, RI=%d, TK=%d\n",
300 | i, _bk.lexer_index, _bk.state_stack.size(),
301 | _bk.ast_stack.size(), _bk.current_state, _bk.trans_ids.size(),
302 | _bk.coll_index, _bk.reduce_index, _bk.ast_ids.size());
303 | }
304 | #endif
305 | bks.push_back(bk_tmp);
306 | bk = &bks.back();
307 | #if DEBUG_AST
308 | printf("[DEBUG] Branch new: BS=%d, LI=%d, SS=%d, AS=%d, S=%d, TS=%d, CI=%d, RI=%d, TK=%d\n",
309 | bks.size(), bk_tmp.lexer_index, bk_tmp.state_stack.size(),
310 | bk_tmp.ast_stack.size(), bk_tmp.current_state, bk_tmp.trans_ids.size(),
311 | bk_tmp.coll_index, bk_tmp.reduce_index, bk_tmp.ast_ids.size());
312 | #endif
313 | bk->direction = b_next;
314 | break;
315 | } else {
316 | trans_id = trans_ids.back() & ((1 << 16) - 1);
317 | trans_ids.pop_back();
318 | }
319 | } else {
320 | #if TRACE_PARSING
321 | std::cout << "parsing error: " << current_state.label << std::endl;
322 | #endif
323 | bk->direction = b_error;
324 | break;
325 | }
326 | }
327 | auto &t = trans[trans_id];
328 | if (t.type == e_finish) {
329 | if (!lexer->is_type(l_end)) {
330 | #if TRACE_PARSING
331 | std::cout << "parsing redundant code: " << current_state.label << std::endl;
332 | #endif
333 | bk->direction = b_fail;
334 | break;
335 | }
336 | }
337 | auto jump = trans[trans_id].jump;
338 | #if TRACE_PARSING
339 | printf("State: %3d => To: %3d -- Action: %-10s -- Rule: %s\n",
340 | state, jump, pda_edge_str(t.type).c_str(), current_state.label.c_str());
341 | #endif
342 | do_trans(state, *bk, trans[trans_id]);
343 | state = jump;
344 | }
345 | if (bk->direction == b_error) {
346 | #if DEBUG_AST
347 | for (auto i = 0; i < bks.size(); ++i) {
348 | auto &_bk = bks[i];
349 | printf("[DEBUG] Backtrace failed: i=%d, LI=%d, SS=%d, AS=%d, S=%d, TS=%d, CI=%d, RI=%d, TK=%d\n",
350 | i, _bk.lexer_index, _bk.state_stack.size(),
351 | _bk.ast_stack.size(), _bk.current_state, _bk.trans_ids.size(),
352 | _bk.coll_index, _bk.reduce_index, _bk.ast_ids.size());
353 | }
354 | #endif
355 | for (auto &i : bk->ast_ids) {
356 | auto &token = ast_cache[i];
357 | check_ast(token);
358 | #if DEBUG_AST
359 | printf("[DEBUG] Backtrace failed, unlink token: %p, PB=%p\n", token, token->parent);
360 | #endif
361 | cast::unlink(token);
362 | check_ast(token);
363 | }
364 | auto size = ast_reduce_cache.size();
365 | for (auto i = size; i > bk->reduce_index; --i) {
366 | auto &coll = ast_reduce_cache[i - 1];
367 | check_ast(coll);
368 | #if DEBUG_AST
369 | printf("[DEBUG] Backtrace failed, unlink: %p, PB=%p, NE=%d, CB=%d\n",
370 | coll, coll->parent, cast::children_size(coll->parent), cast::children_size(coll));
371 | #endif
372 | cast::unlink(coll);
373 | check_ast(coll);
374 | }
375 | ast_reduce_cache.erase(ast_reduce_cache.begin() + bk->reduce_index, ast_reduce_cache.end());
376 | size = ast_coll_cache.size();
377 | for (auto i = size; i > bk->coll_index; --i) {
378 | auto &coll = ast_coll_cache[i - 1];
379 | assert(coll->flag == ast_collection);
380 | check_ast(coll);
381 | #if DEBUG_AST
382 | printf("[DEBUG] Backtrace failed, delete coll: %p, PB=%p, CB=%p, NE=%d, CS=%d\n",
383 | coll, coll->parent, coll->child,
384 | cast::children_size(coll->parent), cast::children_size(coll));
385 | #endif
386 | cast::unlink(coll);
387 | check_ast(coll);
388 | ast.remove(coll);
389 | }
390 | ast_coll_cache.erase(ast_coll_cache.begin() + bk->coll_index, ast_coll_cache.end());
391 | bk->direction = b_fallback;
392 | }
393 | trans_id = -1;
394 | }
395 | }
396 |
397 | ast_node *cparser::terminal() {
398 | if (lexer->is_type(l_end)) { // 结尾
399 | error("unexpected token EOF of expression");
400 | }
401 | if (ast_cache_index < ast_cache.size()) {
402 | return ast_cache[ast_cache_index++];
403 | }
404 | if (lexer->is_type(l_operator)) {
405 | auto node = ast.new_node(ast_operator);
406 | node->data._op = lexer->get_operator();
407 | match_operator(node->data._op);
408 | ast_cache.push_back(node);
409 | ast_cache_index++;
410 | return node;
411 | }
412 | if (lexer->is_type(l_keyword)) {
413 | auto node = ast.new_node(ast_keyword);
414 | node->data._keyword = lexer->get_keyword();
415 | match_keyword(node->data._keyword);
416 | ast_cache.push_back(node);
417 | ast_cache_index++;
418 | return node;
419 | }
420 | if (lexer->is_type(l_identifier)) {
421 | auto node = ast.new_node(ast_literal);
422 | ast.set_str(node, lexer->get_identifier());
423 | match_type(l_identifier);
424 | ast_cache.push_back(node);
425 | ast_cache_index++;
426 | return node;
427 | }
428 | if (lexer->is_number()) {
429 | ast_node *node = nullptr;
430 | auto type = lexer->get_type();
431 | switch (type) {
432 | #define DEFINE_NODE_INT(t) \
433 | case l_##t: \
434 | node = ast.new_node(ast_##t); \
435 | node->data._##t = lexer->get_##t(); \
436 | break;
437 | DEFINE_NODE_INT(char)
438 | DEFINE_NODE_INT(uchar)
439 | DEFINE_NODE_INT(short)
440 | DEFINE_NODE_INT(ushort)
441 | DEFINE_NODE_INT(int)
442 | DEFINE_NODE_INT(uint)
443 | DEFINE_NODE_INT(long)
444 | DEFINE_NODE_INT(ulong)
445 | DEFINE_NODE_INT(float)
446 | DEFINE_NODE_INT(double)
447 | #undef DEFINE_NODE_INT
448 | default:
449 | error("invalid number");
450 | break;
451 | }
452 | match_number();
453 | ast_cache.push_back(node);
454 | ast_cache_index++;
455 | return node;
456 | }
457 | if (lexer->is_type(l_string)) {
458 | std::stringstream ss;
459 | ss << lexer->get_string();
460 | #if 0
461 | printf("[%04d:%03d] String> %04X '%s'\n", clexer->get_line(), clexer->get_column(), idx, clexer->get_string().c_str());
462 | #endif
463 | match_type(l_string);
464 |
465 | while (lexer->is_type(l_string)) {
466 | ss << lexer->get_string();
467 | #if 0
468 | printf("[%04d:%03d] String> %04X '%s'\n", clexer->get_line(), clexer->get_column(), idx, clexer->get_string().c_str());
469 | #endif
470 | match_type(l_string);
471 | }
472 | auto node = ast.new_node(ast_string);
473 | ast.set_str(node, ss.str());
474 | ast_cache.push_back(node);
475 | ast_cache_index++;
476 | return node;
477 | }
478 | error("invalid type");
479 | return nullptr;
480 | }
481 |
482 | bool cparser::valid_trans(const pda_trans &trans) const {
483 | auto &la = trans.LA;
484 | if (!la.empty()) {
485 | auto success = false;
486 | for (auto &_la : la) {
487 | if (LA(_la)) {
488 | success = true;
489 | break;
490 | }
491 | }
492 | if (!success)
493 | return false;
494 | }
495 | switch (trans.type) {
496 | case e_shift:
497 | break;
498 | case e_pass:
499 | break;
500 | case e_move:
501 | break;
502 | case e_left_recursion:
503 | break;
504 | case e_reduce: {
505 | if (state_stack.empty())
506 | return false;
507 | if (trans.status != state_stack.back())
508 | return false;
509 | }
510 | break;
511 | case e_finish:
512 | break;
513 | default:
514 | break;
515 | }
516 | return true;
517 | }
518 |
519 | void cparser::do_trans(int state, backtrace_t &bk, const pda_trans &trans) {
520 | switch (trans.type) {
521 | case e_shift: {
522 | state_stack.push_back(state);
523 | auto new_node = ast.new_node(ast_collection);
524 | auto &pdas = unit.get_pda();
525 | new_node->data._coll = pdas[trans.jump].coll;
526 | #if DEBUG_AST
527 | printf("[DEBUG] Shift: top=%p, new=%p, CS=%d\n", ast_stack.back(), new_node,
528 | cast::children_size(ast_stack.back()));
529 | #endif
530 | ast_coll_cache.push_back(new_node);
531 | ast_stack.push_back(new_node);
532 | }
533 | break;
534 | case e_pass: {
535 | bk.ast_ids.insert(ast_cache_index);
536 | terminal();
537 | #if CHECK_AST
538 | check_ast(t);
539 | #endif
540 | #if DEBUG_AST
541 | printf("[DEBUG] Move: parent=%p, child=%p, CS=%d\n", ast_stack.back(), t,
542 | cast::children_size(ast_stack.back()));
543 | #endif
544 | }
545 | break;
546 | case e_move: {
547 | bk.ast_ids.insert(ast_cache_index);
548 | auto t = terminal();
549 | #if CHECK_AST
550 | check_ast(t);
551 | #endif
552 | #if DEBUG_AST
553 | printf("[DEBUG] Move: parent=%p, child=%p, CS=%d\n", ast_stack.back(), t,
554 | cast::children_size(ast_stack.back()));
555 | #endif
556 | cast::set_child(ast_stack.back(), t);
557 | }
558 | break;
559 | case e_left_recursion:
560 | break;
561 | case e_reduce: {
562 | auto new_ast = ast_stack.back();
563 | check_ast(new_ast);
564 | if (new_ast->flag != ast_collection) {
565 | bk.ast_ids.insert(ast_cache_index);
566 | }
567 | state_stack.pop_back();
568 | ast_stack.pop_back();
569 | ast_reduce_cache.push_back(new_ast);
570 | #if DEBUG_AST
571 | printf("[DEBUG] Reduce: parent=%p, child=%p, CS=%d, AS=%d, RI=%d\n",
572 | ast_stack.back(), new_ast, cast::children_size(ast_stack.back()),
573 | ast_stack.size(), ast_reduce_cache.size());
574 | #endif
575 | cast::set_child(ast_stack.back(), new_ast);
576 | check_ast(ast_stack.back());
577 | }
578 | break;
579 | case e_finish:
580 | state_stack.pop_back();
581 | break;
582 | }
583 | }
584 |
585 | bool cparser::LA(struct unit *u) const {
586 | if (u->t != u_token)
587 | return false;
588 | auto token = to_token(u);
589 | if (ast_cache_index < ast_cache.size()) {
590 | auto &cache = ast_cache[ast_cache_index];
591 | if (token->type == l_keyword)
592 | return cache->flag == ast_keyword && cache->data._keyword == token->value.keyword;
593 | if (token->type == l_operator)
594 | return cache->flag == ast_operator && cache->data._op == token->value.op;
595 | return cast::ast_equal((ast_t) cache->flag, token->type);
596 | }
597 | if (token->type == l_keyword)
598 | return lexer->is_keyword(token->value.keyword);
599 | if (token->type == l_operator)
600 | return lexer->is_operator(token->value.op);
601 | return lexer->is_type(token->type);
602 | }
603 |
604 | void cparser::expect(bool flag, const string_t &info) {
605 | if (!flag) {
606 | error(info);
607 | }
608 | }
609 |
610 | void cparser::match_keyword(keyword_t type) {
611 | expect(lexer->is_keyword(type), string_t("expect keyword ") + KEYWORD_STRING(type));
612 | next();
613 | }
614 |
615 | void cparser::match_operator(operator_t type) {
616 | expect(lexer->is_operator(type), string_t("expect operator " + OPERATOR_STRING(type)));
617 | next();
618 | }
619 |
620 | void cparser::match_type(lexer_t type) {
621 | expect(lexer->is_type(type), string_t("expect type " + LEX_STRING(type)));
622 | next();
623 | }
624 |
625 | void cparser::match_number() {
626 | expect(lexer->is_number(), "expect number");
627 | next();
628 | }
629 |
630 | void cparser::match_integer() {
631 | expect(lexer->is_integer(), "expect integer");
632 | next();
633 | }
634 |
635 | void cparser::error(const string_t &info) {
636 | std::stringstream ss;
637 | ss << '[' << std::setfill('0') << std::setw(4) << lexer->get_line();
638 | ss << ':' << std::setfill('0') << std::setw(3) << lexer->get_column();
639 | ss << ']' << " PARSER ERROR: " << info;
640 | throw cexception(ss.str());
641 | }
642 | }
643 |
--------------------------------------------------------------------------------
/cparser.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: CMiniLang
3 | // Author: bajdcc
4 | //
5 | #ifndef CMINILANG_PARSER_H
6 | #define CMINILANG_PARSER_H
7 |
#include <memory>
#include <unordered_set>
#include <vector>
#include "types.h"
#include "clexer.h"
#include "cast.h"
#include "cunit.h"
13 |
14 | namespace clib {
15 |
16 | enum backtrace_direction {
17 | b_success,
18 | b_next,
19 | b_error,
20 | b_fail,
21 | b_fallback,
22 | };
23 |
24 | struct backtrace_t {
25 | int lexer_index;
26 | std::vector state_stack;
27 | std::vector ast_stack;
28 | int current_state;
29 | int coll_index;
30 | int reduce_index;
31 | std::vector trans_ids;
32 | std::unordered_set ast_ids;
33 | backtrace_direction direction;
34 | };
35 |
36 | class cparser {
37 | public:
38 | cparser() = default;
39 | ~cparser() = default;
40 |
41 | cparser(const cparser &) = delete;
42 | cparser &operator=(const cparser &) = delete;
43 |
44 | ast_node *parse(const string_t &str);
45 | ast_node *root() const;
46 |
47 | private:
48 | void next();
49 | void reset();
50 |
51 | void gen();
52 | void program();
53 | ast_node *terminal();
54 |
55 | ast_node *simplify(ast_node *node);
56 |
57 | bool valid_trans(const pda_trans &trans) const;
58 | void do_trans(int state, backtrace_t &bk, const pda_trans &trans);
59 | bool LA(unit *u) const;
60 |
61 | private:
62 | void expect(bool, const string_t &);
63 | void match_keyword(keyword_t);
64 | void match_operator(operator_t);
65 | void match_type(lexer_t);
66 | void match_number();
67 | void match_integer();
68 |
69 | void error(const string_t &);
70 |
71 | private:
72 | std::vector state_stack;
73 | std::vector ast_stack;
74 | std::vector ast_cache;
75 | int ast_cache_index{0};
76 | std::vector ast_coll_cache;
77 | std::vector ast_reduce_cache;
78 |
79 | private:
80 | cunit unit;
81 | std::unique_ptr lexer;
82 | cast ast;
83 | };
84 | }
85 | #endif //CMINILANG_PARSER_H
--------------------------------------------------------------------------------
/csub.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: cliblisp
3 | // Created by bajdcc
4 | //
5 |
6 | #ifndef CLIBLISP_CSUB_H
7 | #define CLIBLISP_CSUB_H
8 |
9 | #include "cvm.h"
10 |
11 | namespace clib {
12 | struct cval;
13 | class cvm;
14 |
15 | class builtins {
16 | public:
17 | static status_t add(cvm *vm, cframe *frame);
18 | static status_t sub(cvm *vm, cframe *frame);
19 | static status_t mul(cvm *vm, cframe *frame);
20 | static status_t div(cvm *vm, cframe *frame);
21 | static status_t quote(cvm *vm, cframe *frame);
22 | static status_t list(cvm *vm, cframe *frame);
23 | static status_t car(cvm *vm, cframe *frame);
24 | static status_t cdr(cvm *vm, cframe *frame);
25 | static status_t cons(cvm *vm, cframe *frame);
26 |
27 | static status_t def(cvm *vm, cframe *frame);
28 | static status_t lambda(cvm *vm, cframe *frame);
29 | static status_t call_lambda(cvm *vm, cframe *frame);
30 | static status_t call_eval(cvm *vm, cframe *frame);
31 |
32 | static status_t lt(cvm *vm, cframe *frame);
33 | static status_t le(cvm *vm, cframe *frame);
34 | static status_t gt(cvm *vm, cframe *frame);
35 | static status_t ge(cvm *vm, cframe *frame);
36 | static status_t eq(cvm *vm, cframe *frame);
37 | static status_t ne(cvm *vm, cframe *frame);
38 |
39 | static status_t begin(cvm *vm, cframe *frame);
40 | static status_t _if(cvm *vm, cframe *frame);
41 |
42 | static status_t len(cvm *vm, cframe *frame);
43 | static status_t index(cvm *vm, cframe *frame);
44 | static status_t append(cvm *vm, cframe *frame);
45 |
46 | static status_t is_null(cvm *vm, cframe *frame);
47 | static status_t type(cvm *vm, cframe *frame);
48 | static status_t str(cvm *vm, cframe *frame);
49 | static status_t word(cvm *vm, cframe *frame);
50 |
51 | static status_t print(cvm *vm, cframe *frame);
52 | static status_t conf(cvm *vm, cframe *frame);
53 | static status_t attr(cvm *vm, cframe *frame);
54 |
55 | // GUI
56 | static status_t ui_put(cvm *vm, cframe *frame);
57 | };
58 | }
59 |
60 | #endif //CLIBLISP_CSUB_H
61 |
--------------------------------------------------------------------------------
/cunit.h:
--------------------------------------------------------------------------------
1 | //
2 | // Project: clibparser
3 | // Created by CC
4 | //
5 |
6 | #ifndef CLIBPARSER_CUNIT_H
7 | #define CLIBPARSER_CUNIT_H
8 |
9 | #include
10 | #include
11 | #include
12 | #include