├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cast.cpp ├── cast.h ├── cexception.cpp ├── cexception.h ├── cgui.cpp ├── cgui.h ├── clexer.cpp ├── clexer.h ├── cparser.cpp ├── cparser.h ├── csub.cpp ├── csub.h ├── cunit.cpp ├── cunit.h ├── cvm.cpp ├── cvm.h ├── main.cpp ├── memory.h ├── memory_gc.h ├── screenshots └── 1.png ├── test.cpp ├── types.cpp └── types.h /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | cmake-build-* 3 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | project(cliblisp) 3 | 4 | set(CMAKE_CXX_STANDARD 14) 5 | 6 | link_libraries(freeglut opengl32 glu32) 7 | 8 | add_executable(cliblisp 9 | main.cpp 10 | memory.h 11 | memory_gc.h 12 | types.h 13 | types.cpp 14 | clexer.h 15 | clexer.cpp 16 | cparser.h 17 | cparser.cpp 18 | cunit.h 19 | cunit.cpp 20 | cexception.h 21 | cexception.cpp 22 | cast.h 23 | cast.cpp 24 | cvm.cpp 25 | cvm.h 26 | csub.cpp 27 | csub.h 28 | cgui.cpp 29 | cgui.h) 30 | 31 | add_executable(cliblisp-test 32 | test.cpp 33 | memory.h 34 | memory_gc.h 35 | types.h 36 | types.cpp 37 | clexer.h 38 | clexer.cpp 39 | cparser.h 40 | cparser.cpp 41 | cunit.h 42 | cunit.cpp 43 | cexception.h 44 | cexception.cpp 45 | cast.h 46 | cast.cpp 47 | cvm.cpp 48 | cvm.h 49 | csub.cpp 50 | csub.h 51 | cgui.cpp 52 | cgui.h) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 
8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 
42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. 
The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cliblisp(C++ 简易LISP解释器 + 通用LR语法分析) 2 | 3 | 借鉴[CMiniLang](https://github.com/bajdcc/CMiniLang)的部分代码。 4 | 5 | 主要借鉴了CMiniLang的类型系统、词法分析、语法分析、AST、内存管理等代码(均为原创)。 6 | 7 | 事实证明CMiniLang的框架还是非常经典耐用的(再次强调)。 8 | 9 | **语法分析采用LR分析。项目见:[clibparser](https://github.com/bajdcc/clibparser)。** 10 | 11 | - 文法书写方式:以C++重载为基础的Parser Generator。 12 | - 识别方式:**以下推自动机为基础,向前查看一个字符、带回溯的LR分析**。 13 | - 内存管理:自制内存池。 14 | 15 | 本说明完善中,**末尾有测试用例**。 16 | 17 | 注:经[Qlib2d](https://github.com/bajdcc/Qlib2d)项目测试,本项目于**x64**环境下也可编译成功。 18 | 19 | ## 截图 20 | 21 | ![image](https://raw.githubusercontent.com/bajdcc/cliblisp/master/screenshots/1.png) 22 | 23 | ## 文章 24 | 25 | - [【Lisp系列】开篇](http://zhuanlan.zhihu.com/p/45897626) 26 | - [【Lisp系列】实现四则运算](http://zhuanlan.zhihu.com/p/46723048) 27 | - [【Lisp系列】实现GC](http://zhuanlan.zhihu.com/p/46993463) 28 | - [【Lisp系列】实现Lambda](http://zhuanlan.zhihu.com/p/47309037) 29 | - [【Lisp系列】大功告成](http://zhuanlan.zhihu.com/p/47569910) 30 | - [【Lisp系列】手动递归](http://zhuanlan.zhihu.com/p/47869195) 31 | 32 | - [【Parser系列】实现LR分析——开篇](https://zhuanlan.zhihu.com/p/52478414) 33 | - [【Parser系列】实现LR分析——生成AST](https://zhuanlan.zhihu.com/p/52528516) 34 | - [【Parser系列】实现LR分析——支持C语言文法](https://zhuanlan.zhihu.com/p/52812144) 35 | - [【Parser系列】实现LR分析——完成编译器前端!](https://zhuanlan.zhihu.com/p/53070412) 36 | 37 | ## 功能 38 | 39 | 当前完成了四则运算和常用函数,采用解释器求值。 40 | 41 | 
**运行时所有对象采用标识回收GC,采用不可变值,传递拷贝。** 42 | 43 | 已实现:引用,变量,函数,四则,比较,递归,闭包,if,测试用例。 44 | 45 | 已实现**Y-combinator**,见测试用例#47-#49,由于递归运算会大量消耗内存,因此必要时需更改cvm.h中的**VM_MEM**宏的值为更大值。 46 | 47 | **改进:将eval调用转化为手动递归,使得递归可以人工控制,后续可能将出错机制从throw方式转变为手动递归跳出方式。测试:除大数溢出外,其余均通过。** 48 | 49 | - [x] 词法分析 50 | - [x] 语法分析 51 | - [x] 内存管理 52 | - [x] 序列化 53 | - [x] 识别数字 54 | - [x] 识别S-表达式 55 | - [x] 识别Q-表达式 56 | - [x] GC 57 | - [x] 运行时环境 58 | - [x] 异常恢复 59 | - [x] 简单的内建四则运算 60 | - [x] Subroutine和Symbol 61 | - [x] nil 62 | - [x] 常用内建函数 63 | - [ ] 输入 64 | - [x] 输出 65 | - [x] 字符串处理 66 | - [x] 识别变量,设置变量 67 | - [x] 识别函数Lambda 68 | - [x] 支持递归 69 | - [ ] 完善控制流:if 70 | - [ ] 更多测试用例 71 | - [ ] 添加更多功能 72 | 73 | 内建函数: 74 | 75 | - 四则运算 76 | - 比较运算 77 | - lambda 78 | - eval 79 | - quote 80 | - list 81 | - cons 82 | - car 83 | - cdr 84 | - def 85 | - if 86 | - len 87 | - append 88 | 89 | ## 调试信息 90 | 91 | - cval结点内存申请情况 92 | - GC释放情况 93 | - 内存池结点情况 94 | - GC中的栈对象引用树 95 | 96 | 生成NGA图,去EPSILON化,生成PDA表,生成AST。 97 | 98 | 以下为下推自动机的识别过程(**太长,略**),如需查看,请修改cparser.cpp中的: 99 | 100 | ```cpp 101 | #define TRACE_PARSING 0 102 | #define DUMP_PDA 0 103 | #define DEBUG_AST 0 104 | #define CHECK_AST 0 105 | ``` 106 | 107 | 将值改为1即可。 108 | 109 | ## 使用 110 | 111 | ```cpp 112 | int main(int argc, char *argv[]) { 113 | clib::cvm vm; 114 | std::string input; 115 | while (true) { 116 | std::cout << "lisp> "; 117 | std::getline(std::cin, input); 118 | if (input == "exit") 119 | break; 120 | if (input.empty()) 121 | continue; 122 | try { 123 | vm.save(); 124 | clib::cparser p(input); 125 | auto root = p.parse(); 126 | //clib::cast::print(root, 0, std::cout); 127 | auto val = vm.run(root); 128 | clib::cvm::print(val, std::cout); 129 | std::cout << std::endl; 130 | vm.gc(); 131 | } catch (const std::exception &e) { 132 | printf("RUNTIME ERROR: %s\n", e.what()); 133 | vm.restore(); 134 | vm.gc(); 135 | } 136 | } 137 | return 0; 138 | } 139 | ``` 140 | 141 | ## 例子 142 | 143 | ```lisp 144 | lisp> + 1 2 145 | 3 146 | lisp> * 1 2 3 4 5 6 147 
| 720 148 | lisp> - 8 4 2 9 8 149 | -15 150 | lisp> + a 5 151 | + a 5 152 | COMPILER ERROR: unsupported calc op 153 | RUNTIME ERROR: std::exception 154 | lisp> + "Hello" " " "world!" 155 | "Hello world!" 156 | lisp> eval 5 157 | 5 158 | lisp> eval `(+ 1 2) 159 | 3 160 | lisp> eval (+ 1 2) 161 | 3 162 | lisp> `a 163 | `a 164 | lisp> `(a b c) 165 | `(a b c) 166 | ``` 167 | 168 | ### Subroutine 169 | 170 | ```lisp 171 | lisp> + "Project: " __project__ ", author: " __author__ 172 | "Project: cliblisp, author: bajdcc" 173 | lisp> + 174 | 175 | ``` 176 | 177 | ### List 178 | 179 | ```lisp 180 | lisp> list 181 | 182 | lisp> car (list 1 2 3) 183 | 1 184 | lisp> cdr (list 1 2 3) 185 | `(2 3) 186 | lisp> (eval (car (list + - * /))) 1 1 187 | 2 188 | ``` 189 | 190 | ### Builtin 191 | 192 | ```lisp 193 | lisp> def `(a b c d) 1 2 3 4 194 | nil 195 | lisp> + a b c d 196 | 10 197 | ``` 198 | 199 | ### Lambda 200 | 201 | ```lisp 202 | lisp> def `a (\ `(x y) `(+ x y)) 203 | nil 204 | lisp> a 205 | 206 | lisp> a 1 2 3 207 | 6 208 | ``` 209 | 210 | ### comparison 211 | 212 | ```lisp 213 | lisp> def `a (\ `(x y) `(+ x y)) 214 | nil 215 | lisp> == (a 1 2) (a 2 1) 216 | 1 217 | lisp> < (a 1 2) (a 1 1) 218 | 0 219 | ``` 220 | 221 | ### recursion 222 | 223 | ```lisp 224 | lisp> def `sum (\ `(n) `(if (> n 0) `(+ n (sum (- n 1))) `0)) 225 | nil 226 | lisp> sum 100 227 | 5050 228 | lisp> sum (- 0 5) 229 | 0 230 | lisp> def `len (\ `l `(if (== l nil) `0 `(+ 1 (len (cdr l))))) 231 | nil 232 | lisp> len (list 1 2 3 ) 233 | 3 234 | ``` 235 | 236 | ## 测试用例 237 | 238 | 在目录下的test.cpp中。 239 | 240 | ```cpp 241 | TEST #1> [PASSED] (+ 1 2) => 3 242 | TEST #2> [PASSED] (* 1 2 3 4 5 6) => 720 243 | TEST #3> [PASSED] (- 8 4 2 9 8) => -15 244 | TEST #4> [PASSED] (+ "Hello" " " "world!") => "Hello world!" 
245 | TEST #5> [PASSED] (eval 5) => 5 246 | TEST #6> [PASSED] (eval `(+ 1 2)) => 3 247 | TEST #7> [PASSED] (eval (+ 1 2)) => 3 248 | TEST #8> [PASSED] `a => `a 249 | TEST #9> [PASSED] `(a b c) => `(a b c) 250 | TEST #10> [PASSED] (+ "Project: " __project__ ", author: " __author__) => "Project: cliblisp, author: bajdcc" 251 | TEST #11> [PASSED] + => 252 | TEST #12> [PASSED] (quote (testing 1 2 -3.14e+159)) => `(testing 1 2 -3.14e+159) 253 | TEST #13> [PASSED] (+ 2 2) => 4 254 | TEST #14> [PASSED] (+ (* 2 100) (* 1 10)) => 210 255 | TEST #15> [PASSED] (if (> 6 5) `(+ 1 1) `(+ 2 2)) => 2 256 | TEST #16> [PASSED] (if (< 6 5) `(+ 1 1) `(+ 2 2)) => 4 257 | TEST #17> [PASSED] (def `x 3) => 3 258 | TEST #18> [PASSED] x => 3 259 | TEST #19> [PASSED] (+ x x) => 6 260 | TEST #20> [PASSED] (begin (def `x 1) (def `x (+ x 1)) (+ x 1)) => 3 261 | TEST #21> [PASSED] ((\ `x `(+ x x)) 5) => 10 262 | TEST #22> [PASSED] (def `twice (\ `x `(* 2 x))) => 263 | TEST #23> [PASSED] (twice 5) => 10 264 | TEST #24> [PASSED] (def `compose (\ `(f g) `(\ `x `(f (g x))))) => 265 | TEST #25> [PASSED] ((compose list twice) 5) => `10 266 | TEST #26> [PASSED] (def `repeat (\ `f `(compose f f))) => 267 | TEST #27> [PASSED] ((repeat twice) 5) => 20 268 | TEST #28> [PASSED] ((repeat (repeat twice)) 5) => 80 269 | TEST #29> [PASSED] (def `fact (\ `n `(if (<= n 1) `1 `(* n (fact (- n 1)))))) => 271 | TEST #30> [PASSED] (fact 3) => 6 272 | TEST #31> [ERROR ] (fact 50) => 0 REQUIRE: 30414093201713378043612608166064768844377641568960512000000000000 273 | TEST #32> [PASSED] (fact 12) => 479001600 274 | TEST #33> [PASSED] (def `abs (\ `n `((if (> n 0) `+ `-) 0 n))) => n 0) `+ `-) 0 n)> 275 | TEST #34> [PASSED] (abs -3) => 3 276 | TEST #35> [PASSED] (list (abs -3) (abs 0) (abs 3)) => `(3 0 3) 277 | TEST #36> [PASSED] (def `combine (\ `f `(\ `(x y) `(if (null? 
x) `nil `(f (list (car x) (car y)) ((combine f) (cdr x) (c 278 | dr y))))))) => 279 | TEST #37> [PASSED] (def `zip (combine cons)) => 281 | TEST #38> [PASSED] (zip (list 1 2 3 4) (list 5 6 7 8)) => `(`(1 5) `(2 6) `(3 7) `(4 8)) 282 | TEST #39> [PASSED] (def `riff-shuffle (\ `deck `(begin (def `take (\ `(n seq) `(if (<= n 0) `nil `(cons (car seq) (take 283 | (- n 1) (cdr seq)))))) (def `drop (\ `(n seq) `(if (<= n 0) `seq `(drop (- n 1) (cdr seq))))) (def `mid (\ `seq `(/ (len 284 | seq) 2))) ((combine append) (take (mid deck) deck) (drop (mid deck) deck))))) => 288 | TEST #40> [PASSED] (riff-shuffle (list 1 2 3 4 5 6 7 8)) => `(1 5 2 6 3 7 4 8) 289 | TEST #41> [PASSED] ((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8)) => `(1 3 5 7 2 4 6 8) 290 | TEST #42> [PASSED] (riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8)))) => `(1 2 3 4 5 6 7 8) 291 | TEST #43> [PASSED] (def `apply (\ `(item L) `(eval (cons item L)))) => 292 | TEST #44> [PASSED] (apply + `(1 2 3)) => 6 293 | TEST #45> [PASSED] (def `sum (\ `n `(if (< n 2) `1 `(+ n (sum (- n 1)))))) => 295 | TEST #46> [PASSED] (sum 10) => 55 296 | TEST #47> [PASSED] (def `Y (\ `f `((\ `self `(f (\ `x `((self self) x)))) (\ `self `(f (\ `x `((self self) x))))))) => 297 | 298 | TEST #48> [PASSED] (def `Y_fib (\ `f `(\ `n `(if (<= n 2) `1 `(+ (f (- n 1)) (f (- n 2))))))) => 300 | TEST #49> [PASSED] ((Y Y_fib) 5) => 5 301 | TEST #50> [PASSED] (def `range (\ `(a b) `(if (== a b) `nil `(cons a (range (+ a 1) b))))) => 303 | TEST #51> [PASSED] (range 1 10) => `(1 2 3 4 5 6 7 8 9) 304 | TEST #52> [PASSED] (apply + (range 1 10)) => 45 305 | ==== ALL TEST PASSED [51/52] ==== 306 | ``` 307 | 308 | ## 目标 309 | 310 | 1. [ ] 对内存使用进行优化,减少不必要的拷贝操作。 311 | 2. [ ] 添加更多测试用例,确保GC工作的可靠性,避免循环引用与僵尸引用。 312 | 3. 
[ ] 用LISP语言实现高阶函数。 313 | 314 | ## 目标 315 | 316 | 当前进展: 317 | 318 | - [x] 生成文法表达式 319 | - [x] 序列 320 | - [x] 分支 321 | - [x] 可选 322 | - [x] 跳过单词 323 | - [x] 生成LR项目 324 | - [x] 生成非确定性文法自动机 325 | - [x] 闭包求解 326 | - [x] 去Epsilon 327 | - [x] 打印NGA结构 328 | - [x] 生成下推自动机 329 | - [x] 求First集合,并输出 330 | - [x] 检查文法有效性(如不产生Epsilon) 331 | - [x] 检查纯左递归 332 | - [x] 生成PDA 333 | - [x] 打印PDA结构(独立于内存池) 334 | - [x] 生成抽象语法树 335 | - [x] 自动生成AST结构 336 | - [x] 美化AST结构 337 | - [ ] 语义动作 338 | - [x] 设计语言 339 | - [x] 使用[C语言文法](https://github.com/antlr/grammars-v4/blob/master/c/C.g4) 340 | - [x] 实现回溯,解决移进/归约冲突问题,解决回溯的诸多BUG 341 | - [x] 实现LISP的循环 342 | - [ ] LISP虚拟机 343 | - [x] 创建窗口 344 | - [ ] 更多内置指令 345 | - [ ] 控制台交互 346 | - [x] 图形交互 347 | - [ ] 模拟操作系统 348 | 349 | 1. 将文法树转换成表(完成) 350 | 2. 根据PDA表生成AST(完成) 351 | 352 | ## 改进 353 | 354 | 1. [x] ~~修改了Lexer识别数字的问题~~ 355 | 2. [x] ~~优化了内存池合并块算法,当没有元素被使用时将重置~~ 356 | 3. [x] ~~添加错误恢复功能,异常时恢复GC的存储栈大小~~ 357 | 4. [x] ~~更改了字符串管理方式,设为不可变~~ 358 | 5. [x] ~~GC申请内存后自动清零~~ 359 | 6. [x] ~~内存池算法可能存在问题,导致不定时崩溃~~ 360 | 7. [x] ~~解决了数字溢出的问题~~ 361 | 8. [x] ~~解决函数闭包问题~~ 362 | 9. [x] ~~改进cons的实现~~ 363 | 10. [x] ~~当使用函数不存在时发生崩溃,原因为内存池容量不够~~ 364 | 11. [x] ~~调用递归时,环境变量env被free的问题,已修正~~ 365 | 12. [x] ~~修正了类似double后缀的数字识别问题~~ 366 | 13. [x] ~~修正了def函数无法修改外界环境的问题~~ 367 | 14. [x] ~~修正了内存池申请的bug:删去为头时的情况;改正申请时块大小为size+1~~ 368 | 15. [x] ~~修正了GC中unlink的bug~~ 369 | 370 | - [ ] 生成LR项目集时将@符号提到集合的外面,减少状态 371 | - [x] PDA表的生成时使用了内存池来保存结点,当生成PDA表后,内存池可以全部回收 372 | - [x] 生成AST时减少嵌套结点 373 | - [ ] 优化回溯时产生的数据结构,减少拷贝 374 | - [ ] 解析成功时释放结点内存 375 | - [x] 将集合结点的标记修改成枚举 376 | - [ ] 设置的终结符可以不添加到语法树中 377 | 378 | ## 参考 379 | 380 | 1. [CMiniLang](https://github.com/bajdcc/CMiniLang) 381 | 2. [lysp](http://piumarta.com/software/lysp/lysp-1.1/lysp.c) 382 | 3. [MyScript](https://github.com/bajdcc/MyScript) 383 | 4. [Build Your Own Lisp](http://buildyourownlisp.com) 384 | 5. [Lisp interpreter in 90 lines of C++](http://howtowriteaprogram.blogspot.com/2010/11/lisp-interpreter-in-90-lines-of-c.html) 385 | 6. 
[CParser](https://github.com/bajdcc/CParser) 386 | 7. [vczh GLR Parser](https://github.com/vczh-libraries/Vlpp/tree/master/Source/Parsing) -------------------------------------------------------------------------------- /cast.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include "cast.h" 9 | 10 | namespace clib { 11 | 12 | cast::cast() { 13 | init(); 14 | } 15 | 16 | void cast::init() { 17 | root = new_node(ast_root); 18 | current = root; 19 | } 20 | 21 | ast_node *cast::get_root() const { 22 | return root; 23 | } 24 | 25 | ast_node *cast::new_node(ast_t type) { 26 | if (nodes.available() < 64) { 27 | printf("AST ERROR: 'nodes' out of memory\n"); 28 | throw std::exception(); 29 | } 30 | auto node = nodes.alloc(); 31 | memset(node, 0, sizeof(ast_node)); 32 | node->flag = type; 33 | return node; 34 | } 35 | 36 | ast_node *cast::set_child(ast_node *node, ast_node *child) { 37 | child->parent = node; 38 | if (node->child == nullptr) { // 没有孩子 39 | node->child = child; 40 | child->prev = child->next = child; 41 | } else { // 有孩子,添加到末尾 42 | child->prev = node->child->prev; 43 | child->next = node->child; 44 | node->child->prev->next = child; 45 | node->child->prev = child; 46 | } 47 | return node; 48 | } 49 | 50 | ast_node *cast::set_sibling(ast_node *node, ast_node *sibling) { 51 | sibling->parent = node->parent; 52 | sibling->prev = node; 53 | sibling->next = node->next; 54 | node->next = sibling; 55 | return sibling; 56 | } 57 | 58 | int cast::children_size(ast_node *node) { 59 | if (!node || !node->child) 60 | return 0; 61 | node = node->child; 62 | auto i = node; 63 | auto n = 0; 64 | do { 65 | n++; 66 | i = i->next; 67 | assert(i); 68 | } while (i != node); 69 | return n; 70 | } 71 | 72 | ast_node *cast::add_child(ast_node *node) { 73 | return set_child(current, node); 74 | } 75 | 76 | ast_node *cast::new_child(ast_t type, bool 
step) { 77 | auto node = new_node(type); 78 | set_child(current, node); 79 | if (step) 80 | current = node; 81 | return node; 82 | } 83 | 84 | ast_node *cast::new_sibling(ast_t type, bool step) { 85 | auto node = new_node(type); 86 | set_sibling(current, node); 87 | if (step) 88 | current = node; 89 | return node; 90 | } 91 | 92 | void cast::remove(ast_node *node) { 93 | if (node->parent && node->parent->child == node) { 94 | if (node->next == node) { 95 | node->parent->child = nullptr; 96 | } else { 97 | node->parent->child = node->next; 98 | } 99 | } 100 | if (node->prev && node->prev != node) { 101 | node->prev->next = node->next; 102 | } 103 | if (node->next && node->next != node) { 104 | node->next->prev = node->prev; 105 | } 106 | if (node->child) { 107 | auto f = node->child; 108 | auto i = f; 109 | i->parent = nullptr; 110 | if (i->next != f) { 111 | i = i->next; 112 | do { 113 | i->parent = nullptr; 114 | i = i->next; 115 | } while (i != f); 116 | } 117 | } 118 | nodes.free(node); 119 | } 120 | 121 | void cast::to(ast_to_t type) { 122 | switch (type) { 123 | case to_parent: 124 | current = current->parent; 125 | break; 126 | case to_prev: 127 | current = current->prev; 128 | break; 129 | case to_next: 130 | current = current->next; 131 | break; 132 | case to_child: 133 | current = current->child; 134 | break; 135 | } 136 | } 137 | 138 | void cast::set_str(ast_node *node, const string_t &str) { 139 | if (strings.available() < 64) { 140 | printf("AST ERROR: 'strings' out of memory\n"); 141 | throw std::exception(); 142 | } 143 | auto len = str.length(); 144 | auto s = strings.alloc_array(len + 1); 145 | memcpy(s, str.c_str(), len); 146 | s[len] = 0; 147 | node->data._string = s; 148 | } 149 | 150 | std::string cast::display_str(const char *str) { 151 | std::stringstream ss; 152 | for (auto c = str; *c != 0; c++) { 153 | if (*c < 0) { 154 | ss << *c; 155 | } else if (isprint(*c)) { 156 | ss << *c; 157 | } else { 158 | if (*c == '\n') 159 | ss << "\\n"; 160 | 
else 161 | ss << "."; 162 | } 163 | } 164 | return ss.str(); 165 | } 166 | 167 | void cast::reset() { 168 | nodes.clear(); 169 | strings.clear(); 170 | init(); 171 | } 172 | 173 | template 174 | static void ast_recursion(ast_node *node, int level, std::ostream &os, T f) { 175 | if (node == nullptr) 176 | return; 177 | auto i = node; 178 | if (i->next == i) { 179 | f(i, level, os); 180 | return; 181 | } 182 | f(i, level, os); 183 | i = i->next; 184 | while (i != node) { 185 | f(i, level, os); 186 | i = i->next; 187 | } 188 | } 189 | 190 | void cast::print(ast_node *node, int level, std::ostream &os) { 191 | if (node == nullptr) 192 | return; 193 | auto rec = [&](auto n, auto l, auto &os) { cast::print(n, l, os); }; 194 | auto type = (ast_t) node->flag; 195 | switch (type) { 196 | case ast_root: // 根结点,全局声明 197 | ast_recursion(node->child, level, os, rec); 198 | break; 199 | case ast_env: 200 | case ast_sub: 201 | case ast_lambda: 202 | break; 203 | case ast_sexpr: 204 | os << '('; 205 | ast_recursion(node->child, level + 1, os, rec); 206 | os << ')'; 207 | break; 208 | case ast_qexpr: 209 | os << '`'; 210 | if (node->child && node->child == node->child->next) { 211 | ast_recursion(node->child, level + 1, os, rec); 212 | } else { 213 | os << '('; 214 | ast_recursion(node->child, level + 1, os, rec); 215 | os << ')'; 216 | } 217 | break; 218 | case ast_literal: 219 | os << node->data._string; 220 | break; 221 | case ast_string: 222 | os << '"' << display_str(node->data._string) << '"'; 223 | break; 224 | case ast_char: 225 | if (isprint(node->data._char)) 226 | os << '\'' << node->data._char << '\''; 227 | else if (node->data._char == '\n') 228 | os << "'\\n'"; 229 | else 230 | os << "'\\x" << std::setiosflags(std::ios::uppercase) << std::hex 231 | << std::setfill('0') << std::setw(2) 232 | << (unsigned int) node->data._char << '\''; 233 | break; 234 | case ast_uchar: 235 | os << (unsigned int) node->data._uchar; 236 | break; 237 | case ast_short: 238 | os << 
node->data._short; 239 | break; 240 | case ast_ushort: 241 | os << node->data._ushort; 242 | break; 243 | case ast_int: 244 | os << node->data._int; 245 | break; 246 | case ast_uint: 247 | os << node->data._uint; 248 | break; 249 | case ast_long: 250 | os << node->data._long; 251 | break; 252 | case ast_ulong: 253 | os << node->data._ulong; 254 | break; 255 | case ast_float: 256 | os << node->data._float; 257 | break; 258 | case ast_double: 259 | os << node->data._double; 260 | break; 261 | } 262 | if (node->parent) { 263 | if ((node->parent->flag == ast_qexpr || node->parent->flag == ast_sexpr) && 264 | node->next != node->parent->child) { 265 | os << ' '; 266 | } 267 | } 268 | } 269 | 270 | void cast::print2(ast_node *node, int level, std::ostream &os) { 271 | if (node == nullptr) 272 | return; 273 | auto rec = [&](auto n, auto l, auto &os) { cast::print2(n, l, os); }; 274 | auto type = (ast_t) node->flag; 275 | os << std::setfill(' ') << std::setw(level) << ""; 276 | switch (type) { 277 | case ast_root: // 根结点,全局声明 278 | ast_recursion(node->child, level, os, rec); 279 | break; 280 | case ast_collection: 281 | os << COLL_STRING(node->data._coll) << std::endl; 282 | ast_recursion(node->child, level + 1, os, rec); 283 | break; 284 | case ast_keyword: 285 | os << "keyword: " << KEYWORD_STRING(node->data._keyword) << std::endl; 286 | break; 287 | case ast_operator: 288 | os << "operator: " << OP_STRING(node->data._op) << std::endl; 289 | break; 290 | case ast_literal: 291 | os << "id: " << node->data._string << std::endl; 292 | break; 293 | case ast_string: 294 | os << "string: " << '"' << display_str(node->data._string) << '"' << std::endl; 295 | break; 296 | case ast_char: 297 | os << "char: "; 298 | if (isprint(node->data._char)) 299 | os << '\'' << node->data._char << '\''; 300 | else if (node->data._char == '\n') 301 | os << "'\\n'"; 302 | else 303 | os << "'\\x" << std::setiosflags(std::ios::uppercase) << std::hex 304 | << std::setfill('0') << std::setw(2) 305 
| << (unsigned int) node->data._char << '\''; 306 | os << std::endl; 307 | break; 308 | case ast_uchar: 309 | os << "uchar: " << (unsigned int) node->data._uchar << std::endl; 310 | break; 311 | case ast_short: 312 | os << "short: " << node->data._short << std::endl; 313 | break; 314 | case ast_ushort: 315 | os << "ushort: " << node->data._ushort << std::endl; 316 | break; 317 | case ast_int: 318 | os << "int: " << node->data._int << std::endl; 319 | break; 320 | case ast_uint: 321 | os << "uint: " << node->data._uint << std::endl; 322 | break; 323 | case ast_long: 324 | os << "long: " << node->data._long << std::endl; 325 | break; 326 | case ast_ulong: 327 | os << "ulong: " << node->data._ulong << std::endl; 328 | break; 329 | case ast_float: 330 | os << "float: " << node->data._float << std::endl; 331 | break; 332 | case ast_double: 333 | os << "double: " << node->data._double << std::endl; 334 | break; 335 | } 336 | } 337 | 338 | ast_node *cast::index(ast_node *node, int index) { 339 | auto child = node->child; 340 | if (child) { 341 | if (child->next == child) { 342 | return index == 0 ? child : nullptr; 343 | } 344 | auto head = child; 345 | for (auto i = 0; i < index; ++i) { 346 | child = child->next; 347 | if (child == head) 348 | return nullptr; 349 | } 350 | return child; 351 | } 352 | return nullptr; 353 | } 354 | 355 | ast_node *cast::index(ast_node *node, const string_t &index) { 356 | auto child = node->child; 357 | if (child) { 358 | if (child->next == child) { 359 | return index == child->child->data._string ? 
child : nullptr; 360 | } 361 | auto head = child; 362 | auto i = head; 363 | do { 364 | if (index == i->child->data._string) 365 | return i->child->next; 366 | i = i->next; 367 | } while (i != head); 368 | } 369 | return nullptr; 370 | } 371 | 372 | std::tuple ast_list[] = { 373 | std::make_tuple(ast_root, "root", l_none, 0), 374 | std::make_tuple(ast_collection, "coll", l_none, 0), 375 | std::make_tuple(ast_keyword, "keyword", l_none, 0), 376 | std::make_tuple(ast_operator, "operator", l_operator , 0), 377 | std::make_tuple(ast_literal, "literal", l_identifier, 0), 378 | std::make_tuple(ast_string, "string", l_string, 0), 379 | std::make_tuple(ast_char, "char", l_char, 1), 380 | std::make_tuple(ast_uchar, "uchar", l_uchar, 2), 381 | std::make_tuple(ast_short, "short", l_short, 3), 382 | std::make_tuple(ast_ushort, "ushort", l_ushort, 4), 383 | std::make_tuple(ast_int, "int", l_int, 5), 384 | std::make_tuple(ast_uint, "uint", l_uint, 6), 385 | std::make_tuple(ast_long, "long", l_long, 7), 386 | std::make_tuple(ast_ulong, "ulong", l_ulong, 8), 387 | std::make_tuple(ast_float, "float", l_float, 9), 388 | std::make_tuple(ast_double, "double", l_double, 10), 389 | std::make_tuple(ast_env, "env", l_none, 0), 390 | std::make_tuple(ast_sub, "sub", l_none, 0), 391 | std::make_tuple(ast_lambda, "lambda", l_none, 0), 392 | std::make_tuple(ast_sexpr, "sexpr", l_none, 0), 393 | std::make_tuple(ast_qexpr, "qexpr", l_none, 0), 394 | }; 395 | 396 | const string_t &cast::ast_str(ast_t type) { 397 | return std::get<1>(ast_list[type]); 398 | } 399 | 400 | bool cast::ast_equal(ast_t type, lexer_t lex) { 401 | return std::get<2>(ast_list[type]) == lex; 402 | } 403 | 404 | int cast::ast_prior(ast_t type) { 405 | return std::get<3>(ast_list[type]); 406 | } 407 | 408 | void cast::unlink(ast_node *node) { 409 | if (node->parent) { 410 | auto &parent = node->parent; 411 | auto &ptr = node; 412 | auto i = parent->child; 413 | if (i->next == i) { 414 | assert(i->prev == i); 415 | 
assert(parent->child == node); 416 | parent->child = nullptr; 417 | node->parent = nullptr; 418 | node->prev = node->next = node; 419 | return; 420 | } 421 | if (ptr == parent->child) { 422 | parent->child = i->next; 423 | i->prev->next = parent->child; 424 | parent->child->prev = i->prev; 425 | node->parent = nullptr; 426 | node->prev = node->next = node; 427 | return; 428 | } 429 | do { 430 | if (i->next == ptr) { 431 | if (i->next->next == parent->child) { 432 | i->next = parent->child; 433 | parent->child->prev = i; 434 | } else { 435 | i->next->next->prev = i; 436 | i->next = i->next->next; 437 | } 438 | break; 439 | } else { 440 | i = i->next; 441 | } 442 | } while (i != parent->child); 443 | node->parent = nullptr; 444 | node->prev = node->next = node; 445 | } 446 | } 447 | } 448 | -------------------------------------------------------------------------------- /cast.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #ifndef CLIBLISP_CAST_H 7 | #define CLIBLISP_CAST_H 8 | 9 | #include "types.h" 10 | #include "memory.h" 11 | 12 | #define AST_NODE_MEM (8 * 1024) 13 | #define AST_STR_MEM (8 * 1024) 14 | 15 | namespace clib { 16 | 17 | enum ast_t { 18 | ast_root, 19 | ast_collection, 20 | ast_keyword, 21 | ast_operator, 22 | ast_literal, 23 | ast_string, 24 | ast_char, 25 | ast_uchar, 26 | ast_short, 27 | ast_ushort, 28 | ast_int, 29 | ast_uint, 30 | ast_long, 31 | ast_ulong, 32 | ast_float, 33 | ast_double, 34 | ast_env, 35 | ast_sub, 36 | ast_lambda, 37 | ast_sexpr, 38 | ast_qexpr, 39 | }; 40 | 41 | enum ast_to_t { 42 | to_parent, 43 | to_prev, 44 | to_next, 45 | to_child, 46 | }; 47 | 48 | // 结点 49 | struct ast_node { 50 | // 类型 51 | uint32_t flag; 52 | 53 | union { 54 | #define DEFINE_NODE_DATA(t) LEX_T(t) _##t; 55 | DEFINE_NODE_DATA(char) 56 | DEFINE_NODE_DATA(uchar) 57 | DEFINE_NODE_DATA(short) 58 | DEFINE_NODE_DATA(ushort) 59 | DEFINE_NODE_DATA(int) 60 
| DEFINE_NODE_DATA(uint) 61 | DEFINE_NODE_DATA(long) 62 | DEFINE_NODE_DATA(ulong) 63 | DEFINE_NODE_DATA(float) 64 | DEFINE_NODE_DATA(double) 65 | #undef DEFINE_NODE_DATA 66 | const char *_string; 67 | const char *_identifier; 68 | keyword_t _keyword; 69 | operator_t _op; 70 | coll_t _coll; 71 | } data; // 数据 72 | 73 | // 树型数据结构,广义表 74 | ast_node *parent; // 父亲 75 | ast_node *prev; // 左兄弟 76 | ast_node *next; // 右兄弟 77 | ast_node *child; // 最左儿子 78 | }; 79 | 80 | class cast { 81 | public: 82 | cast(); 83 | ~cast() = default; 84 | 85 | cast(const cast &) = delete; 86 | cast &operator=(const cast &) = delete; 87 | 88 | ast_node *get_root() const; 89 | 90 | ast_node *new_node(ast_t type); 91 | ast_node *new_child(ast_t type, bool step = true); 92 | ast_node *new_sibling(ast_t type, bool step = true); 93 | 94 | void remove(ast_node*); 95 | 96 | ast_node *add_child(ast_node*); 97 | static ast_node *set_child(ast_node*, ast_node*); 98 | static ast_node *set_sibling(ast_node*, ast_node*); 99 | static int children_size(ast_node*); 100 | 101 | void set_str(ast_node *node, const string_t &str); 102 | static std::string display_str(const char *str); 103 | 104 | void to(ast_to_t type); 105 | 106 | static void print(ast_node *node, int level, std::ostream &os); 107 | static void print2(ast_node *node, int level, std::ostream &os); 108 | static const string_t &ast_str(ast_t type); 109 | static bool ast_equal(ast_t type, lexer_t lex); 110 | static int ast_prior(ast_t type); 111 | 112 | static void unlink(ast_node *node); 113 | 114 | static ast_node *index(ast_node *node, int index); 115 | static ast_node *index(ast_node *node, const string_t &index); 116 | 117 | void reset(); 118 | private: 119 | void init(); 120 | 121 | private: 122 | memory_pool nodes; // 全局AST结点内存管理 123 | memory_pool strings; // 全局字符串管理 124 | ast_node *root; // 根结点 125 | ast_node *current; // 当前结点 126 | }; 127 | } 128 | 129 | #endif //CLIBLISP_CAST_H 130 | 
-------------------------------------------------------------------------------- /cexception.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: clibparser 3 | // Author: bajdcc 4 | // 5 | #include "cexception.h" 6 | 7 | namespace clib { 8 | cexception::cexception(const string_t &msg) noexcept : msg(msg) { 9 | 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /cexception.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: clibparser 3 | // Author: bajdcc 4 | // 5 | #ifndef CLIBVM_EXCEPTION_H 6 | #define CLIBVM_EXCEPTION_H 7 | 8 | #include 9 | #include "types.h" 10 | 11 | namespace clib { 12 | class cexception : public std::exception { // exception that carries a message string 13 | public: 14 | explicit cexception(const string_t &msg) noexcept; 15 | ~cexception() = default; 16 | const char *what() const noexcept override { return msg.c_str(); } // lets handlers that catch std::exception report the stored message instead of the generic default text 17 | cexception(const cexception& e) = default; 18 | cexception &operator = (const cexception& e) = default; 19 | // Message supplied at construction; also returned by what(). 20 | string_t msg; 21 | }; 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /cgui.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #include 7 | #include "cgui.h" 8 | 9 | namespace clib { 10 | 11 | cgui::cgui() { 12 | auto cs = std::vector{ 13 | R"(def `put-str (\ `s `(map ui-put (word s))))", 14 | R"(def `ui-put-delay (\ `(c t) `(begin (ui-put c) (conf `(record)) (conf (attr `wait t)))))", 15 | R"(def `put-str-delay (\ `(s t) `(map (\ `c `(ui-put-delay c t)) (word s))))", 16 | R"(def `(i S) 0 (word __logo__))", 17 | R"(conf `(ticks 1000))", 18 | R"(if (< i (len S)) `(begin (ui-put (index S i)) (def `i (+ i)) (conf `continue)) `(conf `break))", 19 | R"(conf `(ticks 5))", 20 | R"(put-str-delay "Hello world!\n" 0.4d)", 21 | R"(put-str "Welcome to cliblisp by bajdcc!\n")", 22 | }; 23 | 
std::copy(cs.begin(), cs.end(), std::back_inserter(codes)); 24 | } 25 | 26 | cgui &cgui::singleton() { 27 | static clib::cgui gui; 28 | return gui; 29 | } 30 | 31 | void cgui::draw() { 32 | for (int i = 0; i < ticks; ++i) { 33 | tick(); 34 | } 35 | draw_text(); 36 | } 37 | 38 | void cgui::draw_text() { 39 | glMatrixMode(GL_PROJECTION); 40 | glPushMatrix(); 41 | glLoadIdentity(); 42 | int w = glutGet(GLUT_WINDOW_WIDTH); 43 | int h = glutGet(GLUT_WINDOW_HEIGHT); 44 | int width = GUI_COLS * GUI_FONT_W; 45 | int height = GUI_ROWS * GUI_FONT_H; 46 | gluOrtho2D(0, w, h, 0); 47 | glMatrixMode(GL_MODELVIEW); 48 | glPushMatrix(); 49 | glLoadIdentity(); 50 | 51 | glColor3f(0.9f, 0.9f, 0.9f); 52 | int x = std::max((w - width) / 2, 0); 53 | int y = std::max((h - height) / 2, 0); 54 | 55 | for (auto i = 0; i < GUI_ROWS; ++i) { 56 | glRasterPos2i(x, y); 57 | for (auto j = 0; j < GUI_COLS; ++j) { 58 | glutBitmapCharacter(GUI_FONT, buffer[i][j]); 59 | } 60 | y += GUI_FONT_H; 61 | } 62 | 63 | glPopMatrix(); 64 | glMatrixMode(GL_PROJECTION); 65 | glPopMatrix(); 66 | } 67 | 68 | void cgui::tick() { 69 | auto c = 0; 70 | auto error = false; 71 | if (running) { 72 | try { 73 | auto val = vm.run(cycle, c); 74 | if (val != nullptr) { 75 | vm.gc(); 76 | vm.dump(); 77 | running = false; 78 | } 79 | } catch (const std::exception &e) { 80 | error = true; 81 | printf("RUNTIME ERROR: %s\n", e.what()); 82 | vm.restore(); 83 | vm.gc(); 84 | running = false; 85 | } 86 | } else { 87 | if (!codes.empty()) { 88 | current_code = codes.front(); 89 | codes.pop_front(); 90 | try { 91 | auto root = p.parse(current_code); 92 | vm.prepare(root); 93 | auto val = vm.run(GUI_CYCLES, c); 94 | if (val != nullptr) { 95 | vm.gc(); 96 | vm.dump(); 97 | } else { 98 | running = true; 99 | } 100 | } catch (const std::exception &e) { 101 | error = true; 102 | printf("RUNTIME ERROR: %s\n", e.what()); 103 | vm.restore(); 104 | vm.gc(); 105 | } 106 | } 107 | } 108 | if (continues > 0) { 109 | if (error) { 110 | continues 
= 0; 111 | current_code = ""; 112 | } else { 113 | codes.push_front(current_code); 114 | } 115 | } 116 | } 117 | 118 | void cgui::put_char(char c) { 119 | if (c == '\n') { 120 | if (ptr_y == GUI_ROWS - 1) { 121 | new_line(); 122 | } else { 123 | ptr_x = 0; 124 | ptr_y++; 125 | } 126 | } else if (c == '\b') { 127 | if (ptr_mx + ptr_my * GUI_COLS < ptr_x + ptr_y * GUI_COLS) { 128 | if (ptr_y == 0) { 129 | if (ptr_x != 0) { 130 | draw_char('\u0000'); 131 | ptr_x--; 132 | } 133 | } else { 134 | if (ptr_x != 0) { 135 | draw_char('\u0000'); 136 | ptr_x--; 137 | } else { 138 | draw_char('\u0000'); 139 | ptr_x = GUI_COLS - 1; 140 | ptr_y--; 141 | } 142 | } 143 | } 144 | } else if (c == '\u0002') { 145 | ptr_x--; 146 | while (ptr_x >= 0) { 147 | draw_char('\u0000'); 148 | ptr_x--; 149 | } 150 | ptr_x = 0; 151 | } else if (c == '\r') { 152 | ptr_x = 0; 153 | } else if (ptr_x == GUI_COLS - 1) { 154 | if (ptr_y == GUI_ROWS - 1) { 155 | draw_char(c); 156 | new_line(); 157 | } else { 158 | draw_char(c); 159 | ptr_x = 0; 160 | ptr_y++; 161 | } 162 | } else { 163 | draw_char(c); 164 | ptr_x++; 165 | } 166 | } 167 | 168 | void cgui::new_line() { 169 | ptr_x = 0; 170 | for (int i = 0; i < GUI_ROWS - 1; ++i) { 171 | std::copy(buffer[i + 1].begin(), buffer[i + 1].end(), buffer[i].begin()); 172 | } 173 | std::fill(buffer[GUI_ROWS - 1].begin(), buffer[GUI_ROWS - 1].end(), 0); 174 | } 175 | 176 | void cgui::draw_char(const char &c) { 177 | buffer[ptr_y][ptr_x] = c; 178 | } 179 | 180 | void cgui::set_cycle(int cycle) { 181 | this->cycle = cycle; 182 | } 183 | 184 | void cgui::set_ticks(int ticks) { 185 | this->ticks = ticks; 186 | } 187 | 188 | void cgui::record() { 189 | record_now = std::chrono::high_resolution_clock::now(); 190 | } 191 | 192 | bool cgui::reach(const decimal &d) { 193 | auto now = std::chrono::high_resolution_clock::now(); 194 | return std::chrono::duration_cast>(now - record_now).count() > d; 195 | } 196 | 197 | void cgui::control(int type) { 198 | if (type == 0) { // 
continue 199 | continues++; 200 | } else if (type == 1) { // break 201 | continues = 0; 202 | } 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /cgui.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #ifndef CLIBLISP_CGUI_H 7 | #define CLIBLISP_CGUI_H 8 | 9 | #include 10 | #include 11 | #include 12 | #include "types.h" 13 | #include "cparser.h" 14 | #include "cvm.h" 15 | 16 | #define GUI_FONT GLUT_BITMAP_9_BY_15 17 | #define GUI_FONT_W 9 18 | #define GUI_FONT_H 15 19 | #define GUI_ROWS 30 20 | #define GUI_COLS 84 21 | #define GUI_SIZE (GUI_ROWS * GUI_COLS) 22 | #define GUI_CYCLES 50 23 | #define GUI_TICKS 5 24 | 25 | namespace clib { 26 | 27 | class cgui { 28 | public: 29 | cgui(); 30 | ~cgui() = default; 31 | 32 | cgui(const cgui &) = delete; 33 | cgui &operator=(const cgui &) = delete; 34 | 35 | void draw(); 36 | 37 | void put_char(char c); 38 | 39 | void set_cycle(int cycle); 40 | void set_ticks(int ticks); 41 | 42 | void record(); 43 | bool reach(const decimal &d); 44 | void control(int type); 45 | 46 | private: 47 | void tick(); 48 | void draw_text(); 49 | 50 | void new_line(); 51 | inline void draw_char(const char &c); 52 | 53 | public: 54 | static cgui &singleton(); 55 | 56 | private: 57 | std::array, GUI_ROWS> buffer; 58 | std::deque codes; 59 | cvm vm; 60 | cparser p; 61 | bool running{false}; 62 | int cycle{ GUI_CYCLES }; 63 | int ticks{ GUI_TICKS }; 64 | int ptr_x{0}; 65 | int ptr_y{0}; 66 | int ptr_mx{0}; 67 | int ptr_my{0}; 68 | int continues{0}; 69 | string_t current_code; 70 | std::chrono::system_clock::time_point record_now; 71 | }; 72 | } 73 | 74 | #endif //CLIBLISP_CGUI_H 75 | -------------------------------------------------------------------------------- /clexer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: CMiniLang 3 | // 
Author: bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "clexer.h" 11 | 12 | namespace clib { 13 | 14 | clexer::clexer(string_t str) : str(str) { 15 | length = (uint) str.length(); 16 | assert(length > 0); 17 | initMap(); 18 | } 19 | 20 | clexer::~clexer() = default; 21 | 22 | #define DEFINE_LEXER_GETTER(t) \ 23 | LEX_T(t) clexer::get_##t() const \ 24 | { \ 25 | return bags._##t; \ 26 | } 27 | 28 | DEFINE_LEXER_GETTER(char) 29 | DEFINE_LEXER_GETTER(uchar) 30 | DEFINE_LEXER_GETTER(short) 31 | DEFINE_LEXER_GETTER(ushort) 32 | DEFINE_LEXER_GETTER(int) 33 | DEFINE_LEXER_GETTER(uint) 34 | DEFINE_LEXER_GETTER(long) 35 | DEFINE_LEXER_GETTER(ulong) 36 | DEFINE_LEXER_GETTER(float) 37 | DEFINE_LEXER_GETTER(double) 38 | DEFINE_LEXER_GETTER(operator) 39 | DEFINE_LEXER_GETTER(keyword) 40 | DEFINE_LEXER_GETTER(identifier) 41 | DEFINE_LEXER_GETTER(string) 42 | DEFINE_LEXER_GETTER(comment) 43 | DEFINE_LEXER_GETTER(space) 44 | DEFINE_LEXER_GETTER(newline) 45 | DEFINE_LEXER_GETTER(error) 46 | #undef DEFINE_LEXER_GETTER 47 | 48 | #define DEFINE_LEXER_GETTER(t) \ 49 | LEX_T(t) clexer::get_store_##t(int index) const \ 50 | { \ 51 | return storage._##t[index]; \ 52 | } 53 | 54 | DEFINE_LEXER_GETTER(char) 55 | DEFINE_LEXER_GETTER(uchar) 56 | DEFINE_LEXER_GETTER(short) 57 | DEFINE_LEXER_GETTER(ushort) 58 | DEFINE_LEXER_GETTER(int) 59 | DEFINE_LEXER_GETTER(uint) 60 | DEFINE_LEXER_GETTER(long) 61 | DEFINE_LEXER_GETTER(ulong) 62 | DEFINE_LEXER_GETTER(float) 63 | DEFINE_LEXER_GETTER(double) 64 | DEFINE_LEXER_GETTER(identifier) 65 | DEFINE_LEXER_GETTER(string) 66 | #undef DEFINE_LEXER_GETTER 67 | 68 | // 记录错误 69 | lexer_t clexer::record_error(error_t error, uint skip) { 70 | err_record_t err{}; 71 | err.line = line; // 起始行 72 | err.column = column; // 起始列 73 | err.start_idx = index; // 文本起始位置 74 | err.end_idx = index + skip; // 文本结束位置 75 | err.err = error; // 错误类型 76 | err.str = str.substr(err.start_idx, err.end_idx - err.start_idx); // 错误字符 77 | 
records.push_back(err); 78 | bags._error = error; 79 | move(skip); // 略过错误文本 80 | return l_error; 81 | } 82 | 83 | const clexer::err_record_t &clexer::recent_error() const { 84 | return records.back(); 85 | } 86 | 87 | lexer_t clexer::next() { 88 | auto c = local(); 89 | if (c == -1) { 90 | type = l_end; 91 | return l_end; 92 | } 93 | type = l_error; 94 | if (isalpha(c) || c == '_') { // 变量名或关键字 95 | type = next_alpha(); 96 | } else if (isdigit(c) || (c == '-' && isdigit(local(1)))) { // 数字 97 | if (c == '-') { 98 | move(1); 99 | type = next_digit(); 100 | if (type == l_error) 101 | return type; 102 | switch (type) { 103 | case l_char: 104 | bags._char = -bags._char; 105 | break; 106 | case l_short: 107 | bags._short = -bags._short; 108 | break; 109 | case l_int: 110 | bags._int = -bags._int; 111 | break; 112 | case l_long: 113 | bags._long = -bags._long; 114 | break; 115 | case l_float: 116 | bags._float = -bags._float; 117 | break; 118 | case l_double: 119 | bags._double = -bags._double; 120 | break; 121 | default: 122 | break; 123 | } 124 | return type; 125 | } else { 126 | type = next_digit(); 127 | } 128 | } else if (isspace(c)) { // 空白字符 129 | type = next_space(); 130 | } else if (c == '\'') { // 字符 131 | type = next_char(); 132 | } else if (c == '\"') { // 字符串 133 | type = next_string(); 134 | } else if (c == '/') { // 注释 135 | auto c2 = local(1); 136 | if (c2 == '/' || c2 == '*') { // 注释 137 | type = next_comment(); 138 | } else { 139 | type = next_operator(); 140 | bags._identifier = OP_STRING(bags._operator); 141 | type = l_identifier; 142 | } 143 | } else if (c == '`' || c == '(' || c == ')') { // 操作符 144 | type = next_operator(); 145 | } else { 146 | type = next_operator(); 147 | bags._identifier = OP_STRING(bags._operator); 148 | type = l_identifier; 149 | } 150 | return type; 151 | } 152 | 153 | lexer_t clexer::get_type() const { 154 | return type; 155 | } 156 | 157 | int clexer::get_line() const { 158 | return line; 159 | } 160 | 161 | int 
clexer::get_column() const { 162 | return column; 163 | } 164 | 165 | int clexer::get_last_line() const { 166 | return last_line; 167 | } 168 | 169 | int clexer::get_last_column() const { 170 | return last_column; 171 | } 172 | 173 | string_t clexer::current() const { 174 | switch (type) { 175 | case l_operator: 176 | return str.substr(last_index, index - last_index) + "\t[" + OPERATOR_STRING(bags._operator) + "]"; 177 | default: 178 | break; 179 | } 180 | return str.substr(last_index, index - last_index); 181 | } 182 | 183 | bool clexer::is_type(lexer_t type) const { 184 | return get_type() == type; 185 | } 186 | 187 | bool clexer::is_keyword(keyword_t type) const { 188 | return get_type() == l_keyword && get_keyword() == type; 189 | } 190 | 191 | bool clexer::is_operator(operator_t type) const { 192 | return get_type() == l_operator && get_operator() == type; 193 | } 194 | 195 | bool clexer::is_operator(operator_t type1, operator_t type2) const { 196 | return get_type() == l_operator && (get_operator() == type1 || get_operator() == type2); 197 | } 198 | 199 | bool clexer::is_number() const { 200 | return get_type() >= l_char && get_type() <= l_double; 201 | } 202 | 203 | bool clexer::is_integer() const { 204 | return get_type() >= l_char && get_type() <= l_ulong; 205 | } 206 | 207 | LEX_T(int) clexer::get_integer() const { 208 | assert(is_integer()); 209 | switch (type) { 210 | #define DEFINE_LEXER_CASE(t) case l_##t: return get_##t(); 211 | DEFINE_LEXER_CASE(char) 212 | DEFINE_LEXER_CASE(uchar) 213 | DEFINE_LEXER_CASE(short) 214 | DEFINE_LEXER_CASE(ushort) 215 | DEFINE_LEXER_CASE(int) 216 | DEFINE_LEXER_CASE(uint) 217 | DEFINE_LEXER_CASE(long) 218 | DEFINE_LEXER_CASE(ulong) 219 | #undef DEFINE_LEXER_CASE 220 | default: 221 | break; 222 | } 223 | return 0; 224 | } 225 | 226 | 227 | 228 | void clexer::move(uint idx, int inc) { 229 | last_index = index; 230 | last_line = line; 231 | last_column = column; 232 | if (inc < 0) { 233 | column += idx; 234 | } else { 235 
| column = 1; 236 | line += inc; 237 | } 238 | index += idx; 239 | } 240 | 241 | // Scales d by a power of ten: multiplies by 10 e times when e > 0, divides by 10 -e times when e < 0, and returns d unchanged when e == 0. 242 | template 243 | static T calc_exp(T d, int e) { 244 | if (e == 0) 245 | return d; 246 | else if (e > 0) 247 | for (int i = 0; i < e; i++) 248 | d *= 10; 249 | else 250 | for (int i = e; i < 0; i++) 251 | d /= 10; 252 | return d; 253 | } 254 | 255 | // Maps a signed integer lexer type (char/short/int/long) to its unsigned counterpart; any other type is returned unchanged. 256 | static lexer_t unsigned_type(lexer_t t) { 257 | switch (t) { 258 | case l_char: 259 | return l_uchar; 260 | case l_short: 261 | return l_ushort; 262 | case l_int: 263 | return l_uint; 264 | case l_long: 265 | return l_ulong; 266 | default: 267 | return t; 268 | } 269 | } 270 | 271 | // Maps a numeric type-suffix letter (c/s/i/l/f/d, either case) to its lexer type; returns l_error for any other character. 272 | static lexer_t digit_type_postfix(char c) { 273 | switch (c) { 274 | case 'C': 275 | case 'c': 276 | return l_char; 277 | case 'S': 278 | case 's': 279 | return l_short; 280 | case 'I': 281 | case 'i': 282 | return l_int; 283 | case 'L': 284 | case 'l': 285 | return l_long; 286 | case 'F': 287 | case 'f': 288 | return l_float; 289 | case 'D': 290 | case 'd': 291 | return l_double; 292 | default: 293 | return l_error; 294 | } 295 | } 296 | 297 | // Reads a type suffix at str[i], optionally preceded by 'u'/'U' for unsigned. On success i is advanced past the suffix and the resulting type is returned; a 'u' that ends the input yields the unsigned form of t. Returns l_error when i is at the end or no valid suffix letter follows; note that i has already moved past the 'u' when the letter after it is rejected. 298 | lexer_t clexer::digit_type(lexer_t t, uint &i) { 299 | if (i == length) { 300 | return l_error; 301 | } 302 | if (str[i] == 'U' || str[i] == 'u') { 303 | if (++i == length) { 304 | return unsigned_type(t); 305 | } 306 | if ((t = unsigned_type(digit_type_postfix(str[i]))) == l_error) { 307 | return l_error; 308 | } 309 | ++i; 310 | return t; 311 | } else { 312 | if ((t = digit_type_postfix(str[i])) == l_error) { 313 | return l_error; 314 | } 315 | ++i; 316 | return t; 317 | } 318 | } 319 | 320 | bool clexer::digit_from_integer(lexer_t t, LEX_T(ulong) n) { 321 | switch (t) { 322 | #define DEFINE_LEXER_CONV_INTEGER(t) case l_##t: bags._##t = (LEX_T(t)) n; break; 323 | DEFINE_LEXER_CONV_INTEGER(char) 324 | DEFINE_LEXER_CONV_INTEGER(uchar) 325 | DEFINE_LEXER_CONV_INTEGER(short) 326 | DEFINE_LEXER_CONV_INTEGER(ushort) 327 | DEFINE_LEXER_CONV_INTEGER(int) 328 | 
DEFINE_LEXER_CONV_INTEGER(uint) 329 | DEFINE_LEXER_CONV_INTEGER(long) 330 | DEFINE_LEXER_CONV_INTEGER(ulong) 331 | DEFINE_LEXER_CONV_INTEGER(float) 332 | DEFINE_LEXER_CONV_INTEGER(double) 333 | #undef DEFINE_LEXER_CONV_INTEGER 334 | default: 335 | return false; 336 | } 337 | return true; 338 | } 339 | 340 | bool clexer::digit_from_double(lexer_t t, LEX_T(double) d) { 341 | switch (t) { 342 | #define DEFINE_LEXER_CONV_INTEGER(t) case l_##t: bags._##t = (LEX_T(t)) d; break; 343 | DEFINE_LEXER_CONV_INTEGER(char) 344 | DEFINE_LEXER_CONV_INTEGER(uchar) 345 | DEFINE_LEXER_CONV_INTEGER(short) 346 | DEFINE_LEXER_CONV_INTEGER(ushort) 347 | DEFINE_LEXER_CONV_INTEGER(int) 348 | DEFINE_LEXER_CONV_INTEGER(uint) 349 | DEFINE_LEXER_CONV_INTEGER(long) 350 | DEFINE_LEXER_CONV_INTEGER(ulong) 351 | DEFINE_LEXER_CONV_INTEGER(float) 352 | DEFINE_LEXER_CONV_INTEGER(double) 353 | #undef DEFINE_LEXER_CONV_INTEGER 354 | default: 355 | return false; 356 | } 357 | return true; 358 | } 359 | 360 | // 返回数字(依照目前识别的类型) 361 | lexer_t clexer::digit_return(lexer_t t, LEX_T(ulong) n, LEX_T(double) d, uint i) { 362 | if (t == l_int) { 363 | bags._int = (int) n; 364 | } else if (t == l_double) { 365 | bags._double = d; 366 | } else if (t == l_long) { 367 | bags._long = n; 368 | } else { 369 | bags._double = d; 370 | } 371 | move(i - index); 372 | return t; 373 | } 374 | 375 | // 十六进制字符转十进制 376 | static int hex2dec(char c) { 377 | if (c >= '0' && c <= '9') { 378 | return c - '0'; 379 | } else if (c >= 'a' && c <= 'f') { 380 | return c - 'a' + 10; 381 | } else if (c >= 'A' && c <= 'F') { 382 | return c - 'A' + 10; 383 | } else { 384 | return -1; 385 | } 386 | } 387 | 388 | // 参考自:https://github.com/bajdcc/CEval/blob/master/CEval/CEval.cpp#L105 389 | lexer_t clexer::next_digit() { 390 | // 假定这里的数字规则是以0-9开头 391 | // 正则:^((?:\d+(\.)?\d*)(?:[eE][+-]?\d+)?)([uU])?([fFdCcSsDiIlL])?$ 392 | // 正则:^0[Xx][0-9A-Fa-f]+$ 393 | // 手动实现atof/atoi,并类型转换 394 | // 其他功能:int溢出转double,e科学记数法 395 | // 
注意:这里不考虑负数,因为估计到歧义(可能是减法呢?) 396 | auto _type = l_int; // 默认是整型 397 | auto _postfix = l_none; 398 | auto i = index; 399 | auto n = 0ULL, _n = 0ULL; 400 | auto d = 0.0; 401 | if (local() == '0' && (local(1) == 'x' || local(1) == 'x')) { 402 | auto cc = 0; 403 | // 预先判断十六进制 404 | for (i += 2; i < length && ((cc = hex2dec(str[i])) != -1); i++) { // 解析整数部分 405 | if (_type == l_double) { // 小数加位,溢出后自动转换 406 | d *= 16.0; 407 | d += cc; 408 | } else { // 整数加位 409 | _n = n; 410 | n <<= 4; 411 | n += cc; 412 | } 413 | if (_type == l_int) { // 超过int范围,转为long 414 | if (n > INT_MAX) 415 | _type = l_long; 416 | } else if (_type == l_long) { // 超过long范围,转为double 417 | if (n >> 4 != _n) { 418 | d = (double) _n; 419 | d *= 16.0; 420 | d += cc; 421 | _type = l_double; 422 | } 423 | } 424 | } 425 | return digit_return(_type, n, d, i); 426 | } 427 | // 判断整数部分 428 | for (; i < length && (isdigit(str[i])); i++) { // 解析整数部分 429 | if (_type == l_double) { // 小数加位,溢出后自动转换 430 | d *= 10.0; 431 | d += str[i] - '0'; 432 | } else { // 整数加位 433 | _n = n; 434 | n *= 10; 435 | n += str[i] - '0'; 436 | } 437 | if (_type == l_int) { // 超过int范围,转为long 438 | if (n > INT_MAX) { 439 | _type = l_long; 440 | } 441 | } else if (_type == l_long) { // 超过long范围,转为double 442 | if (n / 10 != _n) { 443 | d = (double) _n; 444 | d *= 10.0; 445 | d += str[i] - '0'; 446 | _type = l_double; 447 | } 448 | } 449 | } 450 | if (i == length) { // 只有整数部分 451 | return digit_return(_type, n, d, i); 452 | } 453 | if ((_postfix = digit_type(_type, i)) != l_error) { // 判断有无后缀 454 | move(i - index); 455 | if (_type == l_int) 456 | return digit_from_integer(_postfix, n) ? _postfix : _type; 457 | else 458 | return digit_from_double(_postfix, d) ? 
_postfix : _type; 459 | } 460 | if (str[i] == '.') { // 解析小数部分 461 | sint l = ++i; 462 | for (; i < length && (isdigit(str[i])); i++) { 463 | d *= 10.0; 464 | d += str[i] - '0'; 465 | } 466 | l = i - l; 467 | if (l > 0) { 468 | d = (double) n + calc_exp(d, -l); 469 | _type = l_double; 470 | } 471 | } 472 | if (i == length) { // 只有整数部分和小数部分 473 | return digit_return(_type, n, d, i); 474 | } 475 | if ((_postfix = digit_type(_type, i)) != l_error) { // 判断有无后缀 476 | move(i - index); 477 | if (_type == l_int) 478 | return digit_from_integer(_postfix, n) ? _postfix : _type; 479 | else 480 | return digit_from_double(_postfix, d) ? _postfix : _type; 481 | } 482 | if (str[i] == 'e' || str[i] == 'E') { // 科学计数法强制转成double 483 | auto neg = false; 484 | auto e = 0; 485 | if (_type != l_double) { 486 | _type = l_double; 487 | d = (double) n; 488 | } 489 | if (++i == length) { 490 | return digit_return(_type, n, d, i); 491 | } 492 | if (!isdigit(str[i])) { 493 | if (str[i] == '-') { // 1e-1 494 | if (++i == length) 495 | return digit_return(_type, n, d, i); 496 | neg = true; 497 | } else if (str[i] == '+') { // 1e+1 498 | if (++i == length) 499 | return digit_return(_type, n, d, i); 500 | } else { 501 | return digit_return(_type, n, d, i); 502 | } 503 | } 504 | for (; i < length && (isdigit(str[i])); i++) { // 解析指数部分 505 | e *= 10; 506 | e += str[i] - '0'; 507 | } 508 | d = calc_exp(d, neg ? -e : e); 509 | } 510 | if ((_postfix = digit_type(_type, i)) != l_error) { // 判断有无后缀 511 | move(i - index); 512 | if (_type == l_int) 513 | return digit_from_integer(_postfix, n) ? _postfix : _type; 514 | else 515 | return digit_from_double(_postfix, d) ? 
// Translate the character that follows a backslash into the character it
// denotes.
//
// Letter and punctuation escapes (b, f, n, r, t, v, ', ", \) map to their
// control/literal characters; a decimal digit maps to its numeric value
// (so '0' yields 0). Any other character yields -1.
static int escape(char c) {
    switch (c) {
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        case 'v':
            return '\v';
        case '\'':
            return '\'';
        case '\"':
            return '\"';
        case '\\':
            return '\\';
        default:
            break;
    }
    const bool digit = c >= '0' && c <= '9';
    return digit ? c - '0' : -1;
}
// Lex a character literal whose opening quote is at `index`.
//
// Accepted forms: '?' (one plain character), '\?' (an escape understood by
// escape()), '\x?' and '\x??' (one or two hex digits). On success the value is
// stored in bags._char and l_char is returned; every other shape is reported
// through record_error(e_invalid_char, skip).
lexer_t clexer::next_char() {
    // Look ahead for the four-character form '\?' first; this also covers
    // the special cases '\'' and '\\'.
    if (local(1) == '\\' && local(3) == '\'') {
        auto c = local(2);
        auto esc = escape((char) c); // '\?'
        if (esc != -1) {
            bags._char = (char) esc;
            move(4);
            return l_char;
        }
        return record_error(e_invalid_char, 4);
    }
    uint i;
    // Find the closing quote, looking at most a bounded number of characters ahead.
    for (i = 1; index + i < length && str[index + i] != '\'' && i <= 4; i++);
    if (i == 1) { // ''
        return record_error(e_invalid_char, i + 1);
    }
    auto j = index + i; // position where the closing quote is expected
    i++;                // i is now the literal's total length, quotes included
    if (j < length && str[j] == '\'') {
        if (str[index + 1] == '\\') {
            if (i == 3) { // '\'
                return record_error(e_invalid_char, i);
            }
            // i cannot be 4 here: that layout ('\?') was handled by the look-ahead above.
            if (i == 5) { // '\x?'
                if (str[index + 1] == '\\' && str[index + 2] == 'x') {
                    auto esc = hex2dec(str[index + 3]);
                    if (esc != -1) {
                        bags._char = (char) esc;
                        move(i);
                        return l_char;
                    }
                }
                return record_error(e_invalid_char, i);
            }
            // '\x??'
            if (str[index + 1] == '\\' && str[index + 2] == 'x') {
                auto esc = hex2dec(str[index + 3]); // '\x?_' high digit
                if (esc != -1) {
                    bags._char = (char) esc;
                    esc = hex2dec(str[index + 4]); // '\x_?' low digit
                    if (esc != -1) {
                        bags._char *= 0x10;
                        bags._char += (char) esc;
                        move(i);
                        return l_char;
                    }
                }
            }
            return record_error(e_invalid_char, i);
        } else if (i == 3) { // '?'
            bags._char = str[index + 1];
            move((uint) i);
            return l_char;
        }
    }
    // No closing quote in range, or an unescaped multi-character body.
    return record_error(e_invalid_char, 1);
}
643 | bags._char = str[index + 1]; 644 | move((uint) i); 645 | return l_char; 646 | } 647 | } 648 | return record_error(e_invalid_char, 1); 649 | } 650 | 651 | lexer_t clexer::next_string() { 652 | auto i = index; 653 | auto prev = str[i]; 654 | // 寻找非'\"'的第一个'"' 655 | for (i++; i < length && (prev == '\\' || (str[i]) != '"'); prev = str[i++]); 656 | auto j = i; 657 | if (j == length) { // " EOF 658 | return record_error(e_invalid_string, i - index); 659 | } 660 | std::stringstream ss; 661 | auto status = 1; // 状态机 662 | char c = 0; 663 | for (i = index + 1; i < j;) { 664 | switch (status) { 665 | case 1: { // 处理字符 666 | if (str[i] == '\\') { 667 | status = 2; 668 | } else { // '?' 669 | ss << str[i]; 670 | } 671 | i++; 672 | } 673 | break; 674 | case 2: { // 处理转义 675 | if (str[i] == 'x') { 676 | status = 3; 677 | i++; 678 | } else { 679 | auto esc = escape(str[i]); 680 | if (esc != -1) { 681 | ss << (char) esc; 682 | i++; 683 | status = 1; 684 | } else { 685 | status = 0; // 失败 686 | } 687 | } 688 | } 689 | break; 690 | case 3: { // 处理 '\x??' 前一位十六进制数字 691 | auto esc = hex2dec(str[i]); 692 | if (esc != -1) { 693 | c = (char) esc; 694 | status = 4; 695 | i++; 696 | } else { 697 | status = 0; // 失败 698 | } 699 | } 700 | break; 701 | case 4: { // 处理 '\x??' 
后一位十六进制数字 702 | auto esc = hex2dec(str[i]); 703 | if (esc != -1) { 704 | c *= 10; 705 | c += (char) esc; 706 | ss << c; 707 | status = 1; 708 | i++; 709 | } else { 710 | ss << c; 711 | status = 1; 712 | } 713 | } 714 | break; 715 | default: // 失败 716 | bags._string = str.substr(index + 1, j - index - 1); 717 | move(j - index + 1); 718 | return l_string; 719 | } 720 | } 721 | if (status == 1) { // 为初态/终态 722 | bags._string = ss.str(); 723 | move(j - index + 1); 724 | return l_string; 725 | } 726 | bags._string = str.substr(index + 1, j - index - 1); 727 | move(j - index + 1); 728 | return l_string; 729 | } 730 | 731 | lexer_t clexer::next_comment() { 732 | sint i = index; 733 | if (str[++i] == '/') { // '//' 734 | // 寻找第一个换行符 735 | for (++i; i < length && (str[i] != '\n' && str[i] != '\r'); i++); 736 | bags._comment = str.substr(index + 2, i - index - 2); 737 | move(i - index); 738 | return l_comment; 739 | } else { // '/* */' 740 | // 寻找第一个 '*/' 741 | char prev = 0; 742 | auto newline = 0; 743 | for (++i; i < length && (prev != '*' || (str[i]) != '/'); 744 | prev = str[i++], prev == '\n' ? ++newline : 0); 745 | i++; 746 | bags._comment = str.substr(index + 2, i - index - 1); 747 | move(i - index, newline); // 检查换行 748 | return l_comment; 749 | } 750 | } 751 | 752 | lexer_t clexer::next_operator() { 753 | auto c = local(); 754 | if (bitOp[0].test((uint) c)) { // 操作符第一个char判断非法 755 | auto c2 = local(1); 756 | if (c2 != -1 && bitOp[1].test((uint) c2)) { // 操作符第二个char判断非法,否则解析单字符操作符 757 | auto c3 = local(2); 758 | if (c3 != -1 && (c3 == '=' || c3 == '.')) { // 操作符第三个char判断非法,否则解析双字符操作符 759 | // 三字符操作符 760 | auto p = op__start; 761 | if (c3 == '=') { // 手动判断 762 | if (c == c2) { 763 | if (c == '<') { 764 | p = op_left_shift_assign; 765 | } else if (c == '>') { 766 | p = op_left_shift_assign; 767 | } 768 | } 769 | } else { 770 | if (c == '.' 
&& c2 == '.') { 771 | p = op_ellipsis; 772 | } 773 | } 774 | if (p == op__start) { 775 | auto p2 = sinOp[c]; 776 | if (p2 != 0) { 777 | bags._operator = p2; 778 | move(1); 779 | return l_operator; 780 | } 781 | return record_error(e_invalid_operator, 3); 782 | } else { 783 | bags._operator = (operator_t) p; 784 | move(3); 785 | return l_operator; 786 | } 787 | } else { 788 | // 双字符操作符 789 | if (c2 == '=') { 790 | auto p = sinOp[c]; 791 | if (p == 0 || p > op_logical_not) { 792 | // 单字符操作符 793 | auto p = sinOp[c]; 794 | bags._operator = (operator_t) p; 795 | move(1); 796 | return l_operator; 797 | } 798 | bags._operator = (operator_t) (p + 1); // 从 '?' 到 '?=' 799 | move(2); 800 | return l_operator; 801 | } 802 | auto p = op__start; 803 | if (c == c2) { // 相同位的双字符操作符 804 | switch (c2) { 805 | case '+': 806 | p = op_plus_plus; 807 | break; 808 | case '-': 809 | p = op_minus_minus; 810 | break; 811 | case '&': 812 | p = op_logical_and; 813 | break; 814 | case '|': 815 | p = op_logical_or; 816 | break; 817 | case '<': 818 | p = op_left_shift; 819 | break; 820 | case '>': 821 | p = op_right_shift; 822 | break; 823 | default: 824 | break; 825 | } 826 | } else if (c == '-' && c2 == '>') { // '->' 827 | p = op_pointer; 828 | } 829 | if (p == op__start) { // 双字符非法,则回退到单字符 830 | auto p = sinOp[c]; 831 | if (p == 0) { 832 | return record_error(e_invalid_operator, 1); 833 | } 834 | bags._operator = (operator_t) p; 835 | move(1); 836 | return l_operator; 837 | } else { 838 | bags._operator = (operator_t) p; 839 | move(2); 840 | return l_operator; 841 | } 842 | } 843 | } else { 844 | // 单字符操作符 845 | auto p = sinOp[c]; 846 | if (p == 0) { 847 | return record_error(e_invalid_operator, 1); 848 | } 849 | bags._operator = (operator_t) p; 850 | move(1); 851 | return l_operator; 852 | } 853 | } else { 854 | return record_error(e_invalid_operator, 1); 855 | } 856 | } 857 | 858 | int clexer::local() { 859 | if (index < length) 860 | return str[index]; 861 | return -1; 862 | } 863 | 864 | 
// Build the lookup tables used by the scanners:
//   mapKeyword - keyword text -> keyword_t
//   sinOp      - character -> operator_t for operators that are one character long
//   bitOp[0/1] - characters that occur as the first / second character of any operator
//   bitIdOp    - extra characters registered from the literal "_-?"
void clexer::initMap() {
    // Keywords: every enumerator strictly between k__start and k__end.
    for (auto i = k__start + 1; i < k__end; i++) {
        mapKeyword[KEYWORD_STRING((keyword_t) i)] = (keyword_t) i;
    }
    auto len = 0;
    // Operators: every enumerator strictly between op__start and op__end.
    for (auto i = op__start + 1; i < op__end; i++) {
        const auto &op = OP_STRING((operator_t) i);
        len = op.length();
        if (len == 1) {
            sinOp[op[0]] = (operator_t) i; // single-character operator, keyed by that character
        }
        len = std::min(len, 2); // only the first two characters are indexed
        for (auto j = 0; j < len; j++) {
            bitOp[j].set((uint) op[j]); // mark the character as valid in position j
        }
    }
    string_t enable_char = "_-?";
    for (auto &c : enable_char) {
        bitIdOp.set((uint) c);
    }
}
DEFINE_LEXER_GETTER(double) 37 | DEFINE_LEXER_GETTER(operator) 38 | DEFINE_LEXER_GETTER(keyword) 39 | DEFINE_LEXER_GETTER(identifier) 40 | DEFINE_LEXER_GETTER(string) 41 | DEFINE_LEXER_GETTER(comment) 42 | DEFINE_LEXER_GETTER(space) 43 | DEFINE_LEXER_GETTER(newline) 44 | DEFINE_LEXER_GETTER(error) 45 | #undef DEFINE_LEXER_GETTER 46 | #define DEFINE_LEXER_GETTER(t) LEX_T(t) get_store_##t(int) const; 47 | DEFINE_LEXER_GETTER(char) 48 | DEFINE_LEXER_GETTER(uchar) 49 | DEFINE_LEXER_GETTER(short) 50 | DEFINE_LEXER_GETTER(ushort) 51 | DEFINE_LEXER_GETTER(int) 52 | DEFINE_LEXER_GETTER(uint) 53 | DEFINE_LEXER_GETTER(long) 54 | DEFINE_LEXER_GETTER(ulong) 55 | DEFINE_LEXER_GETTER(float) 56 | DEFINE_LEXER_GETTER(double) 57 | DEFINE_LEXER_GETTER(identifier) 58 | DEFINE_LEXER_GETTER(string) 59 | #undef DEFINE_LEXER_GETTER 60 | 61 | public: 62 | struct err_record_t { 63 | int line, column; 64 | uint start_idx, end_idx; 65 | error_t err; 66 | string_t str; 67 | }; 68 | 69 | private: 70 | std::vector records; 71 | 72 | lexer_t record_error(error_t error, uint skip); 73 | 74 | public: 75 | lexer_t next(); 76 | 77 | lexer_t get_type() const; 78 | int get_line() const; 79 | int get_column() const; 80 | int get_last_line() const; 81 | int get_last_column() const; 82 | string_t current() const; 83 | 84 | const err_record_t& recent_error() const; 85 | 86 | lexer_t digit_type(lexer_t t, uint &i); 87 | bool digit_from_integer(lexer_t t, LEX_T(ulong) n); 88 | bool digit_from_double(lexer_t t, LEX_T(double) n); 89 | lexer_t digit_return(lexer_t t, LEX_T(ulong) n, LEX_T(double) d, uint i); 90 | 91 | private: 92 | void move(uint idx, int inc = -1); 93 | 94 | // 内部解析 95 | lexer_t next_digit(); 96 | lexer_t next_alpha(); 97 | lexer_t next_space(); 98 | lexer_t next_char(); 99 | lexer_t next_string(); 100 | lexer_t next_comment(); 101 | lexer_t next_operator(); 102 | 103 | int local(); 104 | int local(int offset); 105 | 106 | public: 107 | bool is_type(lexer_t) const; 108 | bool 
is_keyword(keyword_t) const; 109 | bool is_operator(operator_t) const; 110 | bool is_operator(operator_t, operator_t) const; 111 | bool is_number() const; 112 | bool is_integer() const; 113 | 114 | LEX_T(int) get_integer() const; 115 | 116 | void reset(); 117 | 118 | private: 119 | string_t str; 120 | uint index{0}; 121 | uint last_index{0}; 122 | uint length{0}; 123 | 124 | lexer_t type{l_none}; 125 | uint line{1}; 126 | uint column{1}; 127 | uint last_line{1}; 128 | uint last_column{1}; 129 | 130 | struct { 131 | #define DEFINE_LEXER_GETTER(t) LEX_T(t) _##t; 132 | DEFINE_LEXER_GETTER(char) 133 | DEFINE_LEXER_GETTER(uchar) 134 | DEFINE_LEXER_GETTER(short) 135 | DEFINE_LEXER_GETTER(ushort) 136 | DEFINE_LEXER_GETTER(int) 137 | DEFINE_LEXER_GETTER(uint) 138 | DEFINE_LEXER_GETTER(long) 139 | DEFINE_LEXER_GETTER(ulong) 140 | DEFINE_LEXER_GETTER(float) 141 | DEFINE_LEXER_GETTER(double) 142 | DEFINE_LEXER_GETTER(operator) 143 | DEFINE_LEXER_GETTER(keyword) 144 | DEFINE_LEXER_GETTER(identifier) 145 | DEFINE_LEXER_GETTER(string) 146 | DEFINE_LEXER_GETTER(comment) 147 | DEFINE_LEXER_GETTER(space) 148 | DEFINE_LEXER_GETTER(newline) 149 | DEFINE_LEXER_GETTER(error) 150 | #undef DEFINE_LEXER_GETTER 151 | } bags; 152 | 153 | struct { 154 | #define DEFINE_LEXER_STORAGE(t) std::vector _##t; 155 | DEFINE_LEXER_STORAGE(char) 156 | DEFINE_LEXER_STORAGE(uchar) 157 | DEFINE_LEXER_STORAGE(short) 158 | DEFINE_LEXER_STORAGE(ushort) 159 | DEFINE_LEXER_STORAGE(int) 160 | DEFINE_LEXER_STORAGE(uint) 161 | DEFINE_LEXER_STORAGE(long) 162 | DEFINE_LEXER_STORAGE(ulong) 163 | DEFINE_LEXER_STORAGE(float) 164 | DEFINE_LEXER_STORAGE(double) 165 | DEFINE_LEXER_STORAGE(operator) 166 | DEFINE_LEXER_STORAGE(keyword) 167 | DEFINE_LEXER_STORAGE(identifier) 168 | DEFINE_LEXER_STORAGE(string) 169 | DEFINE_LEXER_STORAGE(comment) 170 | DEFINE_LEXER_STORAGE(space) 171 | DEFINE_LEXER_STORAGE(newline) 172 | DEFINE_LEXER_STORAGE(error) 173 | #undef DEFINE_LEXER_STORAGE 174 | } storage; 175 | 176 | // 字典 177 | 
map_t mapKeyword; 178 | std::bitset<128> bitOp[2]; 179 | std::array sinOp; 180 | std::bitset<128> bitIdOp; 181 | 182 | void initMap(); 183 | }; 184 | } 185 | 186 | #endif //CMINILANG_LEXER_H -------------------------------------------------------------------------------- /cparser.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: CMiniLang 3 | // Author: bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include "cexception.h" 10 | #include "cparser.h" 11 | #include "clexer.h" 12 | #include "cast.h" 13 | #include "cunit.h" 14 | 15 | #define TRACE_PARSING 0 16 | #define DUMP_PDA 0 17 | #define DEBUG_AST 0 18 | #define CHECK_AST 0 19 | 20 | namespace clib { 21 | 22 | ast_node *cparser::parse(const string_t &str) { 23 | lexer = std::make_unique(str.empty() ? str : (str[0] == '`' ? str : ("(" + str + ")"))); 24 | // 清空词法分析结果 25 | lexer->reset(); 26 | // 清空AST 27 | ast.reset(); 28 | // 产生式 29 | if (unit.get_pda().empty()) 30 | gen(); 31 | // 语法分析(LR) 32 | program(); 33 | //cast::print2(ast.get_root(), 0, std::cout); 34 | simplify(ast.get_root()); 35 | return ast.get_root(); 36 | } 37 | 38 | ast_node *cparser::root() const { 39 | return ast.get_root(); 40 | } 41 | 42 | void cparser::reset() { 43 | ast_cache_index = 0; 44 | state_stack.clear(); 45 | ast_stack.clear(); 46 | ast_cache.clear(); 47 | ast_coll_cache.clear(); 48 | ast_reduce_cache.clear(); 49 | state_stack.push_back(0); 50 | } 51 | 52 | void cparser::next() { 53 | lexer_t token; 54 | do { 55 | token = lexer->next(); 56 | if (token == l_error) { 57 | auto err = lexer->recent_error(); 58 | printf("[%04d:%03d] %-12s - %s\n", 59 | err.line, 60 | err.column, 61 | ERROR_STRING(err.err).c_str(), 62 | err.str.c_str()); 63 | } 64 | } while (token == l_newline || token == l_space || token == l_error || token == l_comment); 65 | #if 0 66 | if (token != l_end) { 67 | qDebug("[%04d:%03d] %-12s - %s\n", 68 | lexer->get_last_line(), 69 | 
lexer->get_last_column(), 70 | LEX_STRING(lexer->get_type()).c_str(), 71 | lexer->current().c_str()); 72 | } 73 | #endif 74 | } 75 | 76 | ast_node *cparser::simplify(ast_node *node) { 77 | if (node == nullptr) 78 | return nullptr; 79 | auto type = (ast_t) node->flag; 80 | switch (type) { 81 | case ast_root: // 根结点,全局声明 82 | return node->child = simplify(node->child); 83 | case ast_collection: { 84 | switch (node->data._coll) { 85 | case c_program: 86 | if (node->child->data._coll == c_list && node->child->child == node->child->child->next) 87 | return simplify(node->child->child); 88 | return simplify(node->child); 89 | case c_list: { 90 | auto i = node->child; 91 | if (i) { 92 | std::vector children; 93 | children.push_back(i); 94 | i = i->next; 95 | while (i != node->child) { 96 | children.push_back(i); 97 | i = i->next; 98 | } 99 | node->child = nullptr; 100 | for (auto &child: children) { 101 | cast::set_child(node, simplify(child->child)); 102 | } 103 | } 104 | node->flag = ast_sexpr; 105 | return node; 106 | } 107 | case c_sexpr: 108 | node->child = simplify(node->child); 109 | if (node->child && node->child == node->child->next) { 110 | node->child->flag = ast_sexpr; 111 | return node->child; 112 | } 113 | node->flag = ast_sexpr; 114 | return node; 115 | case c_qexpr: { 116 | auto q = node->child->child; 117 | if (q->data._coll == c_sexpr) { 118 | auto t = simplify(q); 119 | t->flag = ast_qexpr; 120 | return t; 121 | } 122 | q = simplify(node->child); 123 | node->child = q; 124 | node->flag = ast_qexpr; 125 | return node; 126 | } 127 | case c_object: 128 | return simplify(node->child); 129 | default: 130 | break; 131 | } 132 | } 133 | break; 134 | case ast_string: 135 | case ast_literal: 136 | case ast_char: 137 | case ast_uchar: 138 | case ast_short: 139 | case ast_ushort: 140 | case ast_int: 141 | case ast_uint: 142 | case ast_long: 143 | case ast_ulong: 144 | case ast_float: 145 | case ast_double: 146 | return node; 147 | default: 148 | break; 149 | } 150 
// Declare the grammar's tokens and rules on `unit` and generate the parser
// tables from the `program` rule.
//
// NOTE(review): the rule expressions use operators overloaded by cunit.h,
// which is not visible here. Presumably `|` is alternation, `+` is sequence,
// unary `*` marks an optional part and unary `~` marks a token that is matched
// but not kept in the AST; confirm against cunit.h.
void cparser::gen() {
    // Operator tokens, bound to locals named _<name>_.
#define DEF_OP(name) auto &_##name##_ = unit.token(op_##name)
    DEF_OP(lparan);
    DEF_OP(rparan);
    DEF_OP(quote);
#undef DEF_OP
    // Lexer token classes, bound to locals with readable names.
#define DEF_LEX(name, real) auto &real = unit.token(l_##name)
    DEF_LEX(char, Char);
    DEF_LEX(uchar, UnsignedChar);
    DEF_LEX(short, Short);
    DEF_LEX(ushort, UnsignedShort);
    DEF_LEX(int, Integer);
    DEF_LEX(uint, UnsignedInteger);
    DEF_LEX(long, Long);
    DEF_LEX(ulong, UnsignedLong);
    DEF_LEX(float, Float);
    DEF_LEX(double, Double);
    DEF_LEX(identifier, Identifier);
    DEF_LEX(string, String);
    DEF_LEX(comment, Comment);
    DEF_LEX(space, Space);
    DEF_LEX(newline, Newline);
#undef DEF_LEX
    // Grammar rules; each is registered under its own name and c_<name> id.
#define DEF_RULE(name) auto &name = unit.rule(#name, c_##name)
    DEF_RULE(program);
    DEF_RULE(list);
    DEF_RULE(sexpr);
    DEF_RULE(qexpr);
    DEF_RULE(object);
#undef DEF_RULE
    program = list | sexpr;
    list = *list + object; // left-recursive: a list is a shorter list followed by one object
    sexpr = ~_lparan_ + *list + ~_rparan_;
    qexpr = ~_quote_ + object;
    object = Char | UnsignedChar | Short | UnsignedShort | Integer | UnsignedInteger |
             Long | UnsignedLong | Float | Double | String | Identifier | sexpr | qexpr;
    unit.gen(&program);
#if DUMP_PDA
    unit.dump(std::cout);
#endif
}
void cparser::program() { 222 | reset(); 223 | next(); 224 | auto &pdas = unit.get_pda(); 225 | auto root = ast.new_node(ast_collection); 226 | root->data._coll = pdas[0].coll; 227 | cast::set_child(ast.get_root(), root); 228 | ast_stack.push_back(root); 229 | std::vector jumps; 230 | std::vector trans_ids; 231 | backtrace_t bk_tmp; 232 | bk_tmp.lexer_index = 0; 233 | bk_tmp.state_stack = state_stack; 234 | bk_tmp.ast_stack = ast_stack; 235 | bk_tmp.current_state = 0; 236 | bk_tmp.coll_index = 0; 237 | bk_tmp.reduce_index = 0; 238 | bk_tmp.direction = b_next; 239 | std::vector bks; 240 | bks.push_back(bk_tmp); 241 | auto trans_id = -1; 242 | while (!bks.empty()) { 243 | auto bk = &bks.back(); 244 | if (bk->direction == b_success || bk->direction == b_fail) { 245 | break; 246 | } 247 | if (bk->direction == b_fallback) { 248 | if (bk->trans_ids.empty()) { 249 | if (bks.size() > 1) { 250 | bks.pop_back(); 251 | bks.back().direction = b_error; 252 | bk = &bks.back(); 253 | } else { 254 | bk->direction = b_fail; 255 | continue; 256 | } 257 | } 258 | } 259 | ast_cache_index = bk->lexer_index; 260 | state_stack = bk->state_stack; 261 | ast_stack = bk->ast_stack; 262 | auto state = bk->current_state; 263 | if (bk->direction != b_error) 264 | for (;;) { 265 | auto ¤t_state = pdas[state]; 266 | if (lexer->is_type(l_end)) { 267 | if (current_state.final) { 268 | if (state_stack.empty()) { 269 | bk->direction = b_success; 270 | break; 271 | } 272 | } 273 | } 274 | auto &trans = current_state.trans; 275 | if (trans_id == -1 && !bk->trans_ids.empty()) { 276 | trans_id = bk->trans_ids.back() & ((1 << 16) - 1); 277 | bk->trans_ids.pop_back(); 278 | } else { 279 | trans_ids.clear(); 280 | for (auto i = 0; i < trans.size(); ++i) { 281 | auto &cs = trans[i]; 282 | if (valid_trans(cs)) 283 | trans_ids.push_back(i | pda_edge_priority(cs.type) << 16); 284 | } 285 | if (!trans_ids.empty()) { 286 | std::sort(trans_ids.begin(), trans_ids.end(), std::greater<>()); 287 | if (trans_ids.size() 
> 1) { 288 | bk_tmp.lexer_index = ast_cache_index; 289 | bk_tmp.state_stack = state_stack; 290 | bk_tmp.ast_stack = ast_stack; 291 | bk_tmp.current_state = state; 292 | bk_tmp.trans_ids = trans_ids; 293 | bk_tmp.coll_index = ast_coll_cache.size(); 294 | bk_tmp.reduce_index = ast_reduce_cache.size(); 295 | bk_tmp.direction = b_next; 296 | #if DEBUG_AST 297 | for (auto i = 0; i < bks.size(); ++i) { 298 | auto &_bk = bks[i]; 299 | printf("[DEBUG] Branch old: i=%d, LI=%d, SS=%d, AS=%d, S=%d, TS=%d, CI=%d, RI=%d, TK=%d\n", 300 | i, _bk.lexer_index, _bk.state_stack.size(), 301 | _bk.ast_stack.size(), _bk.current_state, _bk.trans_ids.size(), 302 | _bk.coll_index, _bk.reduce_index, _bk.ast_ids.size()); 303 | } 304 | #endif 305 | bks.push_back(bk_tmp); 306 | bk = &bks.back(); 307 | #if DEBUG_AST 308 | printf("[DEBUG] Branch new: BS=%d, LI=%d, SS=%d, AS=%d, S=%d, TS=%d, CI=%d, RI=%d, TK=%d\n", 309 | bks.size(), bk_tmp.lexer_index, bk_tmp.state_stack.size(), 310 | bk_tmp.ast_stack.size(), bk_tmp.current_state, bk_tmp.trans_ids.size(), 311 | bk_tmp.coll_index, bk_tmp.reduce_index, bk_tmp.ast_ids.size()); 312 | #endif 313 | bk->direction = b_next; 314 | break; 315 | } else { 316 | trans_id = trans_ids.back() & ((1 << 16) - 1); 317 | trans_ids.pop_back(); 318 | } 319 | } else { 320 | #if TRACE_PARSING 321 | std::cout << "parsing error: " << current_state.label << std::endl; 322 | #endif 323 | bk->direction = b_error; 324 | break; 325 | } 326 | } 327 | auto &t = trans[trans_id]; 328 | if (t.type == e_finish) { 329 | if (!lexer->is_type(l_end)) { 330 | #if TRACE_PARSING 331 | std::cout << "parsing redundant code: " << current_state.label << std::endl; 332 | #endif 333 | bk->direction = b_fail; 334 | break; 335 | } 336 | } 337 | auto jump = trans[trans_id].jump; 338 | #if TRACE_PARSING 339 | printf("State: %3d => To: %3d -- Action: %-10s -- Rule: %s\n", 340 | state, jump, pda_edge_str(t.type).c_str(), current_state.label.c_str()); 341 | #endif 342 | do_trans(state, *bk, 
trans[trans_id]); 343 | state = jump; 344 | } 345 | if (bk->direction == b_error) { 346 | #if DEBUG_AST 347 | for (auto i = 0; i < bks.size(); ++i) { 348 | auto &_bk = bks[i]; 349 | printf("[DEBUG] Backtrace failed: i=%d, LI=%d, SS=%d, AS=%d, S=%d, TS=%d, CI=%d, RI=%d, TK=%d\n", 350 | i, _bk.lexer_index, _bk.state_stack.size(), 351 | _bk.ast_stack.size(), _bk.current_state, _bk.trans_ids.size(), 352 | _bk.coll_index, _bk.reduce_index, _bk.ast_ids.size()); 353 | } 354 | #endif 355 | for (auto &i : bk->ast_ids) { 356 | auto &token = ast_cache[i]; 357 | check_ast(token); 358 | #if DEBUG_AST 359 | printf("[DEBUG] Backtrace failed, unlink token: %p, PB=%p\n", token, token->parent); 360 | #endif 361 | cast::unlink(token); 362 | check_ast(token); 363 | } 364 | auto size = ast_reduce_cache.size(); 365 | for (auto i = size; i > bk->reduce_index; --i) { 366 | auto &coll = ast_reduce_cache[i - 1]; 367 | check_ast(coll); 368 | #if DEBUG_AST 369 | printf("[DEBUG] Backtrace failed, unlink: %p, PB=%p, NE=%d, CB=%d\n", 370 | coll, coll->parent, cast::children_size(coll->parent), cast::children_size(coll)); 371 | #endif 372 | cast::unlink(coll); 373 | check_ast(coll); 374 | } 375 | ast_reduce_cache.erase(ast_reduce_cache.begin() + bk->reduce_index, ast_reduce_cache.end()); 376 | size = ast_coll_cache.size(); 377 | for (auto i = size; i > bk->coll_index; --i) { 378 | auto &coll = ast_coll_cache[i - 1]; 379 | assert(coll->flag == ast_collection); 380 | check_ast(coll); 381 | #if DEBUG_AST 382 | printf("[DEBUG] Backtrace failed, delete coll: %p, PB=%p, CB=%p, NE=%d, CS=%d\n", 383 | coll, coll->parent, coll->child, 384 | cast::children_size(coll->parent), cast::children_size(coll)); 385 | #endif 386 | cast::unlink(coll); 387 | check_ast(coll); 388 | ast.remove(coll); 389 | } 390 | ast_coll_cache.erase(ast_coll_cache.begin() + bk->coll_index, ast_coll_cache.end()); 391 | bk->direction = b_fallback; 392 | } 393 | trans_id = -1; 394 | } 395 | } 396 | 397 | ast_node *cparser::terminal() { 
398 | if (lexer->is_type(l_end)) { // 结尾 399 | error("unexpected token EOF of expression"); 400 | } 401 | if (ast_cache_index < ast_cache.size()) { 402 | return ast_cache[ast_cache_index++]; 403 | } 404 | if (lexer->is_type(l_operator)) { 405 | auto node = ast.new_node(ast_operator); 406 | node->data._op = lexer->get_operator(); 407 | match_operator(node->data._op); 408 | ast_cache.push_back(node); 409 | ast_cache_index++; 410 | return node; 411 | } 412 | if (lexer->is_type(l_keyword)) { 413 | auto node = ast.new_node(ast_keyword); 414 | node->data._keyword = lexer->get_keyword(); 415 | match_keyword(node->data._keyword); 416 | ast_cache.push_back(node); 417 | ast_cache_index++; 418 | return node; 419 | } 420 | if (lexer->is_type(l_identifier)) { 421 | auto node = ast.new_node(ast_literal); 422 | ast.set_str(node, lexer->get_identifier()); 423 | match_type(l_identifier); 424 | ast_cache.push_back(node); 425 | ast_cache_index++; 426 | return node; 427 | } 428 | if (lexer->is_number()) { 429 | ast_node *node = nullptr; 430 | auto type = lexer->get_type(); 431 | switch (type) { 432 | #define DEFINE_NODE_INT(t) \ 433 | case l_##t: \ 434 | node = ast.new_node(ast_##t); \ 435 | node->data._##t = lexer->get_##t(); \ 436 | break; 437 | DEFINE_NODE_INT(char) 438 | DEFINE_NODE_INT(uchar) 439 | DEFINE_NODE_INT(short) 440 | DEFINE_NODE_INT(ushort) 441 | DEFINE_NODE_INT(int) 442 | DEFINE_NODE_INT(uint) 443 | DEFINE_NODE_INT(long) 444 | DEFINE_NODE_INT(ulong) 445 | DEFINE_NODE_INT(float) 446 | DEFINE_NODE_INT(double) 447 | #undef DEFINE_NODE_INT 448 | default: 449 | error("invalid number"); 450 | break; 451 | } 452 | match_number(); 453 | ast_cache.push_back(node); 454 | ast_cache_index++; 455 | return node; 456 | } 457 | if (lexer->is_type(l_string)) { 458 | std::stringstream ss; 459 | ss << lexer->get_string(); 460 | #if 0 461 | printf("[%04d:%03d] String> %04X '%s'\n", clexer->get_line(), clexer->get_column(), idx, clexer->get_string().c_str()); 462 | #endif 463 | 
match_type(l_string); 464 | 465 | while (lexer->is_type(l_string)) { 466 | ss << lexer->get_string(); 467 | #if 0 468 | printf("[%04d:%03d] String> %04X '%s'\n", clexer->get_line(), clexer->get_column(), idx, clexer->get_string().c_str()); 469 | #endif 470 | match_type(l_string); 471 | } 472 | auto node = ast.new_node(ast_string); 473 | ast.set_str(node, ss.str()); 474 | ast_cache.push_back(node); 475 | ast_cache_index++; 476 | return node; 477 | } 478 | error("invalid type"); 479 | return nullptr; 480 | } 481 | 482 | bool cparser::valid_trans(const pda_trans &trans) const { 483 | auto &la = trans.LA; 484 | if (!la.empty()) { 485 | auto success = false; 486 | for (auto &_la : la) { 487 | if (LA(_la)) { 488 | success = true; 489 | break; 490 | } 491 | } 492 | if (!success) 493 | return false; 494 | } 495 | switch (trans.type) { 496 | case e_shift: 497 | break; 498 | case e_pass: 499 | break; 500 | case e_move: 501 | break; 502 | case e_left_recursion: 503 | break; 504 | case e_reduce: { 505 | if (state_stack.empty()) 506 | return false; 507 | if (trans.status != state_stack.back()) 508 | return false; 509 | } 510 | break; 511 | case e_finish: 512 | break; 513 | default: 514 | break; 515 | } 516 | return true; 517 | } 518 | 519 | void cparser::do_trans(int state, backtrace_t &bk, const pda_trans &trans) { 520 | switch (trans.type) { 521 | case e_shift: { 522 | state_stack.push_back(state); 523 | auto new_node = ast.new_node(ast_collection); 524 | auto &pdas = unit.get_pda(); 525 | new_node->data._coll = pdas[trans.jump].coll; 526 | #if DEBUG_AST 527 | printf("[DEBUG] Shift: top=%p, new=%p, CS=%d\n", ast_stack.back(), new_node, 528 | cast::children_size(ast_stack.back())); 529 | #endif 530 | ast_coll_cache.push_back(new_node); 531 | ast_stack.push_back(new_node); 532 | } 533 | break; 534 | case e_pass: { 535 | bk.ast_ids.insert(ast_cache_index); 536 | terminal(); 537 | #if CHECK_AST 538 | check_ast(t); 539 | #endif 540 | #if DEBUG_AST 541 | printf("[DEBUG] Move: 
parent=%p, child=%p, CS=%d\n", ast_stack.back(), t, 542 | cast::children_size(ast_stack.back())); 543 | #endif 544 | } 545 | break; 546 | case e_move: { 547 | bk.ast_ids.insert(ast_cache_index); 548 | auto t = terminal(); 549 | #if CHECK_AST 550 | check_ast(t); 551 | #endif 552 | #if DEBUG_AST 553 | printf("[DEBUG] Move: parent=%p, child=%p, CS=%d\n", ast_stack.back(), t, 554 | cast::children_size(ast_stack.back())); 555 | #endif 556 | cast::set_child(ast_stack.back(), t); 557 | } 558 | break; 559 | case e_left_recursion: 560 | break; 561 | case e_reduce: { 562 | auto new_ast = ast_stack.back(); 563 | check_ast(new_ast); 564 | if (new_ast->flag != ast_collection) { 565 | bk.ast_ids.insert(ast_cache_index); 566 | } 567 | state_stack.pop_back(); 568 | ast_stack.pop_back(); 569 | ast_reduce_cache.push_back(new_ast); 570 | #if DEBUG_AST 571 | printf("[DEBUG] Reduce: parent=%p, child=%p, CS=%d, AS=%d, RI=%d\n", 572 | ast_stack.back(), new_ast, cast::children_size(ast_stack.back()), 573 | ast_stack.size(), ast_reduce_cache.size()); 574 | #endif 575 | cast::set_child(ast_stack.back(), new_ast); 576 | check_ast(ast_stack.back()); 577 | } 578 | break; 579 | case e_finish: 580 | state_stack.pop_back(); 581 | break; 582 | } 583 | } 584 | 585 | bool cparser::LA(struct unit *u) const { 586 | if (u->t != u_token) 587 | return false; 588 | auto token = to_token(u); 589 | if (ast_cache_index < ast_cache.size()) { 590 | auto &cache = ast_cache[ast_cache_index]; 591 | if (token->type == l_keyword) 592 | return cache->flag == ast_keyword && cache->data._keyword == token->value.keyword; 593 | if (token->type == l_operator) 594 | return cache->flag == ast_operator && cache->data._op == token->value.op; 595 | return cast::ast_equal((ast_t) cache->flag, token->type); 596 | } 597 | if (token->type == l_keyword) 598 | return lexer->is_keyword(token->value.keyword); 599 | if (token->type == l_operator) 600 | return lexer->is_operator(token->value.op); 601 | return 
lexer->is_type(token->type); 602 | } 603 | 604 | void cparser::expect(bool flag, const string_t &info) { 605 | if (!flag) { 606 | error(info); 607 | } 608 | } 609 | 610 | void cparser::match_keyword(keyword_t type) { 611 | expect(lexer->is_keyword(type), string_t("expect keyword ") + KEYWORD_STRING(type)); 612 | next(); 613 | } 614 | 615 | void cparser::match_operator(operator_t type) { 616 | expect(lexer->is_operator(type), string_t("expect operator " + OPERATOR_STRING(type))); 617 | next(); 618 | } 619 | 620 | void cparser::match_type(lexer_t type) { 621 | expect(lexer->is_type(type), string_t("expect type " + LEX_STRING(type))); 622 | next(); 623 | } 624 | 625 | void cparser::match_number() { 626 | expect(lexer->is_number(), "expect number"); 627 | next(); 628 | } 629 | 630 | void cparser::match_integer() { 631 | expect(lexer->is_integer(), "expect integer"); 632 | next(); 633 | } 634 | 635 | void cparser::error(const string_t &info) { 636 | std::stringstream ss; 637 | ss << '[' << std::setfill('0') << std::setw(4) << lexer->get_line(); 638 | ss << ':' << std::setfill('0') << std::setw(3) << lexer->get_column(); 639 | ss << ']' << " PARSER ERROR: " << info; 640 | throw cexception(ss.str()); 641 | } 642 | } 643 | -------------------------------------------------------------------------------- /cparser.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: CMiniLang 3 | // Author: bajdcc 4 | // 5 | #ifndef CMINILANG_PARSER_H 6 | #define CMINILANG_PARSER_H 7 | 8 | #include 9 | #include "types.h" 10 | #include "clexer.h" 11 | #include "cast.h" 12 | #include "cunit.h" 13 | 14 | namespace clib { 15 | 16 | enum backtrace_direction { 17 | b_success, 18 | b_next, 19 | b_error, 20 | b_fail, 21 | b_fallback, 22 | }; 23 | 24 | struct backtrace_t { 25 | int lexer_index; 26 | std::vector state_stack; 27 | std::vector ast_stack; 28 | int current_state; 29 | int coll_index; 30 | int reduce_index; 31 | std::vector trans_ids; 32 
| std::unordered_set ast_ids; 33 | backtrace_direction direction; 34 | }; 35 | 36 | class cparser { 37 | public: 38 | cparser() = default; 39 | ~cparser() = default; 40 | 41 | cparser(const cparser &) = delete; 42 | cparser &operator=(const cparser &) = delete; 43 | 44 | ast_node *parse(const string_t &str); 45 | ast_node *root() const; 46 | 47 | private: 48 | void next(); 49 | void reset(); 50 | 51 | void gen(); 52 | void program(); 53 | ast_node *terminal(); 54 | 55 | ast_node *simplify(ast_node *node); 56 | 57 | bool valid_trans(const pda_trans &trans) const; 58 | void do_trans(int state, backtrace_t &bk, const pda_trans &trans); 59 | bool LA(unit *u) const; 60 | 61 | private: 62 | void expect(bool, const string_t &); 63 | void match_keyword(keyword_t); 64 | void match_operator(operator_t); 65 | void match_type(lexer_t); 66 | void match_number(); 67 | void match_integer(); 68 | 69 | void error(const string_t &); 70 | 71 | private: 72 | std::vector state_stack; 73 | std::vector ast_stack; 74 | std::vector ast_cache; 75 | int ast_cache_index{0}; 76 | std::vector ast_coll_cache; 77 | std::vector ast_reduce_cache; 78 | 79 | private: 80 | cunit unit; 81 | std::unique_ptr lexer; 82 | cast ast; 83 | }; 84 | } 85 | #endif //CMINILANG_PARSER_H -------------------------------------------------------------------------------- /csub.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #ifndef CLIBLISP_CSUB_H 7 | #define CLIBLISP_CSUB_H 8 | 9 | #include "cvm.h" 10 | 11 | namespace clib { 12 | struct cval; 13 | class cvm; 14 | 15 | class builtins { 16 | public: 17 | static status_t add(cvm *vm, cframe *frame); 18 | static status_t sub(cvm *vm, cframe *frame); 19 | static status_t mul(cvm *vm, cframe *frame); 20 | static status_t div(cvm *vm, cframe *frame); 21 | static status_t quote(cvm *vm, cframe *frame); 22 | static status_t list(cvm *vm, cframe *frame); 23 | static 
status_t car(cvm *vm, cframe *frame); 24 | static status_t cdr(cvm *vm, cframe *frame); 25 | static status_t cons(cvm *vm, cframe *frame); 26 | 27 | static status_t def(cvm *vm, cframe *frame); 28 | static status_t lambda(cvm *vm, cframe *frame); 29 | static status_t call_lambda(cvm *vm, cframe *frame); 30 | static status_t call_eval(cvm *vm, cframe *frame); 31 | 32 | static status_t lt(cvm *vm, cframe *frame); 33 | static status_t le(cvm *vm, cframe *frame); 34 | static status_t gt(cvm *vm, cframe *frame); 35 | static status_t ge(cvm *vm, cframe *frame); 36 | static status_t eq(cvm *vm, cframe *frame); 37 | static status_t ne(cvm *vm, cframe *frame); 38 | 39 | static status_t begin(cvm *vm, cframe *frame); 40 | static status_t _if(cvm *vm, cframe *frame); 41 | 42 | static status_t len(cvm *vm, cframe *frame); 43 | static status_t index(cvm *vm, cframe *frame); 44 | static status_t append(cvm *vm, cframe *frame); 45 | 46 | static status_t is_null(cvm *vm, cframe *frame); 47 | static status_t type(cvm *vm, cframe *frame); 48 | static status_t str(cvm *vm, cframe *frame); 49 | static status_t word(cvm *vm, cframe *frame); 50 | 51 | static status_t print(cvm *vm, cframe *frame); 52 | static status_t conf(cvm *vm, cframe *frame); 53 | static status_t attr(cvm *vm, cframe *frame); 54 | 55 | // GUI 56 | static status_t ui_put(cvm *vm, cframe *frame); 57 | }; 58 | } 59 | 60 | #endif //CLIBLISP_CSUB_H 61 | -------------------------------------------------------------------------------- /cunit.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: clibparser 3 | // Created by CC 4 | // 5 | 6 | #ifndef CLIBPARSER_CUNIT_H 7 | #define CLIBPARSER_CUNIT_H 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "memory.h" 15 | 16 | #define UNIT_NODE_MEM (32 * 1024) 17 | 18 | namespace clib { 19 | 20 | enum unit_t { 21 | u_none, 22 | u_token, 23 | u_token_ref, 24 | u_rule, 25 | u_rule_ref, 26 | 
u_sequence, 27 | u_branch, 28 | u_optional, 29 | }; 30 | 31 | class unit_builder; 32 | 33 | struct unit { 34 | unit_t t; 35 | unit *next; 36 | unit *prev; 37 | unit_builder *builder; 38 | 39 | unit &operator=(const unit &u); 40 | unit &operator+(const unit &u); 41 | unit &operator|(const unit &u); 42 | unit &operator*(); 43 | unit &operator~(); 44 | unit &init(unit_builder *builder); 45 | unit &set_t(unit_t type); 46 | }; 47 | 48 | struct unit_token : public unit { 49 | lexer_t type; 50 | union { 51 | operator_t op; 52 | keyword_t keyword; 53 | } value; 54 | 55 | unit_token &set_type(lexer_t type); 56 | unit_token &set_op(operator_t op); 57 | unit_token &set_keyword(keyword_t keyword); 58 | }; 59 | 60 | struct unit_collection : public unit { 61 | bool skip; 62 | unit *child; 63 | 64 | unit_collection &set_skip(bool skip); 65 | unit_collection &set_child(unit *node); 66 | }; 67 | 68 | struct unit_rule : public unit_collection { 69 | const char *s; 70 | 71 | unit_rule &set_s(const char *str); 72 | }; 73 | 74 | struct nga_edge; 75 | struct nga_edge_list; 76 | 77 | struct nga_status { 78 | const char *label; 79 | bool final; 80 | nga_edge_list *in, *out; 81 | }; 82 | 83 | struct pda_status : public nga_status { 84 | int rule; 85 | }; 86 | 87 | struct nga_edge { 88 | nga_status *begin, *end; 89 | bool skip; 90 | unit *data; 91 | }; 92 | 93 | enum pda_edge_t { 94 | e_shift, 95 | e_pass, 96 | e_move, 97 | e_left_recursion, 98 | e_reduce, 99 | e_finish, 100 | }; 101 | 102 | struct pda_edge : public nga_edge { 103 | pda_edge_t type; 104 | }; 105 | 106 | struct nga_edge_list { 107 | nga_edge_list *prev, *next; 108 | nga_edge *edge; 109 | }; 110 | 111 | unit_rule *to_rule(unit *u); 112 | unit_token *to_token(unit *u); 113 | unit_collection *to_collection(unit *u); 114 | unit_collection *to_ref(unit *u); 115 | const string_t &pda_edge_str(pda_edge_t type); 116 | const int &pda_edge_priority(pda_edge_t type); 117 | 118 | class unit_builder { 119 | public: 120 | virtual 
unit_collection &append(unit *collection, unit *child) = 0; 121 | virtual unit_collection &merge(unit *a, unit *b) = 0; 122 | virtual unit_collection &collection(unit *a, unit *b, unit_t type) = 0; 123 | virtual unit_collection &optional(unit *a) = 0; 124 | virtual unit *copy(unit *u) = 0; 125 | 126 | virtual nga_edge *enga(unit *node, bool init) = 0; 127 | virtual nga_edge *enga(unit *node, unit *u) = 0; 128 | virtual nga_edge *connect(nga_status *a, nga_status *b, bool is_pda = false) = 0; 129 | }; 130 | 131 | struct nga_rule { 132 | int id; 133 | unit_rule *u; 134 | nga_status *status; 135 | int recursive; 136 | std::unordered_set tokensList; 137 | std::unordered_set tokensFirstset; 138 | std::unordered_set rulesFirstset; 139 | }; 140 | 141 | struct pda_trans { 142 | int jump; 143 | pda_edge_t type; 144 | int status; 145 | string_t label; 146 | std::vector LA; 147 | }; 148 | 149 | struct pda_rule { 150 | int id; 151 | int rule; 152 | bool final; 153 | coll_t coll; 154 | string_t label; 155 | std::vector trans; 156 | }; 157 | 158 | // 文法表达式 159 | class cunit : public unit_builder { 160 | public: 161 | cunit() = default; 162 | ~cunit() = default; 163 | 164 | cunit(const cunit &) = delete; 165 | cunit &operator=(const cunit &) = delete; 166 | 167 | unit &token(const lexer_t &type); 168 | unit &token(const operator_t &op); 169 | unit &token(const keyword_t &keyword); 170 | unit &rule(const string_t &s, coll_t t); 171 | 172 | unit_collection &append(unit *collection, unit *child) override; 173 | unit_collection &merge(unit *a, unit *b) override; 174 | unit_collection &collection(unit *a, unit *b, unit_t type) override; 175 | unit_collection &optional(unit *a) override; 176 | unit *copy(unit *u) override; 177 | 178 | nga_edge *enga(unit *node, bool init) override; 179 | nga_edge *enga(unit *node, unit *u) override; 180 | nga_edge *connect(nga_status *a, nga_status *b, bool is_pda = false) override; 181 | 182 | const std::vector &get_pda() const; 183 | 184 | private: 
185 | nga_status *status(); 186 | pda_status *status(const char *label, int rule, bool final); 187 | void add_edge(nga_edge_list *&list, nga_edge *edge); 188 | void remove_edge(nga_edge_list *&list, nga_edge_list *edge); 189 | const char *label(unit *focused, bool front); 190 | void label(unit *node, unit *parent, unit *focused, bool front, std::ostream &os); 191 | void disconnect(nga_status *status); 192 | 193 | public: 194 | void gen(unit *root); 195 | void dump(std::ostream &os); 196 | 197 | private: 198 | void gen_nga(); 199 | void check_nga(); 200 | void gen_pda(unit *root); 201 | 202 | static nga_edge *conv_nga(unit *u); 203 | nga_status *delete_epsilon(nga_edge *edge); 204 | 205 | private: 206 | const char *str(const string_t &s); 207 | 208 | private: 209 | memory_pool nodes; 210 | std::unordered_set strings; 211 | std::vector labels; 212 | std::map rules; 213 | std::unordered_map rulesMap; 214 | std::vector pdas; 215 | unit_rule *current_rule{nullptr}; 216 | }; 217 | }; 218 | 219 | 220 | #endif //CLIBPARSER_CUNIT_H 221 | -------------------------------------------------------------------------------- /cvm.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include "cvm.h" 10 | #include "cast.h" 11 | #include "csub.h" 12 | 13 | namespace clib { 14 | 15 | cvm::cvm() { 16 | set_free_callback(); 17 | builtin(); 18 | } 19 | 20 | void cvm::builtin() { 21 | global_env = val_obj(ast_env); 22 | global_env->val._env.env = new cval::cenv_t(); 23 | global_env->val._env.parent = nullptr; 24 | mem.push_root(global_env); 25 | #if SHOW_ALLOCATE_NODE 26 | printf("[DEBUG] ALLOC | addr: 0x%p, node: %-10s\n", global_env, cast::ast_str(global_env->type).c_str()); 27 | #endif 28 | builtin_init(); 29 | mem.pop_root(); 30 | mem.protect(global_env); 31 | builtin_load(); 32 | } 33 | 34 | cval *cvm::val_obj(ast_t type) { 35 | auto v = 
mem.alloc(); 36 | v->type = type; 37 | v->next = nullptr; 38 | return v; 39 | } 40 | 41 | cval *cvm::val_str(ast_t type, const char *str) { 42 | auto len = strlen(str); 43 | auto v = (cval *) mem.alloc(sizeof(cval) + len + 1); 44 | v->type = type; 45 | v->next = nullptr; 46 | v->val._string = ((char *) v) + sizeof(cval); 47 | strncpy((char *) v->val._string, str, len); 48 | return v; 49 | } 50 | 51 | cval *cvm::val_sub(const char *name, csub sub) { 52 | auto len = strlen(name); 53 | auto v = (cval *) mem.alloc(sizeof(cval) + len + 1); 54 | v->type = ast_sub; 55 | v->next = nullptr; 56 | auto str = ((char *) v) + sizeof(cval); 57 | strncpy(str, name, len); 58 | v->val._sub.vm = this; 59 | v->val._sub.sub = sub; 60 | return v; 61 | } 62 | 63 | cval *cvm::val_char(char c) { 64 | auto v = mem.alloc(); 65 | v->type = ast_char; 66 | v->val._char = c; 67 | v->next = nullptr; 68 | return v; 69 | } 70 | 71 | cval *cvm::val_sub(cval *val) { 72 | auto name = ((char *) val) + sizeof(cval); 73 | auto sub = val_sub(name, val->val._sub.sub); 74 | sub->val._sub.vm = val->val._sub.vm; 75 | return sub; 76 | } 77 | 78 | cval *cvm::val_bool(bool flag) { 79 | auto v = val_obj(ast_int); 80 | v->val._int = flag ? 
1 : 0; 81 | return v; 82 | } 83 | 84 | static cval **lambda_env(cval *val) { 85 | return (cval **) ((char *) val + sizeof(cval)); 86 | } 87 | 88 | cval *cvm::val_lambda(cval *param, cval *body, cval *env) { 89 | auto v = (cval *) mem.alloc(sizeof(cval) + sizeof(cval *)); 90 | v->type = ast_lambda; 91 | v->next = nullptr; 92 | mem.push_root(v); 93 | v->val._lambda.param = copy(param); 94 | v->val._lambda.body = copy(body); 95 | if (env == global_env) { 96 | *lambda_env(v) = new_env(env); 97 | } else { 98 | auto _env = *lambda_env(v) = new_env(env->val._env.parent); 99 | mem.push_root(_env); 100 | auto &_new_env = *_env->val._env.env; 101 | for (auto &en : *env->val._env.env) { 102 | _new_env.insert(std::make_pair(en.first, copy(en.second))); 103 | } 104 | mem.pop_root(); 105 | } 106 | mem.pop_root(); 107 | return v; 108 | } 109 | 110 | static char *sub_name(cval *val) { 111 | return (char *) val + sizeof(cval); 112 | } 113 | 114 | uint cvm::children_size(cval *val) { 115 | if (!val || (val->type != ast_sexpr && val->type != ast_qexpr)) 116 | return 0; 117 | return val->val._v.count; 118 | } 119 | 120 | cval *cvm::conv(ast_node *node, cval *env) { 121 | if (node == nullptr) 122 | return nullptr; 123 | auto type = (ast_t) node->flag; 124 | switch (type) { 125 | case ast_root: // 根结点,全局声明 126 | return conv(node->child, env); 127 | case ast_sexpr: 128 | if (!node->child) 129 | error("S-exp: missing value"); 130 | if (node->child->flag == ast_literal || node->child->flag == ast_sexpr) { 131 | auto v = val_obj(type); 132 | mem.push_root(v); 133 | #if SHOW_ALLOCATE_NODE 134 | printf("[DEBUG] ALLOC | addr: 0x%p, node: %-10s, count: %d\n", v, cast::ast_str(type).c_str(), 135 | cast::children_size(node)); 136 | #endif 137 | v->val._v.child = nullptr; 138 | auto i = node->child; 139 | auto local = conv(i, env); 140 | v->val._v.child = local; 141 | v->val._v.count = 1; 142 | i = i->next; 143 | while (i != node->child) { 144 | v->val._v.count++; 145 | local->next = conv(i, env); 
146 | local = local->next; 147 | i = i->next; 148 | } 149 | mem.pop_root(); 150 | return v; 151 | } else { 152 | error("S-exp: missing literal"); 153 | } 154 | break; 155 | case ast_qexpr: 156 | if (!node->child) { 157 | auto v = val_obj(type); 158 | v->val._v.count = 0; 159 | v->val._v.child = nullptr; 160 | return v; 161 | } else { 162 | auto v = val_obj(type); 163 | mem.push_root(v); 164 | #if SHOW_ALLOCATE_NODE 165 | printf("[DEBUG] ALLOC | addr: 0x%p, node: %-10s, count: %d\n", node, cast::ast_str(type).c_str(), 166 | cast::children_size(node)); 167 | #endif 168 | v->val._v.child = nullptr; 169 | auto i = node->child; 170 | auto local = conv(i, env); 171 | v->val._v.child = local; 172 | v->val._v.count = 1; 173 | i = i->next; 174 | while (i != node->child) { 175 | v->val._v.count++; 176 | local->next = conv(i, env); 177 | local = local->next; 178 | i = i->next; 179 | } 180 | mem.pop_root(); 181 | return v; 182 | } 183 | case ast_string: { 184 | auto v = val_str(type, node->data._string); 185 | #if SHOW_ALLOCATE_NODE 186 | printf("[DEBUG] ALLOC | addr: 0x%p, node: %-10s, val: %s\n", v, cast::ast_str(type).c_str(), 187 | v->val._string); 188 | #endif 189 | return v; 190 | } 191 | case ast_literal: { 192 | auto v = val_str(type, node->data._string); 193 | #if SHOW_ALLOCATE_NODE 194 | printf("[DEBUG] ALLOC | addr: 0x%p, node: %-10s, val: %s\n", v, cast::ast_str(type).c_str(), 195 | v->val._string); 196 | #endif 197 | return v; 198 | } 199 | #if SHOW_ALLOCATE_NODE 200 | #define DEFINE_VAL(t) \ 201 | case ast_##t: { \ 202 | auto v = val_obj(type); \ 203 | v->val._##t = node->data._##t; \ 204 | printf("[DEBUG] ALLOC | addr: 0x%p, node: %-10s, val: ", v, cast::ast_str(type).c_str()); \ 205 | print(v, std::cout); \ 206 | std::cout << std::endl; \ 207 | return v; } 208 | #else 209 | #define DEFINE_VAL(t) \ 210 | case ast_##t: { \ 211 | auto v = val_obj(type); \ 212 | v->val._##t = node->data._##t; \ 213 | return v; } 214 | #endif 215 | DEFINE_VAL(char) 216 | 
DEFINE_VAL(uchar) 217 | DEFINE_VAL(short) 218 | DEFINE_VAL(ushort) 219 | DEFINE_VAL(int) 220 | DEFINE_VAL(uint) 221 | DEFINE_VAL(long) 222 | DEFINE_VAL(ulong) 223 | DEFINE_VAL(float) 224 | DEFINE_VAL(double) 225 | #undef DEFINE_VAL 226 | default: 227 | break; 228 | } 229 | error("invalid val type"); 230 | return nullptr; 231 | } 232 | 233 | status_t cvm::call(csub fun, cval *val, cval *env, cval **ret) { 234 | auto frame = eval_mem.alloc(); 235 | memset(frame, 0, sizeof(cframe)); 236 | frame->fun = fun; 237 | frame->val = val; 238 | frame->env = env; 239 | frame->ret = ret; 240 | eval_stack.push_back(frame); 241 | return s_call; 242 | } 243 | 244 | void cvm::prepare(ast_node *node) { 245 | if (!root) { 246 | mem.save_stack(); 247 | root = conv(node, global_env); 248 | ret = nullptr; 249 | call(eval, root, global_env, &ret); 250 | } 251 | } 252 | 253 | cval *cvm::run(int cycle, int &cycles) { 254 | // 自己实现调用栈 255 | for (auto i = 0; !eval_stack.empty() && i < cycle; i++) { 256 | cycles++; 257 | auto frame = eval_stack.back(); 258 | auto r = frame->fun(this, frame); 259 | if (r == s_ret) { 260 | eval_mem.free(frame); 261 | eval_stack.pop_back(); 262 | } 263 | if (r == s_sleep) { 264 | return nullptr; 265 | } 266 | } 267 | if (ret == nullptr) 268 | return nullptr; 269 | assert(ret); 270 | root = nullptr; 271 | eval_stack.clear(); 272 | eval_mem.clear(); 273 | eval_tmp.clear(); 274 | return ret; 275 | } 276 | 277 | void cvm::error(const string_t &info) { 278 | printf("COMPILER ERROR: %s\n", info.c_str()); 279 | throw std::exception(); 280 | } 281 | 282 | void cvm::print(cval *val, std::ostream &os) { 283 | if (!val) 284 | return; 285 | switch (val->type) { 286 | case ast_root: 287 | break; 288 | case ast_env: 289 | break; 290 | case ast_lambda: 291 | os << "val._lambda.param, os); 293 | os << ' '; 294 | print(val->val._lambda.body, os); 295 | os << ">"; 296 | break; 297 | case ast_sub: 298 | os << ""; 299 | break; 300 | case ast_sexpr: { 301 | os << '('; 302 | auto head 
= val->val._v.child; 303 | while (head) { 304 | print(head, os); 305 | head = head->next; 306 | } 307 | os << ')'; 308 | } 309 | break; 310 | case ast_qexpr: 311 | if (val->val._v.count == 0) { 312 | os << "nil"; 313 | } else { 314 | os << '`'; 315 | auto head = val->val._v.child; 316 | if (val->val._v.count == 1) { 317 | print(head, os); 318 | } else { 319 | os << '('; 320 | while (head) { 321 | print(head, os); 322 | head = head->next; 323 | } 324 | os << ')'; 325 | } 326 | } 327 | break; 328 | case ast_literal: 329 | os << val->val._string; 330 | break; 331 | case ast_string: 332 | os << '"' << cast::display_str(val->val._string) << '"'; 333 | break; 334 | case ast_char: 335 | if (isprint(val->val._char)) 336 | os << '\'' << val->val._char << '\''; 337 | else if (val->val._char == '\n') 338 | os << "'\\n'"; 339 | else 340 | os << "'\\x" << std::setiosflags(std::ios::uppercase) << std::hex 341 | << std::setfill('0') << std::setw(2) 342 | << (unsigned int) val->val._char << '\''; 343 | break; 344 | case ast_uchar: 345 | os << (unsigned int) val->val._uchar; 346 | break; 347 | case ast_short: 348 | os << val->val._short; 349 | break; 350 | case ast_ushort: 351 | os << val->val._ushort; 352 | break; 353 | case ast_int: 354 | os << val->val._int; 355 | break; 356 | case ast_uint: 357 | os << val->val._uint; 358 | break; 359 | case ast_long: 360 | os << val->val._long; 361 | break; 362 | case ast_ulong: 363 | os << val->val._ulong; 364 | break; 365 | case ast_float: 366 | os << val->val._float; 367 | break; 368 | case ast_double: 369 | os << val->val._double; 370 | break; 371 | } 372 | if (val->next) { 373 | os << ' '; 374 | } 375 | } 376 | 377 | void cvm::gc() { 378 | #if SHOW_ALLOCATE_NODE 379 | dump(); 380 | #endif 381 | mem.gc(); 382 | #if SHOW_ALLOCATE_NODE 383 | printf("[DEBUG] MEM | Alive objects: %lu\n", mem.count()); 384 | #endif 385 | } 386 | 387 | cval *cvm::copy(cval *val) { 388 | cval *new_val{nullptr}; 389 | switch (val->type) { 390 | case ast_root: 391 
| case ast_env: 392 | error("not supported"); 393 | break; 394 | case ast_lambda: 395 | new_val = val_lambda(val->val._lambda.param, val->val._lambda.body, *lambda_env(val)); 396 | break; 397 | case ast_sub: 398 | new_val = val_sub(val); 399 | new_val->val._sub.vm = val->val._sub.vm; 400 | break; 401 | case ast_sexpr: 402 | case ast_qexpr: 403 | new_val = val_obj(val->type); 404 | new_val->val._v.count = val->val._v.count; 405 | if (new_val->val._v.count > 0) { 406 | mem.push_root(new_val); 407 | auto head = val->val._v.child; 408 | new_val->val._v.child = copy(head); 409 | if (val->val._v.count > 1) { 410 | auto _head = new_val->val._v.child; 411 | head = head->next; 412 | while (head) { 413 | _head->next = copy(head); 414 | head = head->next; 415 | _head = _head->next; 416 | } 417 | } 418 | mem.pop_root(); 419 | } else { 420 | new_val->val._v.child = nullptr; 421 | } 422 | break; 423 | case ast_literal: 424 | case ast_string: 425 | new_val = val_str(val->type, val->val._string); 426 | break; 427 | case ast_char: 428 | case ast_uchar: 429 | case ast_short: 430 | case ast_ushort: 431 | case ast_int: 432 | case ast_uint: 433 | case ast_long: 434 | case ast_ulong: 435 | case ast_float: 436 | case ast_double: 437 | new_val = val_obj(val->type); 438 | std::memcpy((char *) &new_val->val, (char *) &val->val, sizeof(val->val)); 439 | break; 440 | default: 441 | error("invalid copy"); 442 | break; 443 | } 444 | #if SHOW_ALLOCATE_NODE 445 | printf("[DEBUG] COPY | addr: 0x%p, node: %-10s, val: ", new_val, cast::ast_str(val->type).c_str()); 446 | print(val, std::cout); 447 | std::cout << std::endl; 448 | #endif 449 | return new_val; 450 | } 451 | 452 | cval *cvm::calc_symbol(const char *sym, cval *env) { 453 | while (env) { 454 | auto &_env = *env->val._env.env; 455 | auto f = _env.find(sym); 456 | if (f != _env.end()) { 457 | return copy(f->second); 458 | } 459 | env = env->val._env.parent; 460 | } 461 | printf("invalid symbol: %s\n", sym); 462 | error("cannot find symbol"); 
463 | return nullptr; 464 | } 465 | 466 | cval *cvm::def(cval *env, const char *sym, cval *val) { 467 | auto e = env; 468 | while (env) { 469 | auto &_env = *env->val._env.env; 470 | auto f = _env.find(sym); 471 | if (f != _env.end()) { 472 | mem.push_root(env); 473 | auto new_val = copy(val); 474 | mem.pop_root(); 475 | mem.unlink(env, f->second); 476 | _env[sym] = new_val; 477 | return new_val; 478 | } 479 | env = env->val._env.parent; 480 | } 481 | mem.push_root(e); 482 | auto new_val = copy(val); 483 | mem.pop_root(); 484 | (*e->val._env.env)[sym] = new_val; 485 | return new_val; 486 | } 487 | 488 | cval *cvm::new_env(cval *env) { 489 | auto _env = val_obj(ast_env); 490 | _env->val._env.env = new cval::cenv_t(); 491 | _env->val._env.parent = env; 492 | return _env; 493 | } 494 | 495 | void cvm::set_free_callback() { 496 | #if SHOW_ALLOCATE_NODE 497 | mem.set_callback([](void *ptr) { 498 | cval *val = (cval *) ptr; 499 | printf("[DEBUG] GC | free: 0x%p, node: %-10s, ", ptr, cast::ast_str(val->type).c_str()); 500 | if (val->type == ast_sexpr || val->type == ast_qexpr) { 501 | printf("count: %lu\n", children_size(val)); 502 | } else if (val->type == ast_literal) { 503 | printf("id: %s\n", val->val._string); 504 | } else if (val->type == ast_env) { 505 | printf("env: %d\n", val->val._env.env->size()); 506 | delete val->val._env.env; 507 | } else if (val->type == ast_sub) { 508 | printf("name: %s\n", sub_name(val)); 509 | } else { 510 | printf("val: "); 511 | print(val, std::cout); 512 | std::cout << std::endl; 513 | } 514 | }); 515 | mem.set_dump_callback([](void *ptr, int level) { 516 | cval *val = (cval *) ptr; 517 | printf("[DEBUG] DUMP | "); 518 | std::cout << std::setfill('_') << std::setw(level << 2) << ""; 519 | printf("addr: 0x%p, node: %-10s, ", ptr, cast::ast_str(val->type).c_str()); 520 | if (val->type == ast_sexpr || val->type == ast_qexpr) { 521 | printf("count: %lu\n", children_size(val)); 522 | } else if (val->type == ast_literal) { 523 | printf("id: 
%s\n", val->val._string); 524 | } else if (val->type == ast_env) { 525 | printf("env: %d\n", val->val._env.env->size()); 526 | } else if (val->type == ast_sub) { 527 | printf("name: %s\n", sub_name(val)); 528 | } else { 529 | printf("val: "); 530 | print(val, std::cout); 531 | std::cout << std::endl; 532 | } 533 | }); 534 | #else 535 | mem.set_callback([](void *ptr) { 536 | cval *val = (cval *) ptr; 537 | if (val->type == ast_env) { 538 | delete val->val._env.env; 539 | } 540 | }); 541 | #endif 542 | } 543 | 544 | void cvm::save() { 545 | mem.save_stack(); 546 | } 547 | 548 | void cvm::restore() { 549 | root = nullptr; 550 | mem.restore_stack(); 551 | eval_stack.clear(); 552 | eval_mem.clear(); 553 | eval_tmp.clear(); 554 | } 555 | 556 | void cvm::dump() { 557 | #if SHOW_ALLOCATE_NODE 558 | mem.dump(std::cout); 559 | #endif 560 | } 561 | 562 | void cvm::reset() { 563 | global_env = nullptr; 564 | mem.clear(); 565 | eval_stack.clear(); 566 | eval_mem.clear(); 567 | eval_tmp.clear(); 568 | builtin(); 569 | } 570 | } 571 | -------------------------------------------------------------------------------- /cvm.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #ifndef CLIBLISP_CVM_H 7 | #define CLIBLISP_CVM_H 8 | 9 | #define VM_MEM (32 * 1024) 10 | #define VM_EVAL (32 * 1024) 11 | #define VM_TMP (32 * 1024) 12 | #define SHOW_ALLOCATE_NODE 0 13 | 14 | #include 15 | #include "cast.h" 16 | #include "memory_gc.h" 17 | 18 | namespace clib { 19 | 20 | class cvm; 21 | struct cframe; 22 | 23 | enum status_t { 24 | s_ret, 25 | s_call, 26 | s_sleep, 27 | s_error, 28 | }; 29 | 30 | using ctmp = void *; 31 | 32 | struct cval { 33 | using cenv_t = std::unordered_map; 34 | using csub_t = status_t (*)(cvm *vm, cframe *frame); 35 | ast_t type; 36 | cval *next; 37 | union { 38 | struct { 39 | uint count; 40 | cval *child; 41 | } _v; 42 | struct { 43 | cval *parent; 44 | cenv_t *env; 
45 | } _env; 46 | struct { 47 | void *vm; 48 | csub_t sub; 49 | } _sub; 50 | struct { 51 | cval *param; 52 | cval *body; 53 | } _lambda; 54 | const char *_string; 55 | #define DEFINE_CVAL(t) LEX_T(t) _##t; 56 | DEFINE_CVAL(char) 57 | DEFINE_CVAL(uchar) 58 | DEFINE_CVAL(short) 59 | DEFINE_CVAL(ushort) 60 | DEFINE_CVAL(int) 61 | DEFINE_CVAL(uint) 62 | DEFINE_CVAL(long) 63 | DEFINE_CVAL(ulong) 64 | DEFINE_CVAL(float) 65 | DEFINE_CVAL(double) 66 | #undef DEFINE_CVAL 67 | } val; 68 | }; 69 | 70 | using cenv = cval::cenv_t; 71 | using csub = cval::csub_t; 72 | 73 | struct cframe { 74 | csub fun; 75 | cval *val, *env, **ret; 76 | void *arg; 77 | }; 78 | 79 | class cvm { 80 | public: 81 | cvm(); 82 | ~cvm() = default; 83 | 84 | cvm(const cvm &) = delete; 85 | cvm &operator=(const cvm &) = delete; 86 | 87 | friend class builtins; 88 | 89 | void prepare(ast_node *node); 90 | cval *run(int cycle, int &cycles); 91 | void gc(); 92 | 93 | static void print(cval *val, std::ostream &os); 94 | 95 | void save(); 96 | void restore(); 97 | 98 | void error(const string_t &info); 99 | 100 | void dump(); 101 | void reset(); 102 | 103 | private: 104 | void builtin(); 105 | void builtin_init(); 106 | void builtin_load(); 107 | cval *conv(ast_node *node, cval *env); 108 | 109 | status_t call(csub fun, cval *val, cval *env, cval **ret); 110 | 111 | int calc(int op, ast_t type, cval *r, cval *v, cval *env); 112 | cval *calc_op(int op, cval *val, cval *env); 113 | cval *calc_symbol(const char *sym, cval *env); 114 | cval *def(cval *env, const char *sym, cval *val); 115 | cval *calc_sub(const char *sub, cval *val, cval *env); 116 | 117 | static status_t eval(cvm *vm, cframe *frame); 118 | static status_t eval_one(cvm *vm, cframe *frame); 119 | static status_t eval_child(cvm *vm, cframe *frame); 120 | 121 | cval *val_obj(ast_t type); 122 | cval *val_str(ast_t type, const char *str); 123 | cval *val_char(char c); 124 | cval *val_sub(const char *name, csub sub); 125 | cval *val_sub(cval *val); 126 
| cval *val_bool(bool flag); 127 | cval *val_lambda(cval *param, cval *body, cval *env); 128 | 129 | cval *copy(cval *val); 130 | cval *new_env(cval *env); 131 | 132 | static uint children_size(cval *val); 133 | 134 | void set_free_callback(); 135 | 136 | private: 137 | cval *global_env{nullptr}; 138 | memory_pool_gc mem; 139 | std::vector eval_stack; 140 | memory_pool eval_mem; 141 | memory_pool eval_tmp; 142 | cval *root{nullptr}; 143 | cval *ret{nullptr}; 144 | }; 145 | } 146 | 147 | #endif //CLIBLISP_CVM_H 148 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include "cparser.h" 10 | #include "cvm.h" 11 | #include "cgui.h" 12 | 13 | #define FPS 30 14 | #define FRAME_SPAN (1.0 / FPS) 15 | 16 | static std::chrono::system_clock::time_point last_clock; 17 | static clib::decimal dt; 18 | static clib::decimal dt_inv; 19 | static bool paused; 20 | static string_t title; 21 | 22 | /** 23 | * 绘制文字 24 | * @param x X坐标 25 | * @param y Y坐标 26 | * @param format 格式化字符串 27 | */ 28 | static void draw_text(int x, int y, const char *format, ...) { 29 | glMatrixMode(GL_PROJECTION); 30 | glPushMatrix(); 31 | glLoadIdentity(); 32 | int w = glutGet(GLUT_WINDOW_WIDTH); 33 | int h = glutGet(GLUT_WINDOW_HEIGHT); 34 | gluOrtho2D(0, w, h, 0); // 正射投影,无3D透视效果,直接打到屏幕上 35 | // gluOrtho2D 裁剪面(最终窗口呈现的)是一个左下角点为(left,bottom)、右上角点为(right,top)的矩形 36 | // 这个投影跟Windows的窗口绘制一样,以左上为(left,top),右下为(right,top),但与数学上的直角坐标系不同! 
37 | glMatrixMode(GL_MODELVIEW); // 为什么要添加这句话,因为在绘制物体中修改了视图 38 | glPushMatrix(); 39 | glLoadIdentity(); 40 | 41 | glColor3f(0.9f, 0.9f, 0.9f); // 文字颜色为90%白 42 | glRasterPos2i(x, y); // 设置文字的起始位置 43 | 44 | char buffer[256]; // 这里暂不做缓冲区溢出判断 45 | va_list args; 46 | va_start(args, format); 47 | int len = vsprintf(buffer, format, args); // 格式化字符串 48 | va_end(args); 49 | for (int i = 0; i < len; ++i) { 50 | glutBitmapCharacter(GLUT_BITMAP_9_BY_15, buffer[i]); // 第一个参数为字体,第二个参数为字符 51 | } 52 | 53 | glPopMatrix(); 54 | glMatrixMode(GL_PROJECTION); 55 | glPopMatrix(); 56 | } 57 | 58 | void display() { 59 | glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // 清空屏幕 60 | 61 | int h = glutGet(GLUT_WINDOW_HEIGHT); // 窗口的高 62 | int w = glutGet(GLUT_WINDOW_WIDTH); // 窗口的宽 63 | 64 | clib::cgui::singleton().draw(); 65 | 66 | // 绘制文字 67 | draw_text(10, 20, "cliblisp @bajdcc"); // 暂不支持中文 68 | draw_text(w - 110, 20, "FPS: %.1f", dt_inv); 69 | draw_text(10, h - 20, "#clibos"); 70 | if (paused) 71 | draw_text(w / 2 - 30, 20, "PAUSED"); 72 | 73 | draw_text(w / 2 - 200, (glutGet(GLUT_SCREEN_WIDTH) < 1920) ? 
60 : 80, title.c_str()); 74 | 75 | glutSwapBuffers(); // 切换双缓冲 76 | } 77 | 78 | void reshape(int width, int height) { 79 | glViewport(0, 0, width, height); // 改变视口大小 80 | glMatrixMode(GL_PROJECTION);// 透视投影 81 | glLoadIdentity(); // 重置成单位矩阵 82 | gluPerspective(45.0, width / (float) height, 0.1, 100.0); // 透视投影 83 | } 84 | 85 | void keyboard(unsigned char key, int x, int y) { 86 | if (key >= '0' && key <= '9') { 87 | } else { 88 | switch (key) { 89 | case 27: 90 | glutLeaveMainLoop(); // 按ESC退出 91 | break; 92 | case ' ': 93 | paused = !paused; 94 | break; 95 | default: 96 | break; 97 | } 98 | } 99 | } 100 | 101 | void mouse(int button, int state, int x, int y) { 102 | if (button == GLUT_LEFT_BUTTON) { 103 | } 104 | } 105 | 106 | void motion(int x, int y) { 107 | } 108 | 109 | void idle() { 110 | auto now = std::chrono::high_resolution_clock::now(); 111 | // 计算每帧时间间隔 112 | dt = std::chrono::duration_cast>(now - last_clock).count(); 113 | 114 | // 锁帧 115 | if (dt > FRAME_SPAN) { 116 | dt_inv = 1.0 / dt; 117 | last_clock = now; 118 | display(); 119 | } 120 | } 121 | 122 | void entry(int state) { 123 | paused = state == GLUT_LEFT; 124 | } 125 | 126 | int main(int argc, char *argv[]) { 127 | glutInit(&argc, argv); 128 | if (glutGet(GLUT_SCREEN_WIDTH) < 1920) { 129 | glutInitWindowSize(800, 600); 130 | glutInitWindowPosition(50, 50); 131 | } else { 132 | glutInitWindowSize(1200, 900); 133 | glutInitWindowPosition(50, 50); 134 | } 135 | glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); // GLUT_DOUBLE 启用双缓冲,避免闪屏 136 | glutCreateWindow("cliblisp system -- bajdcc"); 137 | glutDisplayFunc(&idle); // 绘制 138 | glutReshapeFunc(&reshape); // 窗口大小改变事件 139 | glutMouseFunc(&mouse); // 鼠标点击事件 140 | glutMotionFunc(&motion); // 鼠标拖动事件 141 | glutKeyboardFunc(&keyboard); // 键盘输入 142 | glutIdleFunc(&idle); // 没有事件输入时调用,这里不用它 143 | glutEntryFunc(&entry); // 没有事件输入时调用,这里不用它 144 | glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_CONTINUE_EXECUTION); 145 | glutMainLoop(); // 主事件循环 146 | 
return 0; 147 | } -------------------------------------------------------------------------------- /memory.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Author: bajdcc 4 | // 5 | 6 | #ifndef CLIBLISP_MEMORY_H 7 | #define CLIBLISP_MEMORY_H 8 | 9 | #include 10 | #include 11 | #include 12 | #include "types.h" 13 | 14 | namespace clib { 15 | // 默认的内存分配策略 16 | template 17 | class default_allocator { 18 | public: 19 | static const size_t DEFAULT_ALLOC_BLOCK_SIZE = DefaultSize; 20 | 21 | template 22 | T *__alloc() { 23 | return new T; 24 | } 25 | 26 | template 27 | T *__alloc_array(uint size) { 28 | return new T[size]; 29 | } 30 | 31 | template 32 | T *__alloc_args(const TArgs &&... args) { 33 | return new T(std::forward(args)...); 34 | } 35 | 36 | template 37 | T *__alloc_array_args(uint size, const TArgs &&... args) { 38 | return new T[size]; 39 | } 40 | 41 | template 42 | bool __free(T *t) { 43 | delete t; 44 | return true; 45 | } 46 | 47 | template 48 | bool __free_array(T *t) { 49 | delete[] t; 50 | return true; 51 | } 52 | }; 53 | 54 | // 原始内存池 55 | template 56 | class legacy_memory_pool { 57 | public: 58 | // 块 59 | struct block { 60 | size_t size; // 数据部分的大小 61 | uint flag; // 参数 62 | block *prev; // 前指针 63 | block *next; // 后指针 64 | }; 65 | 66 | // 块参数 67 | enum block_flag { 68 | BLOCK_USING = 0, 69 | BLOCK_MARK = 1, 70 | }; 71 | 72 | // 块的元信息部分的大小 73 | static const size_t BLOCK_SIZE = sizeof(block); 74 | // 块大小掩码 75 | static const uint BLOCK_SIZE_MASK = BLOCK_SIZE - 1; 76 | 77 | private: 78 | // 内存管理接口 79 | Allocator allocator; 80 | // 块链表头指针 81 | block *block_head{nullptr}; 82 | // 用于循环遍历的指针 83 | block *block_current{nullptr}; 84 | // 空闲块数 85 | size_t block_available_size{0}; 86 | 87 | // ------------------------ // 88 | 89 | // 块大小对齐 90 | static size_t block_align(size_t size) { 91 | if ((size & BLOCK_SIZE_MASK) == 0) 92 | return size / BLOCK_SIZE; 93 | return (size / BLOCK_SIZE) + 1; 
94 | } 95 | 96 | // 块初始化 97 | static void block_init(block *blk, size_t size) { 98 | blk->size = size; 99 | blk->flag = 0; 100 | blk->prev = nullptr; 101 | blk->next = nullptr; 102 | } 103 | 104 | // 块连接 105 | static void block_connect(block *blk, block *new_blk) { 106 | new_blk->prev = blk; 107 | new_blk->next = blk->next; 108 | new_blk->next->prev = new_blk; 109 | blk->next = new_blk; 110 | } 111 | 112 | // 二块合并 113 | static size_t block_merge(block *blk, block *next, bool flag) { 114 | if (flag) { // prev(USING) - blk(TO FREE) - next(FREE) 115 | auto tmp = blk->size + 1; 116 | next->next->prev = blk; 117 | blk->size += next->size + 1; 118 | blk->next = next->next; 119 | return tmp; 120 | } else { // blk(FREE) - next(TO FREE) - next(USING) 121 | next->next->prev = blk; 122 | blk->size += next->size + 1; 123 | blk->next = next->next; 124 | return next->size + 1; 125 | } 126 | } 127 | 128 | // 三块合并 129 | static size_t block_merge(block *prev, block *blk, block *next) { 130 | next->next->prev = prev; 131 | prev->size += blk->size + next->size + 2; 132 | prev->next = next->next; 133 | return blk->size + 1; 134 | } 135 | 136 | // 块设置参数 137 | static void block_set_flag(block *blk, block_flag flag, uint value) { 138 | if (value) { 139 | blk->flag |= 1 << flag; 140 | } else { 141 | blk->flag &= ~(1 << flag); 142 | } 143 | } 144 | 145 | // 块获取参数 146 | static uint block_get_flag(block *blk, block_flag flag) { 147 | return (blk->flag & (1 << flag)) != 0 ? 
1 : 0; 148 | } 149 | 150 | // ------------------------ // 151 | 152 | // 创建内存池 153 | void _create() { 154 | block_head = allocator.template __alloc_array(DEFAULT_ALLOC_BLOCK_SIZE); 155 | assert(block_head); 156 | _init(); 157 | } 158 | 159 | // 初始化内存池 160 | void _init() { 161 | block_available_size = DEFAULT_ALLOC_BLOCK_SIZE - 1; 162 | block_init(block_head, block_available_size); 163 | block_head->prev = block_head->next = block_head; 164 | block_current = block_head; 165 | } 166 | 167 | // 销毁内存池 168 | void _destroy() { 169 | allocator.__free_array(block_head); 170 | } 171 | 172 | // 申请内存 173 | void *_alloc(size_t size) { 174 | if (size == 0) 175 | return nullptr; 176 | auto old_size = size; 177 | size = block_align(size); 178 | if (size >= block_available_size) 179 | return nullptr; 180 | auto blk = block_current; 181 | do { 182 | if (block_get_flag(blk, BLOCK_USING) == 0 && blk->size >= size + 1) { 183 | block_current = blk; 184 | return alloc_free_block(size); 185 | } 186 | blk = blk->next; 187 | } while (blk != block_current); 188 | return nullptr; 189 | } 190 | 191 | // 查找空闲块 192 | void *alloc_free_block(size_t size) { 193 | if (block_current->size == size + 1) // 申请的大小正好是空闲块大小 194 | { 195 | return alloc_cur_block(size + 1); 196 | } 197 | // 申请的空间小于空闲块大小,将空闲块分裂 198 | auto new_size = block_current->size - size - 1; 199 | if (new_size == 0) 200 | return alloc_cur_block(size); // 分裂后的新块空间过低,放弃分裂 201 | block *new_blk = block_current + size + 1; 202 | block_init(new_blk, new_size); 203 | block_connect(block_current, new_blk); 204 | return alloc_cur_block(size); 205 | } 206 | 207 | // 直接使用当前的空闲块 208 | void *alloc_cur_block(size_t size) { 209 | // 直接使用空闲块 210 | block_set_flag(block_current, BLOCK_USING, 1); // 设置标志为可用 211 | block_current->size = size; 212 | block_available_size -= size + 1; 213 | auto cur = static_cast(block_current + 1); 214 | block_current = block_current->next; // 指向后一个块 215 | return cur; 216 | } 217 | 218 | // 释放内存 219 | bool _free(void *p) { 
220 | auto blk = static_cast(p); 221 | --blk; // 自减得到块的元信息头 222 | if (!verify_address(blk)) 223 | return false; 224 | if (blk->next == blk) // 只有一个块 225 | { 226 | block_set_flag(blk, BLOCK_USING, 0); 227 | return true; 228 | } 229 | if (blk->prev == blk->next && block_get_flag(blk->prev, BLOCK_USING) == 0) // 只有两个块 230 | { 231 | _init(); // 两个块都空闲,直接初始化 232 | return true; 233 | } 234 | auto is_prev_free = block_get_flag(blk->prev, BLOCK_USING) == 0 && blk->prev < blk; 235 | auto is_next_free = block_get_flag(blk->next, BLOCK_USING) == 0 && blk < blk->next; 236 | auto bit = (is_prev_free << 1) + is_next_free; 237 | switch (bit) { 238 | case 0: 239 | block_available_size += blk->size + 1; 240 | block_set_flag(blk, BLOCK_USING, 0); 241 | break; 242 | case 1: 243 | if (block_current == blk->next) 244 | block_current = blk; 245 | block_available_size += block_merge(blk, blk->next, true); 246 | block_set_flag(blk, BLOCK_USING, 0); 247 | break; 248 | case 2: 249 | block_available_size += block_merge(blk->prev, blk, false); 250 | break; 251 | case 3: 252 | if (block_current == blk->next) 253 | block_current = blk->prev; 254 | block_available_size += block_merge(blk->prev, blk, blk->next); 255 | break; 256 | default: 257 | break; 258 | } 259 | return true; 260 | } 261 | 262 | // 验证地址是否合法 263 | bool verify_address(block *blk) { 264 | if (blk < block_head || blk > block_head + DEFAULT_ALLOC_MEMORY_SIZE - 1) 265 | return false; 266 | return (blk->next->prev == blk) && (blk->prev->next == blk) && (block_get_flag(blk, BLOCK_USING) == 1); 267 | } 268 | 269 | // 重新分配内存 270 | void *_realloc(void *p, uint newSize, uint clsSize) { 271 | auto blk = static_cast(p); 272 | --blk; // 自减得到块的元信息头 273 | if (!verify_address(blk)) 274 | return nullptr; 275 | auto size = block_align(newSize * clsSize); // 计算新的内存大小 276 | auto _new = _alloc(size); 277 | if (!_new) { 278 | // 空间不足 279 | _free(blk); 280 | return nullptr; 281 | } 282 | auto oldSize = blk->size; 283 | memmove(_new, p, sizeof(block) * 
__min(oldSize, size)); // 移动内存 284 | _free(p); 285 | return _new; 286 | } 287 | 288 | public: 289 | // 默认的块总数 290 | static const size_t DEFAULT_ALLOC_BLOCK_SIZE = DefaultSize; 291 | // 默认的内存总量 292 | static const size_t DEFAULT_ALLOC_MEMORY_SIZE = BLOCK_SIZE * DEFAULT_ALLOC_BLOCK_SIZE; 293 | 294 | legacy_memory_pool() { 295 | _create(); 296 | } 297 | 298 | ~legacy_memory_pool() { 299 | _destroy(); 300 | } 301 | 302 | template 303 | T *alloc() { 304 | return static_cast(_alloc(sizeof(T))); 305 | } 306 | 307 | template 308 | T *alloc_array(uint count) { 309 | return static_cast(_alloc(count * sizeof(T))); 310 | } 311 | 312 | template 313 | T *alloc_args(const TArgs &&... args) { 314 | T *obj = static_cast(_alloc(sizeof(T))); 315 | (*obj)(std::forward(args)...); 316 | return obj; 317 | } 318 | 319 | template 320 | T *alloc_array_args(uint count, const TArgs &&... args) { 321 | T *obj = static_cast(_alloc(count * sizeof(T))); 322 | for (uint i = 0; i < count; ++i) { 323 | (obj[i])(std::forward(args)...); 324 | } 325 | return obj; 326 | } 327 | 328 | template 329 | T *realloc(T *obj, uint newSize) { 330 | return static_cast(_realloc(obj, newSize, sizeof(T))); 331 | } 332 | 333 | template 334 | bool free(T *obj) { 335 | return _free(obj); 336 | } 337 | 338 | template 339 | bool free_array(T *obj) { 340 | return _free(obj); 341 | } 342 | 343 | size_t available() const { 344 | return block_available_size; 345 | } 346 | 347 | void clear() { 348 | _init(); 349 | } 350 | 351 | void dump(std::ostream &os) { 352 | auto ptr = block_head; 353 | printf("[DEBUG] MEM | Available: %lu\n", block_available_size); 354 | if (ptr->next == ptr) { 355 | if (block_get_flag(ptr, BLOCK_USING)) { 356 | dump_block(ptr, os); 357 | } else { 358 | os << "[DEBUG] MEM | All Free." 
<< std::endl; 359 | } 360 | } else { 361 | dump_block(ptr, os); 362 | ptr = ptr->next; 363 | while (ptr != block_head) { 364 | dump_block(ptr, os); 365 | ptr = ptr->next; 366 | } 367 | } 368 | } 369 | 370 | private: 371 | static void dump_block(block *blk, std::ostream &os) { 372 | printf("[DEBUG] MEM | [%p-%p] Size: %8lu, State: %s\n", blk, blk + blk->size, blk->size, block_get_flag(blk, BLOCK_USING) ? "Using" : "Free"); 373 | } 374 | }; 375 | 376 | // 基于原始内存池的内存分配策略 377 | template, size_t DefaultSize = Allocator::DEFAULT_ALLOC_BLOCK_SIZE> 378 | class legacy_memory_pool_allocator { 379 | legacy_memory_pool memory_pool; 380 | 381 | public: 382 | static const size_t DEFAULT_ALLOC_BLOCK_SIZE = DefaultSize - 2; 383 | 384 | template 385 | T *__alloc() { 386 | return memory_pool.template alloc(); 387 | } 388 | 389 | template 390 | T *__alloc_array(uint count) { 391 | return memory_pool.template alloc_array(count); 392 | } 393 | 394 | template 395 | T *__alloc_args(const TArgs &&... args) { 396 | return memory_pool.template alloc_args(std::forward(args)...); 397 | } 398 | 399 | template 400 | T *__alloc_array_args(uint count, const TArgs &&... 
args) { 401 | return memory_pool.template alloc_array_args(count, std::forward(args)...); 402 | } 403 | 404 | template 405 | T *__realloc(T *t, uint newSize) { 406 | return memory_pool.template realloc(t, newSize); 407 | } 408 | 409 | template 410 | bool __free(T *t) { 411 | return memory_pool.free(t); 412 | } 413 | 414 | template 415 | bool __free_array(T *t) { 416 | return memory_pool.free_array(t); 417 | } 418 | }; 419 | 420 | template::DEFAULT_ALLOC_BLOCK_SIZE> 421 | using memory_pool = legacy_memory_pool, DefaultSize>>; 422 | } 423 | 424 | #endif //CLIBLISP_MEMORY_H 425 | -------------------------------------------------------------------------------- /memory_gc.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Author: bajdcc 4 | // 5 | 6 | #ifndef CLIBLISP_MEMORY_GC_H 7 | #define CLIBLISP_MEMORY_GC_H 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "memory.h" 14 | #include "types.h" 15 | 16 | #define SHOW_GC 1 17 | 18 | namespace clib { 19 | 20 | template::DEFAULT_ALLOC_BLOCK_SIZE> 21 | class legacy_memory_gc { 22 | public: 23 | struct gc_header { 24 | gc_header *child; 25 | gc_header *next; 26 | gc_header *prev; 27 | }; 28 | 29 | using memory_pool_t = memory_pool; 30 | using blk_t = typename memory_pool_t::block; 31 | static const auto BLOCK_MARK = memory_pool_t::BLOCK_MARK; 32 | static const auto GC_HEADER_SIZE = sizeof(gc_header); 33 | static const auto GC_BLOCK_SIZE = sizeof(blk_t); 34 | 35 | legacy_memory_gc() { 36 | stack_roots.push_back(&stack_head); 37 | } 38 | 39 | static gc_header *header(void *ptr) { 40 | return static_cast((void *) (static_cast(ptr) - GC_HEADER_SIZE)); 41 | } 42 | 43 | static void *data(void *ptr) { 44 | return static_cast(static_cast(ptr) + GC_HEADER_SIZE); 45 | } 46 | 47 | static blk_t *block(void *ptr) { 48 | return static_cast((void *) (static_cast(ptr) - GC_BLOCK_SIZE)); 49 | } 50 | 51 | static void set_marked(void *ptr, bool 
value) { 52 | auto blk = block(ptr); 53 | if (value) { 54 | blk->flag |= 1 << BLOCK_MARK; 55 | } else { 56 | blk->flag &= ~(1 << BLOCK_MARK); 57 | } 58 | } 59 | 60 | static uint is_marked(void *ptr) { 61 | auto blk = block(ptr); 62 | return (blk->flag & (1 << BLOCK_MARK)) != 0 ? 1 : 0; 63 | } 64 | 65 | template 66 | T *alloc() { 67 | return static_cast(alloc(sizeof(T))); 68 | } 69 | 70 | void *alloc(size_t size) { 71 | auto new_node = static_cast((void *) memory.template alloc_array(GC_HEADER_SIZE + size)); 72 | assert(new_node); 73 | memset(new_node, 0, GC_HEADER_SIZE + size); 74 | auto &top = stack_roots.back(); 75 | if (top->child) { 76 | new_node->prev = top->child->prev; 77 | new_node->prev->next = new_node; 78 | new_node->next = top->child; 79 | new_node->next->prev = new_node; 80 | } else { 81 | top->child = new_node; 82 | new_node->next = new_node->prev = new_node; 83 | } 84 | objects.push_back(new_node); 85 | return (void *) (static_cast((void *) new_node) + GC_HEADER_SIZE); 86 | } 87 | 88 | void push_root(void *ptr) { 89 | stack_roots.push_back(header(ptr)); 90 | } 91 | 92 | void pop_root() { 93 | stack_roots.pop_back(); 94 | } 95 | 96 | void link(void *parent, void *ptr) { 97 | auto _parent = header(parent); 98 | auto _ptr = header(ptr); 99 | _link(_parent, _ptr); 100 | } 101 | 102 | void unlink(void *parent, void *ptr) { 103 | auto _parent = header(parent); 104 | auto _ptr = header(ptr); 105 | _unlink(_parent, _ptr); 106 | } 107 | 108 | void unlink(void *ptr) { 109 | auto _parent = stack_roots.back(); 110 | auto _ptr = header(ptr); 111 | _unlink(_parent, _ptr); 112 | } 113 | 114 | void protect(void *ptr) { 115 | roots.insert(header(ptr)); 116 | } 117 | 118 | void unprotect(void *ptr) { 119 | roots.erase(ptr); 120 | } 121 | 122 | void gc() { 123 | mark(); 124 | sweep(); 125 | } 126 | 127 | size_t count() const { 128 | return objects.size(); 129 | } 130 | 131 | void set_callback(std::function callback) { 132 | gc_callback = callback; 133 | } 134 | 135 | 
void set_dump_callback(std::function callback) { 136 | dump_callback = callback; 137 | } 138 | 139 | void save_stack() { 140 | saved_stack = stack_roots.size(); 141 | } 142 | 143 | void restore_stack() { 144 | stack_roots.erase(stack_roots.begin() + saved_stack, stack_roots.end()); 145 | } 146 | 147 | void dump(std::ostream &os) { 148 | memory.dump(os); 149 | dump_tree(); 150 | } 151 | 152 | void clear() { 153 | for (auto &obj : objects) { 154 | gc_callback(data(obj)); 155 | } 156 | objects.clear(); 157 | stack_roots.clear(); 158 | stack_roots.push_back(&stack_head); 159 | memory.clear(); 160 | saved_stack = 0; 161 | stack_head = {nullptr, nullptr, nullptr}; 162 | } 163 | 164 | private: 165 | void mark_children(gc_header *ptr) { 166 | if (ptr->child) { 167 | auto i = ptr->child; 168 | set_marked(i, true); 169 | mark_children(i); 170 | i = i->next; 171 | while (i != ptr->child) { 172 | set_marked(i, true); 173 | mark_children(i); 174 | i = i->next; 175 | } 176 | } 177 | } 178 | 179 | void _link(gc_header *parent, gc_header *ptr) { 180 | if (parent->child) { 181 | ptr->prev = parent->child->prev; 182 | ptr->prev->next = ptr; 183 | ptr->next = parent->child; 184 | ptr->next->prev = ptr; 185 | } else { 186 | parent->child = ptr; 187 | ptr->next = ptr->prev = ptr; 188 | } 189 | } 190 | 191 | void _unlink(gc_header *parent, gc_header *ptr) { 192 | if (parent->child) { 193 | auto i = parent->child; 194 | if (i->next == i) { 195 | parent->child = nullptr; 196 | return; 197 | } 198 | if (i == ptr) { 199 | parent->child = i->next; 200 | i->prev->next = parent->child; 201 | i->next->prev = i->prev; 202 | return; 203 | } else { 204 | i = i->next; 205 | } 206 | while (i != parent->child) { 207 | if (i->next == ptr) { 208 | if (i->next->next == parent->child) { 209 | i->next = parent->child; 210 | parent->child->prev = i; 211 | } else { 212 | i->next->next->prev = i; 213 | i->next = i->next->next; 214 | } 215 | break; 216 | } else { 217 | i = i->next; 218 | } 219 | } 220 | } 221 
| } 222 | 223 | void mark() { 224 | stack_roots.front()->child = nullptr; 225 | for (auto &root : roots) { 226 | set_marked(root, true); 227 | mark_children(root); 228 | } 229 | for (auto it = stack_roots.begin() + 1; it != stack_roots.end(); it++) { 230 | auto &root = *it; 231 | set_marked(root, true); 232 | mark_children(root); 233 | } 234 | } 235 | 236 | void sweep() { 237 | for (auto it = objects.begin(); it != objects.end();) { 238 | auto &obj = *it; 239 | if (is_marked(obj)) { 240 | set_marked(obj, false); 241 | it++; 242 | } else { 243 | #if SHOW_GC 244 | if (gc_callback) 245 | gc_callback((void *) data((void *) obj)); 246 | #endif 247 | memory.free(obj); 248 | it = objects.erase(it); 249 | } 250 | } 251 | } 252 | 253 | void dump_children(gc_header *ptr, int level) { 254 | dump_callback(data(ptr), level); 255 | if (ptr->child) { 256 | auto i = ptr->child; 257 | dump_children(i, level + 1); 258 | i = i->next; 259 | while (i != ptr->child) { 260 | dump_children(i, level + 1); 261 | i = i->next; 262 | } 263 | } 264 | } 265 | 266 | void dump_tree() { 267 | if (!dump_callback) 268 | return; 269 | for (auto &root : roots) { 270 | dump_children(root, 0); 271 | } 272 | for (auto it = stack_roots.begin() + 1; it != stack_roots.end(); it++) { 273 | auto &root = *it; 274 | dump_children(root, 0); 275 | } 276 | } 277 | 278 | private: 279 | size_t saved_stack{0}; 280 | gc_header stack_head{nullptr, nullptr, nullptr}; 281 | std::function gc_callback{[](void *) {}}; 282 | std::function dump_callback{[](void *, int) {}}; 283 | std::vector objects; 284 | std::vector stack_roots; 285 | std::unordered_set roots; 286 | memory_pool memory; 287 | }; 288 | 289 | template::DEFAULT_ALLOC_BLOCK_SIZE> 290 | using memory_pool_gc = legacy_memory_gc; 291 | } 292 | 293 | #endif //CLIBLISP_MEMORY_GC_H 294 | -------------------------------------------------------------------------------- /screenshots/1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bajdcc/cliblisp/40038bc76119c1ea73f93b1053cb8aecb6ab37a1/screenshots/1.png -------------------------------------------------------------------------------- /test.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: cliblisp 3 | // Created by bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include "cparser.h" 10 | #include "cvm.h" 11 | 12 | #define TEST(a,b) std::make_tuple(a, b) 13 | 14 | int main(int argc, char *argv[]) { 15 | clib::cvm vm; 16 | auto codes = std::vector>{ 17 | TEST("+ 1 2", "3"), 18 | TEST("* 1 2 3 4 5 6", "720"), 19 | TEST("- 8 4 2 9 8 ", "-15"), 20 | TEST(R"(+ "Hello" " " "world!")", R"("Hello world!")"), 21 | TEST("eval 5", "5"), 22 | TEST("eval `(+ 1 2)", "3"), 23 | TEST("eval (+ 1 2)", "3"), 24 | TEST("`a", "`a"), 25 | TEST("`(a b c)", "`(a b c)"), 26 | TEST(R"(+ "Project: " __project__ ", author: " __author__)", R"("Project: cliblisp, author: bajdcc")"), 27 | TEST("+", R"()"), 28 | // tests for lis.py 29 | TEST(R"(quote (testing 1 2.0 -3.14e159))", "`(testing 1 2 -3.14e+159)"), 30 | TEST(R"(+ 2 2)", "4"), 31 | TEST(R"(+ (* 2 100) (* 1 10))", "210"), 32 | TEST(R"(if (> 6 5) `(+ 1 1) `(+ 2 2))", "2"), 33 | TEST(R"(if (< 6 5) `(+ 1 1) `(+ 2 2))", "4"), 34 | TEST(R"(def `x 3)", "3"), 35 | TEST(R"(x)", "3"), 36 | TEST(R"(+ x x)", "6"), 37 | TEST(R"(begin (def `x 1) (def `x (+ x 1)) (+ x 1))", "3"), 38 | TEST(R"((\ `(x) `(+ x x)) 5)", "10"), 39 | TEST(R"(def `twice (\ `(x) `(* 2 x)))", R"()"), 40 | TEST(R"(twice 5)", "10"), 41 | TEST(R"(def `compose (\ `(f g) `(\ `(x) `(f (g x)))))", R"()"), 42 | TEST(R"((compose list twice) 5)", "`10"), 43 | TEST(R"(def `repeat (\ `(f) `(compose f f)))", ""), 44 | TEST(R"((repeat twice) 5)", "20"), 45 | TEST(R"((repeat (repeat twice)) 5)", "80"), 46 | TEST(R"(def `fact (\ `(n) `(if (<= n 1) `1 `(* n (fact (- n 1))))))", 47 | R"()"), 48 | TEST(R"(fact 3)", "6"), 49 | TEST(R"(fact 50)", 
"30414093201713378043612608166064768844377641568960512000000000000"), 50 | TEST(R"(fact 12)", "479001600"), 51 | TEST(R"(def `abs (\ `(n) `((if (> n 0) `+ `-) 0 n)))", " n 0) `+ `-) 0 n)>"), 52 | TEST(R"(abs -3)", "3"), 53 | TEST(R"(list (abs -3) (abs 0) (abs 3))", "`(3 0 3)"), 54 | TEST(R"(def `combine (\ `(f) 55 | `(\ `(x y) 56 | `(if (null? x) `nil 57 | `(f (list (car x) (car y)) 58 | ((combine f) (cdr x) (cdr y)))))))", 59 | R"()"), 60 | TEST(R"(def `zip (combine cons))", 61 | ""), 62 | TEST(R"(zip (list 1 2 3 4) (list 5 6 7 8))", "`(`(1 5) `(2 6) `(3 7) `(4 8))"), 63 | TEST(R"(def `riff-shuffle (\ `(deck) `(begin 64 | (def `take (\ `(n seq) `(if (<= n 0) `nil `(cons (car seq) (take (- n 1) (cdr seq)))))) 65 | (def `drop (\ `(n seq) `(if (<= n 0) `seq `(drop (- n 1) (cdr seq))))) 66 | (def `mid (\ `(seq) `(/ (len seq) 2))) 67 | ((combine append) (take (mid deck) deck) (drop (mid deck) deck)))))", 68 | R"()"), 73 | TEST(R"(riff-shuffle (list 1 2 3 4 5 6 7 8))", "`(1 5 2 6 3 7 4 8)"), 74 | TEST(R"((repeat riff-shuffle) (list 1 2 3 4 5 6 7 8))", "`(1 3 5 7 2 4 6 8)"), 75 | TEST(R"(riff-shuffle (riff-shuffle (riff-shuffle (list 1 2 3 4 5 6 7 8))))", "`(1 2 3 4 5 6 7 8)"), 76 | TEST(R"(def `apply (\ `(item L) `(eval (cons item L))))", ""), 77 | TEST(R"(apply + `(1 2 3))", "6"), 78 | TEST(R"(def `sum (\ `n `(if (< n 2) `1 `(+ n (sum (- n 1))))))", 79 | ""), 80 | TEST(R"(sum 10)", "55"), 81 | TEST(R"(def `Y (\ `f `((\ `self `(f (\ `x `((self self) x)))) (\ `self `(f (\ `x `((self self) x)))))))", 82 | R"()"), 83 | TEST(R"(def `Y_fib (\ `f `(\ `n `(if (<= n 2) `1 `(+ (f (- n 1)) (f (- n 2)))))))", 84 | R"()"), 85 | TEST(R"((Y Y_fib) 5)", "5"), 86 | TEST(R"((def `range (\ `(a b) `(if (== a b) `nil `(cons a (range (+ a 1) b))))))", 87 | ""), 88 | TEST(R"(range 1 10)", "`(1 2 3 4 5 6 7 8 9)"), 89 | TEST(R"(apply + (range 1 10))", "45"), 90 | TEST(R"(def `map (\ `(f L) `(if (null? 
L) `nil `(cons (f (car L)) (map f (cdr L))))))", 91 | ""), 92 | TEST(R"(map + (range 1 10))", "`(2 3 4 5 6 7 8 9 10)"), 93 | }; 94 | auto i = 0; 95 | auto failed = 0; 96 | std::stringstream ss; 97 | std::string ast, out; 98 | int c = 0; 99 | for (auto &code : codes) { 100 | vm.save(); 101 | try { 102 | ast = std::get<0>(code); 103 | out.clear(); 104 | clib::cparser p; 105 | auto root = p.parse(ast); 106 | //clib::cast::print(root, 0, std::cout); 107 | vm.prepare(root); 108 | auto val = vm.run(INT32_MAX, c); 109 | std::cout << "TEST #" << (++i) << "> "; 110 | ss.str(""); 111 | clib::cast::print(root, 0, ss); 112 | ast = ss.str(); 113 | ss.str(""); 114 | clib::cvm::print(val, ss); 115 | out = ss.str(); 116 | auto &right = std::get<1>(code); 117 | if (out == right) { 118 | std::cout << "[PASSED] " << ast << " => " << out; 119 | } else { 120 | std::cout << "[ERROR ] " << ast << " => " << out << " REQUIRE: " << right; 121 | failed++; 122 | } 123 | std::cout << std::endl; 124 | vm.gc(); 125 | } catch (const std::exception &e) { 126 | failed++; 127 | std::cout << "TEST #" << (++i) << "> [ERROR ] " << ast << std::endl; 128 | //printf("RUNTIME ERROR: %s\n", e.what()); 129 | vm.restore(); 130 | vm.gc(); 131 | } 132 | } 133 | std::cout << "==== ALL TEST PASSED [" << (i - failed) << "/" << i << "] ====" << std::endl; 134 | } 135 | -------------------------------------------------------------------------------- /types.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Project: CMiniLang 3 | // Author: bajdcc 4 | // 5 | 6 | #include 7 | #include 8 | #include "types.h" 9 | 10 | namespace clib { 11 | std::tuple lexer_string_list[] = { 12 | std::make_tuple(l_none, "none", 0), 13 | std::make_tuple(l_error, "error", 0), 14 | std::make_tuple(l_char, "char", 1), 15 | std::make_tuple(l_uchar, "uchar", 2), 16 | std::make_tuple(l_short, "short", 3), 17 | std::make_tuple(l_ushort, "ushort", 4), 18 | std::make_tuple(l_int, "int", 5), 19 | 
std::make_tuple(l_uint, "uint", 6), 20 | std::make_tuple(l_long, "long", 7), 21 | std::make_tuple(l_ulong, "ulong", 8), 22 | std::make_tuple(l_float, "float", 9), 23 | std::make_tuple(l_double, "double", 10), 24 | std::make_tuple(l_operator, "operator", 0), 25 | std::make_tuple(l_keyword, "keyword", 0), 26 | std::make_tuple(l_identifier, "identifier", 0), 27 | std::make_tuple(l_string, "string", 0), 28 | std::make_tuple(l_comment, "comment", 0), 29 | std::make_tuple(l_space, "space", 0), 30 | std::make_tuple(l_newline, "newline", 0), 31 | std::make_tuple(l_end, "EOF", 0), 32 | }; 33 | 34 | const string_t &lexer_typestr(lexer_t type) { 35 | assert(type >= l_none && type <= l_end); 36 | return std::get<1>(lexer_string_list[type]); 37 | } 38 | 39 | int lexer_prior(lexer_t type) { 40 | assert(type >= l_none && type <= l_end); 41 | return std::get<2>(lexer_string_list[type]); 42 | } 43 | 44 | std::tuple keyword_string_list[] = { 45 | std::make_tuple(k__start, "@START"), 46 | std::make_tuple(k_auto, "auto"), 47 | std::make_tuple(k_bool, "bool"), 48 | std::make_tuple(k_break, "break"), 49 | std::make_tuple(k_case, "case"), 50 | std::make_tuple(k_char, "char"), 51 | std::make_tuple(k_const, "const"), 52 | std::make_tuple(k_continue, "continue"), 53 | std::make_tuple(k_default, "default"), 54 | std::make_tuple(k_do, "do"), 55 | std::make_tuple(k_double, "double"), 56 | std::make_tuple(k_else, "else"), 57 | std::make_tuple(k_enum, "enum"), 58 | std::make_tuple(k_extern, "extern"), 59 | std::make_tuple(k_false, "false"), 60 | std::make_tuple(k_float, "float"), 61 | std::make_tuple(k_for, "for"), 62 | std::make_tuple(k_goto, "goto"), 63 | std::make_tuple(k_if, "if"), 64 | std::make_tuple(k_int, "int"), 65 | std::make_tuple(k_long, "long"), 66 | std::make_tuple(k_register, "register"), 67 | std::make_tuple(k_return, "return"), 68 | std::make_tuple(k_short, "short"), 69 | std::make_tuple(k_signed, "signed"), 70 | std::make_tuple(k_sizeof, "sizeof"), 71 | 
std::make_tuple(k_static, "static"), 72 | std::make_tuple(k_struct, "struct"), 73 | std::make_tuple(k_switch, "switch"), 74 | std::make_tuple(k_true, "true"), 75 | std::make_tuple(k_typedef, "typedef"), 76 | std::make_tuple(k_union, "union"), 77 | std::make_tuple(k_unsigned, "unsigned"), 78 | std::make_tuple(k_void, "void"), 79 | std::make_tuple(k_volatile, "volatile"), 80 | std::make_tuple(k_while, "while"), 81 | std::make_tuple(k__end, "@END"), 82 | }; 83 | 84 | const string_t &lexer_keywordstr(keyword_t type) { 85 | assert(type > k__start && type < k__end); 86 | return std::get<1>(keyword_string_list[type - k__start]); 87 | } 88 | 89 | std::tuple operator_string_list[] = { 90 | std::make_tuple(op__start, "@START", "@START", NOP, 9999), 91 | std::make_tuple(op_assign, "=", "assign", NOP, 1401), 92 | std::make_tuple(op_equal, "==", "equal", EQ, 701), 93 | std::make_tuple(op_plus, "+", "plus", ADD, 401), 94 | std::make_tuple(op_plus_assign, "+=", "plus_assign", ADD, 1405), 95 | std::make_tuple(op_minus, "-", "minus", SUB, 402), 96 | std::make_tuple(op_minus_assign, "-=", "minus_assign", SUB, 1406), 97 | std::make_tuple(op_times, "*", "times", MUL, 302), 98 | std::make_tuple(op_times_assign, "*=", "times_assign", MUL, 1403), 99 | std::make_tuple(op_divide, "/", "divide", DIV, 301), 100 | std::make_tuple(op_div_assign, "/=", "div_assign", DIV, 1402), 101 | std::make_tuple(op_bit_and, "&", "bit_and", AND, 801), 102 | std::make_tuple(op_and_assign, "&=", "and_assign", AND, 1409), 103 | std::make_tuple(op_bit_or, "|", "bit_or", OR, 1001), 104 | std::make_tuple(op_or_assign, "|=", "or_assign", OR, 1411), 105 | std::make_tuple(op_bit_xor, "^", "bit_xor", XOR, 901), 106 | std::make_tuple(op_xor_assign, "^=", "xor_assign", XOR, 1410), 107 | std::make_tuple(op_mod, "%", "mod", MOD, 303), 108 | std::make_tuple(op_mod_assign, "%=", "mod_assign", MOD, 1404), 109 | std::make_tuple(op_less_than, "<", "less_than", LT, 603), 110 | std::make_tuple(op_less_than_or_equal, "<=", 
"less_than_or_equal", LE, 604), 111 | std::make_tuple(op_greater_than, ">", "greater_than", GT, 601), 112 | std::make_tuple(op_greater_than_or_equal, ">=", "greater_than_or_equal", GE, 602), 113 | std::make_tuple(op_logical_not, "!", "logical_not", NOP, 207), 114 | std::make_tuple(op_not_equal, "!=", "not_equal", NE, 702), 115 | std::make_tuple(op_escape, "\\", "escape", NOP, 9000), 116 | std::make_tuple(op_query, "?", "query", NOP, 1301), 117 | std::make_tuple(op_bit_not, "~", "bit_not", NOP, 208), 118 | std::make_tuple(op_lparan, "(", "lparan", NOP, 102), 119 | std::make_tuple(op_rparan, ")", "rparan", NOP, 102), 120 | std::make_tuple(op_lbrace, "{", "lbrace", NOP, 9000), 121 | std::make_tuple(op_rbrace, "}", "rbrace", NOP, 9000), 122 | std::make_tuple(op_lsquare, "[", "lsquare", NOP, 101), 123 | std::make_tuple(op_rsquare, "]", "rsquare", NOP, 101), 124 | std::make_tuple(op_comma, ",", "comma", NOP, 1501), 125 | std::make_tuple(op_dot, ".", "dot", NOP, 103), 126 | std::make_tuple(op_semi, ";", "semi", NOP, 9000), 127 | std::make_tuple(op_colon, ":", "colon", NOP, 1302), 128 | std::make_tuple(op_plus_plus, "++", "plus_plus", ADD, 203), 129 | std::make_tuple(op_minus_minus, "--", "minus_minus", SUB, 204), 130 | std::make_tuple(op_logical_and, "&&", "logical_and", JZ, 1101), 131 | std::make_tuple(op_logical_or, "||", "logical_or", JNZ, 1201), 132 | std::make_tuple(op_pointer, "->", "pointer", NOP, 104), 133 | std::make_tuple(op_left_shift, "<<", "left_shift", SHL, 501), 134 | std::make_tuple(op_right_shift, ">>", "right_shift", SHR, 502), 135 | std::make_tuple(op_left_shift_assign, "<<=", "left_shift_assign", SHL, 1407), 136 | std::make_tuple(op_right_shift_assign, ">>=", "right_shift_assign", SHR, 1408), 137 | std::make_tuple(op_ellipsis, "...", "ellipsis", NOP, 9000), 138 | std::make_tuple(op_quote, "`", "quote", NOP, 9000), 139 | std::make_tuple(op__end, "@END", "@END", NOP, 9999), 140 | }; 141 | 142 | const string_t &lexer_opstr(operator_t type) { 143 | 
assert(type > op__start && type <= op__end); 144 | return std::get<1>(operator_string_list[type]); 145 | } 146 | 147 | const string_t &lexer_opnamestr(operator_t type) { 148 | assert(type > op__start && type <= op__end); 149 | return std::get<2>(operator_string_list[type]); 150 | } 151 | 152 | string_t err_string_list[] = { 153 | "@START", 154 | "#E !char!", 155 | "#E !operator!", 156 | "#E !digit!", 157 | "#E !string!", 158 | "@END", 159 | }; 160 | 161 | const string_t &lexer_errstr(error_t type) { 162 | assert(type > e__start && type < e__end); 163 | return err_string_list[type]; 164 | } 165 | 166 | int lexer_operatorpred(operator_t type) { 167 | assert(type > op__start && type < op__end); 168 | return std::get<4>(operator_string_list[type]); 169 | } 170 | 171 | int lexer_op2ins(operator_t type) { 172 | assert(type > op__start && type < op__end); 173 | return std::get<3>(operator_string_list[type]); 174 | } 175 | 176 | std::tuple coll_string_list[] = { 177 | std::make_tuple(c_program, "program"), 178 | std::make_tuple(c_list, "list"), 179 | std::make_tuple(c_sexpr, "sexpr"), 180 | std::make_tuple(c_qexpr, "qexpr"), 181 | std::make_tuple(c_object, "object"), 182 | }; 183 | 184 | const string_t &coll_str(coll_t t) { 185 | assert(t >= c_program && t <= c_object); 186 | return std::get<1>(coll_string_list[t]); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /types.h: -------------------------------------------------------------------------------- 1 | // 2 | // Project: CMiniLang 3 | // Author: bajdcc 4 | // 5 | 6 | #ifndef CMINILANG_TYPES_H 7 | #define CMINILANG_TYPES_H 8 | 9 | #include 10 | #include 11 | 12 | using string_t = std::string; 13 | template using map_t = std::unordered_map; 14 | 15 | namespace clib { 16 | #if __APPLE__ && __MACH__ 17 | using int8 = int8_t; 18 | using uint8 = uint8_t; 19 | using int16 = int16_t; 20 | using uint16 = uint16_t; 21 | using int32 = int32_t; 22 | using uint32 = uint32_t; 23 
| using int64 = int64_t; 24 | using uint64 = uint64_t; 25 | #else 26 | using int8 = signed __int8; 27 | using uint8 = unsigned __int8; 28 | using int16 = signed __int16; 29 | using uint16 = unsigned __int16; 30 | using int32 = signed __int32; 31 | using uint32 = unsigned __int32; 32 | using int64 = signed __int64; 33 | using uint64 = unsigned __int64; 34 | #endif 35 | 36 | using sint = signed int; 37 | using uint = unsigned int; 38 | using slong = long long; 39 | using ulong = unsigned long long; 40 | 41 | using byte = uint8; 42 | using decimal = double; // 浮点类型 43 | 44 | enum lexer_t { 45 | l_none, 46 | l_error, 47 | l_char, 48 | l_uchar, 49 | l_short, 50 | l_ushort, 51 | l_int, 52 | l_uint, 53 | l_long, 54 | l_ulong, 55 | l_float, 56 | l_double, 57 | l_operator, 58 | l_keyword, 59 | l_identifier, 60 | l_string, 61 | l_comment, 62 | l_space, 63 | l_newline, 64 | l_end, 65 | }; 66 | 67 | enum keyword_t { 68 | k__start, 69 | k_auto, 70 | k_bool, 71 | k_break, 72 | k_case, 73 | k_char, 74 | k_const, 75 | k_continue, 76 | k_default, 77 | k_do, 78 | k_double, 79 | k_else, 80 | k_enum, 81 | k_extern, 82 | k_false, 83 | k_float, 84 | k_for, 85 | k_goto, 86 | k_if, 87 | k_int, 88 | k_long, 89 | k_register, 90 | k_return, 91 | k_short, 92 | k_signed, 93 | k_sizeof, 94 | k_static, 95 | k_struct, 96 | k_switch, 97 | k_true, 98 | k_typedef, 99 | k_union, 100 | k_unsigned, 101 | k_void, 102 | k_volatile, 103 | k_while, 104 | k__end 105 | }; 106 | 107 | enum operator_t { 108 | op__start, 109 | op_assign, 110 | op_equal, 111 | op_plus, 112 | op_plus_assign, 113 | op_minus, 114 | op_minus_assign, 115 | op_times, 116 | op_times_assign, 117 | op_divide, 118 | op_div_assign, 119 | op_bit_and, 120 | op_and_assign, 121 | op_bit_or, 122 | op_or_assign, 123 | op_bit_xor, 124 | op_xor_assign, 125 | op_mod, 126 | op_mod_assign, 127 | op_less_than, 128 | op_less_than_or_equal, 129 | op_greater_than, 130 | op_greater_than_or_equal, 131 | op_logical_not, 132 | op_not_equal, 133 | op_escape, 
134 | op_query, 135 | op_bit_not, 136 | op_lparan, 137 | op_rparan, 138 | op_lbrace, 139 | op_rbrace, 140 | op_lsquare, 141 | op_rsquare, 142 | op_comma, 143 | op_dot, 144 | op_semi, 145 | op_colon, 146 | op_plus_plus, 147 | op_minus_minus, 148 | op_logical_and, 149 | op_logical_or, 150 | op_pointer, 151 | op_left_shift, 152 | op_right_shift, 153 | op_left_shift_assign, 154 | op_right_shift_assign, 155 | op_ellipsis, 156 | op_quote, 157 | op__end, 158 | }; 159 | 160 | enum error_t { 161 | e__start, 162 | e_invalid_char, 163 | e_invalid_operator, 164 | e_invalid_digit, 165 | e_invalid_string, 166 | e__end 167 | }; 168 | 169 | enum ins_t { 170 | NOP, LEA, IMM, IMX, JMP, CALL, JZ, JNZ, ENT, ADJ, LEV, LI, SI, LC, SC, PUSH, LOAD, 171 | OR, XOR, AND, EQ, NE, LT, GT, LE, GE, SHL, SHR, ADD, SUB, MUL, DIV, MOD, 172 | OPEN, READ, CLOS, PRTF, MALC, MSET, MCMP, TRAC, TRAN, EXIT 173 | }; 174 | 175 | template 176 | struct base_t { 177 | using type = void *; 178 | }; 179 | template 180 | struct base_lexer_t { 181 | static const lexer_t type = l_none; 182 | }; 183 | 184 | #define DEFINE_BASE_TYPE(t, obj) \ 185 | template<> \ 186 | struct base_t \ 187 | { \ 188 | using type = obj; \ 189 | static const int size = sizeof(obj); \ 190 | }; 191 | 192 | DEFINE_BASE_TYPE(l_char, char) 193 | DEFINE_BASE_TYPE(l_uchar, unsigned char) 194 | DEFINE_BASE_TYPE(l_short, short) 195 | DEFINE_BASE_TYPE(l_ushort, unsigned short) 196 | DEFINE_BASE_TYPE(l_int, int) 197 | DEFINE_BASE_TYPE(l_uint, unsigned int) 198 | DEFINE_BASE_TYPE(l_long, slong) 199 | DEFINE_BASE_TYPE(l_ulong, ulong) 200 | DEFINE_BASE_TYPE(l_float, float) 201 | DEFINE_BASE_TYPE(l_double, double) 202 | DEFINE_BASE_TYPE(l_keyword, keyword_t) 203 | DEFINE_BASE_TYPE(l_operator, operator_t) 204 | DEFINE_BASE_TYPE(l_identifier, string_t) 205 | DEFINE_BASE_TYPE(l_string, string_t) 206 | DEFINE_BASE_TYPE(l_comment, string_t) 207 | DEFINE_BASE_TYPE(l_space, uint) 208 | DEFINE_BASE_TYPE(l_newline, uint) 209 | DEFINE_BASE_TYPE(l_error, error_t) 
210 | #undef DEFINE_BASE_TYPE 211 | 212 | #define DEFINE_CONV_TYPE(t, obj) \ 213 | template<> \ 214 | struct base_lexer_t \ 215 | { \ 216 | static const lexer_t type = t; \ 217 | }; 218 | 219 | DEFINE_CONV_TYPE(l_char, char) 220 | DEFINE_CONV_TYPE(l_uchar, unsigned char) 221 | DEFINE_CONV_TYPE(l_short, short) 222 | DEFINE_CONV_TYPE(l_ushort, unsigned short) 223 | DEFINE_CONV_TYPE(l_int, int) 224 | DEFINE_CONV_TYPE(l_uint, unsigned int) 225 | DEFINE_CONV_TYPE(l_long, slong) 226 | DEFINE_CONV_TYPE(l_ulong, ulong) 227 | DEFINE_CONV_TYPE(l_float, float) 228 | DEFINE_CONV_TYPE(l_double, double) 229 | DEFINE_CONV_TYPE(l_string, string_t) 230 | DEFINE_CONV_TYPE(l_error, error_t) 231 | #undef DEFINE_CONV_TYPE 232 | 233 | const string_t &lexer_typestr(lexer_t); 234 | const string_t &lexer_keywordstr(keyword_t); 235 | int lexer_prior(lexer_t); 236 | const string_t &lexer_opstr(operator_t); 237 | const string_t &lexer_opnamestr(operator_t); 238 | const string_t &lexer_errstr(error_t); 239 | int lexer_operatorpred(operator_t); 240 | int lexer_op2ins(operator_t); 241 | 242 | #define LEX_T(t) base_t::type 243 | #define LEX_CONV_T(t) base_lexer_t::type 244 | #define LEX_SIZEOF(t) base_t::size 245 | #define LEX_STRING(t) lexer_typestr(t) 246 | #define LEX_PRIOR(t) lexer_prior(t) 247 | 248 | #define KEYWORD_STRING(t) lexer_keywordstr(t) 249 | #define OPERATOR_STRING(t) lexer_opnamestr(t) 250 | #define OP_STRING(t) lexer_opstr(t) 251 | #define ERROR_STRING(t) lexer_errstr(t) 252 | 253 | #define OPERATOR_PRED(t) lexer_operatorpred(t) 254 | #define OP_INS(t) lexer_op2ins(t) 255 | 256 | enum coll_t { 257 | c_program, 258 | c_list, 259 | c_sexpr, 260 | c_qexpr, 261 | c_object, 262 | }; 263 | 264 | const string_t &coll_str(coll_t); 265 | 266 | #define COLL_STRING(t) coll_str(t) 267 | } 268 | 269 | #endif //CMINILANG_TYPES_H 270 | --------------------------------------------------------------------------------