├── LICENSE ├── README.md └── source ├── casmpure.c ├── casmpure.h ├── cencoding.c ├── cencoding.h ├── cinstruct.c ├── cinstruct.h ├── cinstset.c ├── cinstset.h ├── ckeywords.c ├── ckeywords.h ├── cloader.c ├── cloader.h ├── cparser.c ├── cparser.h ├── cscanner.c ├── cscanner.h ├── csynthesis.c ├── csynthesis.h ├── ctoken.c ├── ctoken.h ├── test1.c ├── testblit.asm └── testblit.c /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 
27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | Asmpure is a reimplementation and an enhancement of [SoftWire](http://gna.org/projects/softwire) in C for compiling assembly code. It can be used in projects to generate x86 machine code at run-time as an alternative to self-modifying code. Scripting languages might also benefit by using Asmpure as a JIT-compiler back-end. It also allows to eliminate jumps for variables which are temporarily constant during run-time, like for efficient graphics processing by constructing an optimised pipeline. Because of its possibility for 'instruction rewiring' by run-time conditional compilation, I named it "Asmpure". 
It is targeted only at developers with a good knowledge of C++ and x86 assembly. 4 | 5 | Examples 6 | ======== 7 | 8 | CrossProduct 9 | ------------ 10 | ```cpp 11 | /* 12 | void CrossProduct(float *V0, float *V1, float *V2) 13 | { 14 | V2[0] = V0[1] * V1[2] - V0[2] * V1[1]; 15 | V2[1] = V0[2] * V1[0] - V0[0] * V1[2]; 16 | V2[2] = V0[0] * V1[1] - V0[1] * V1[0]; 17 | }*/ 18 | 19 | const char *CrossProductAsm = 20 | " mov ecx, [esp+8]\n" 21 | " mov eax, [esp+4]\n" 22 | " mov edx, [esp+12]\n" 23 | "\n" 24 | " fld DWORD [ecx+8]\n" 25 | " fmul DWORD [eax+4]\n" 26 | " fld DWORD [eax+8]\n" 27 | " fmul DWORD [ecx+4]\n" 28 | " fsubp st1, st0\n" 29 | " fstp DWORD [edx]\n" 30 | "\n" 31 | " fld DWORD [eax+8]\n" 32 | " fmul DWORD [ecx]\n" 33 | " fld DWORD [eax]\n" 34 | " fmul DWORD [ecx+8]\n" 35 | " fsubp st1, st0\n" 36 | " fstp DWORD [edx+4]\n" 37 | "\n" 38 | " fld DWORD [eax]\n" 39 | " fmul DWORD [ecx+4]\n" 40 | " fld DWORD [ecx]\n" 41 | " fmul DWORD [eax+4]\n" 42 | " fsubp st1, st0\n" 43 | " fstp DWORD [edx+8]\n" 44 | " \n" 45 | " ret\n"; 46 | 47 | 48 | void testCrossProduct(void) 49 | { 50 | CAssembler *casm; 51 | int size, c; 52 | 53 | void (*CrossProductPtr)(float*, float*, float*); 54 | 55 | // create assembler 56 | casm = casm_create(); 57 | 58 | // append assembly source 59 | casm_source(casm, CrossProductAsm); 60 | 61 | // calculate size 62 | size = casm_compile(casm, NULL, 0); 63 | 64 | if (size < 0) { 65 | printf("compile error: %s\n", casm->error); 66 | casm_release(casm); 67 | return; 68 | } 69 | 70 | CrossProductPtr = (void (*)(float*, float*, float*))malloc(size); 71 | 72 | casm_compile(casm, (unsigned char*)CrossProductPtr, size); 73 | 74 | printf("==================== Cross Product ====================\n"); 75 | 76 | casm_dumpinst(casm, stdout); 77 | 78 | printf("\nExecute code (y/n)?\n\n"); 79 | 80 | do 81 | { 82 | c = getch(); 83 | } 84 | while(c != 'y' && c != 'n'); 85 | 86 | if(c == 'y') 87 | { 88 | float V0[3] = {1, 0, 0}; 89 | float V1[3] = {0, 1, 0}; 90 
| float V2[3]; 91 | 92 | CrossProductPtr(V0, V1, V2); 93 | 94 | printf("output: (%.3f, %.3f, %.3f)\n\n", V2[0], V2[1], V2[2]); 95 | } 96 | 97 | free(CrossProductPtr); 98 | casm_release(casm); 99 | } 100 | ``` 101 | 102 | *output: (0.000, 0.000, 1.000) * 103 | 104 | Hello World 105 | ----------- 106 | ```cpp 107 | const char *HelloWorldAsm = 108 | " mov eax, [esp+8]\n" 109 | " push eax\n" 110 | " call DWORD [esp+8]\n" 111 | " pop ecx\n" 112 | " ret\n"; 113 | 114 | void testHelloWorld(void) 115 | { 116 | CAssembler *casm; 117 | int size, c; 118 | 119 | void (*HelloWorldPtr)(void*, const char*); 120 | 121 | // create assembler 122 | casm = casm_create(); 123 | 124 | // append assembly source 125 | casm_source(casm, HelloWorldAsm); 126 | 127 | // calculate size 128 | size = casm_compile(casm, NULL, 0); 129 | 130 | if (size < 0) { 131 | printf("compile error: %s\n", casm->error); 132 | casm_release(casm); 133 | return; 134 | } 135 | 136 | HelloWorldPtr = (void (*)(void*, const char*))malloc(size); 137 | 138 | casm_compile(casm, (unsigned char*)HelloWorldPtr, size); 139 | 140 | printf("==================== Hello World ====================\n"); 141 | 142 | casm_dumpinst(casm, stdout); 143 | 144 | printf("\nExecute code (y/n)?\n\n"); 145 | 146 | do 147 | { 148 | c = getch(); 149 | } 150 | while(c != 'y' && c != 'n'); 151 | 152 | if(c == 'y') 153 | { 154 | HelloWorldPtr((void*)printf, "Hello, World !!\n"); 155 | } 156 | 157 | free(HelloWorldPtr); 158 | casm_release(casm); 159 | } 160 | ``` 161 | *output: Hello, World !! 
* 162 | 163 | Alpha Blend 164 | ----------- 165 | ```cpp 166 | const char *AlphaBlendAsm = 167 | "PROC C1:DWORD, C2:DWORD, A:DWORD\n" 168 | " movd mm0, A\n" 169 | " punpcklwd mm0, mm0\n" 170 | " punpckldq mm0, mm0\n" 171 | " pcmpeqb mm7, mm7\n" 172 | " psubw mm7, mm0\n" 173 | " \n" 174 | " punpcklbw mm1, C1\n" 175 | " psrlw mm1, 8\n" 176 | " punpcklbw mm2, C2\n" 177 | " psrlw mm2, 8\n" 178 | " \n" 179 | " pmullw mm1, mm7\n" 180 | " pmullw mm2, mm0\n" 181 | " paddw mm1, mm2\n" 182 | " \n" 183 | " psrlw mm1, 8\n" 184 | " packuswb mm1, mm1\n" 185 | " movd eax, mm1\n" 186 | " emms\n" 187 | " ret\n" 188 | "ENDP\n"; 189 | 190 | 191 | void testAlphaBlend(void) 192 | { 193 | CAssembler *casm; 194 | int c; 195 | 196 | int (*AlphaBlendPtr)(int, int, int); 197 | 198 | // create assembler 199 | casm = casm_create(); 200 | 201 | // append assembly source 202 | casm_source(casm, AlphaBlendAsm); 203 | 204 | 205 | AlphaBlendPtr = (int (*)(int, int, int))casm_callable(casm, NULL); 206 | 207 | if (AlphaBlendPtr == NULL) { 208 | printf("error: %s\n", casm->error); 209 | casm_release(casm); 210 | return; 211 | } 212 | 213 | printf("==================== Alpha Blend ====================\n"); 214 | 215 | casm_dumpinst(casm, stdout); 216 | 217 | printf("\nExecute code (y/n)?\n\n"); 218 | 219 | do 220 | { 221 | c = getch(); 222 | } 223 | while(c != 'y' && c != 'n'); 224 | 225 | if(c == 'y') 226 | { 227 | int x = AlphaBlendPtr(0x00FF00FF, 0xFF00FF00, 128); 228 | printf("output: %.8X\n\n", x); 229 | } 230 | 231 | free(AlphaBlendPtr); 232 | casm_release(casm); 233 | } 234 | ``` 235 | *output: 7F7F7F7F* 236 | 237 | -------------------------------------------------------------------------------- /source/casmpure.c: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // casmpure.c - assembly pure compiler 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | #include "casmpure.h" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #define IMAX_LINESIZE 4096 17 | 18 | //--------------------------------------------------------------------- 19 | // CORE INTERFACE 20 | //--------------------------------------------------------------------- 21 | 22 | // create assembler 23 | CAssembler *casm_create(void) 24 | { 25 | CAssembler *self; 26 | self = (CAssembler*)malloc(sizeof(CAssembler)); 27 | assert(self); 28 | self->parser = cparser_create(); 29 | assert(self->parser); 30 | self->loader = cloader_create(); 31 | assert(self->loader); 32 | self->source = (char*)malloc(1024 + 1); 33 | assert(self->source); 34 | self->srcblock = 1024; 35 | self->srcsize = 0; 36 | self->source[0] = 0; 37 | self->line = (char*)malloc(IMAX_LINESIZE + 10); 38 | assert(self->line); 39 | self->error = (char*)malloc(2048); 40 | assert(self->error); 41 | self->error[0] = 0; 42 | self->errcode = 0; 43 | return self; 44 | } 45 | 46 | // reset assembler 47 | void casm_reset(CAssembler *self) 48 | { 49 | assert(self); 50 | if (self->source) free(self->source); 51 | self->source = (char*)malloc(1024 + 1); 52 | assert(self->source); 53 | self->srcblock = 1024; 54 | self->srcsize = 0; 55 | self->source[0] = 0; 56 | cloader_reset(self->loader); 57 | cparser_reset(self->parser); 58 | } 59 | 60 | // release assembler 61 | void casm_release(CAssembler *self) 62 | { 63 | assert(self); 64 | if (self->parser) { 65 | cparser_release(self->parser); 66 | self->parser = NULL; 67 | } 68 | if (self->loader) { 69 | cloader_release(self->loader); 70 | self->loader = NULL; 71 | } 72 | if (self->source) { 73 | free(self->source); 74 | self->source = NULL; 75 | } 76 | if (self->line) { 77 | free(self->line); 78 | self->line = NULL; 79 | } 80 | if (self->error) { 81 | free(self->error); 82 | self->error = NULL; 83 | } 84 | self->srcblock = 0; 85 | self->srcsize = 0; 86 | 
free(self); 87 | } 88 | 89 | // add source to source buffer 90 | int casm_source(CAssembler *self, const char *text) 91 | { 92 | int datasize = (int)strlen(text); 93 | int newsize = datasize + self->srcsize; 94 | int newblock = 1; 95 | while (newblock < newsize) newblock <<= 1; 96 | if (newblock != self->srcblock) { 97 | char *buffer = (char*)malloc(newblock + 1); 98 | assert(buffer); 99 | memcpy(buffer, self->source, self->srcsize); 100 | buffer[self->srcsize] = 0; 101 | free(self->source); 102 | self->source = buffer; 103 | self->srcblock = newblock; 104 | } 105 | memcpy(self->source + self->srcsize, text, datasize); 106 | self->srcsize = newsize; 107 | self->source[newsize] = 0; 108 | return 0; 109 | } 110 | 111 | // prompt error 112 | static void casm_error(CAssembler *self, const char *msg, int code) 113 | { 114 | sprintf(self->error, "line(%d): error(%d): %s", self->lineno, code, msg); 115 | self->errcode = code; 116 | } 117 | 118 | // compile single line 119 | static int casm_compile_line(CAssembler *self, const char *line) 120 | { 121 | const CEncoding *encoding; 122 | 123 | assert(self); 124 | 125 | self->error[0] = 0; 126 | self->errcode = 0; 127 | 128 | encoding = cparser_parse_line(self->parser, line); 129 | 130 | if (encoding == NULL) { 131 | casm_error(self, self->parser->error, self->parser->errcode); 132 | return -1; 133 | } 134 | 135 | cloader_new_encoding(self->loader, encoding); 136 | 137 | return 0; 138 | } 139 | 140 | // compile source buffer 141 | // if (code == NULL) returns compiled code size 142 | // if (code != NULL) and (maxsize >= codesize) compile and returns codesize 143 | // if (code != NULL) and (maxsize < codesize) returns error 144 | int casm_compile(CAssembler *self, unsigned char *code, long maxsize) 145 | { 146 | int lineno, p1, p2; 147 | const char *text; 148 | long codesize; 149 | 150 | assert(self); 151 | 152 | text = self->source; 153 | 154 | cloader_reset(self->loader); 155 | cparser_reset(self->parser); 156 | 157 | for 
(lineno = 1, p1 = 0; p1 < self->srcsize; ) { 158 | for (p2 = p1; text[p2] != 0 && text[p2] != '\n'; p2++); 159 | self->lineno = lineno++; 160 | if (p2 - p1 >= IMAX_LINESIZE) { 161 | casm_error(self, "line size too long", 1); 162 | return -1; 163 | } 164 | 165 | memcpy(self->line, self->source + p1, p2 - p1); 166 | self->line[p2 - p1] = 0; 167 | p1 = p2 + 1; 168 | 169 | if (casm_compile_line(self, self->line) != 0) { 170 | return -2; 171 | } 172 | } 173 | 174 | codesize = cloader_get_codesize(self->loader) + 10; 175 | 176 | if (code == NULL) 177 | return codesize; 178 | 179 | if (maxsize < codesize) { 180 | casm_error(self, "need a larger memory block to get code", 2); 181 | return -3; 182 | } 183 | 184 | memset(code, 0xcc, codesize); 185 | 186 | if (cloader_output(self->loader, code) != 0) { 187 | self->lineno = self->loader->errcode; 188 | casm_error(self, self->loader->error, 3); 189 | return -4; 190 | } 191 | 192 | return codesize; 193 | } 194 | 195 | 196 | // get error 197 | const char *casm_geterror(const CAssembler *self, int *errcode) 198 | { 199 | if (errcode) *errcode = self->errcode; 200 | return self->error; 201 | } 202 | 203 | 204 | //--------------------------------------------------------------------- 205 | // HIGH LEVEL 206 | //--------------------------------------------------------------------- 207 | int casm_pushline(CAssembler *self, const char *fmt, ...) 
208 | { 209 | char *buffer = self->error; 210 | va_list argptr; 211 | 212 | va_start(argptr, fmt); 213 | vsprintf(buffer, fmt, argptr); 214 | va_end(argptr); 215 | 216 | casm_source(self, buffer); 217 | casm_source(self, "\n"); 218 | 219 | self->error[0] = 0; 220 | 221 | return 0; 222 | } 223 | 224 | 225 | void *casm_callable(CAssembler *self, long *codesize) 226 | { 227 | unsigned char *code; 228 | long size; 229 | 230 | if (codesize) *codesize = 0; 231 | 232 | size = casm_compile(self, NULL, 0); 233 | 234 | if (size < 0) { 235 | return NULL; 236 | } 237 | 238 | code = (unsigned char*)malloc(size + 1); 239 | assert(code); 240 | 241 | if (casm_compile(self, code, size) < 0) { 242 | free(code); 243 | return NULL; 244 | } 245 | 246 | if (codesize) *codesize = size; 247 | 248 | return code; 249 | } 250 | 251 | 252 | // load assembly source file 253 | int casm_loadfile(CAssembler *self, const char *filename) 254 | { 255 | char line[80]; 256 | FILE *fp; 257 | casm_reset(self); 258 | if ((fp = fopen(filename, "r")) == NULL) 259 | return -1; 260 | while (!feof(fp)) { 261 | int size = (int)fread(line, 1, 60, fp); 262 | line[size] = 0; 263 | casm_source(self, line); 264 | } 265 | fclose(fp); 266 | return 0; 267 | } 268 | 269 | int casm_savefile(CAssembler *self, const char *filename) 270 | { 271 | char *codedata, *p; 272 | long codesize; 273 | FILE *fp; 274 | 275 | codedata = (char*)casm_callable(self, &codesize); 276 | if (codedata == NULL) return -1; 277 | 278 | if ((fp = fopen(filename, "wb")) == NULL) { 279 | free(codedata); 280 | return -2; 281 | } 282 | 283 | for (p = codedata; p < codedata + codesize; ) { 284 | int canwrite = codesize - (int)(p - codedata); 285 | int hr = (int)fwrite(p, 1, canwrite, fp); 286 | if (hr > 0) p += hr; 287 | } 288 | 289 | fclose(fp); 290 | 291 | free(codedata); 292 | 293 | return 0; 294 | } 295 | 296 | int casm_dumpinst(CAssembler *self, FILE *fp) 297 | { 298 | CLoader *loader = self->loader; 299 | int lineno, p1, p2, maxsize, pos; 300 | 
const char *text; 301 | char *codedata; 302 | iqueue_head *node; 303 | 304 | text = self->source; 305 | 306 | codedata = (char*)casm_callable(self, NULL); 307 | if (codedata == NULL) return -1; 308 | free(codedata); 309 | 310 | node = loader->head.next; 311 | 312 | for (maxsize = 0; node != &loader->head; node = node->next) { 313 | CLink *link = iqueue_entry(node, CLink, head); 314 | int length = cencoding_length(&link->encoding); 315 | if (length > maxsize) maxsize = length; 316 | } 317 | 318 | node = loader->head.next; 319 | fp = (fp != NULL)? fp : stdout; 320 | 321 | for (lineno = 1, p1 = 0, pos = 0; p1 < self->srcsize; lineno++) { 322 | for (p2 = p1; text[p2] != 0 && text[p2] != '\n'; p2++); 323 | if (p2 - p1 >= IMAX_LINESIZE) { 324 | casm_error(self, "line size too long", 1); 325 | return -1; 326 | } 327 | 328 | memcpy(self->line, self->source + p1, p2 - p1); 329 | self->line[p2 - p1] = 0; 330 | p1 = p2 + 1; 331 | 332 | while (node != &loader->head) { 333 | CLink *link = iqueue_entry(node, CLink, head); 334 | if (link->lineno >= lineno) break; 335 | node = node->next; 336 | } 337 | 338 | if (node != &loader->head) { 339 | CLink *link = iqueue_entry(node, CLink, head); 340 | if (link->lineno == lineno) { 341 | static char output[4096]; 342 | int length, size; 343 | length = cencoding_length(&link->encoding); 344 | if (link->encoding.align > 0) { 345 | int align, i, k; 346 | align = link->encoding.align; 347 | length = align - (pos % align); 348 | for (i = length, k = 0; i > 0; ) { 349 | if (i >= 2) { 350 | output[k++] = '6'; 351 | output[k++] = '6'; 352 | output[k++] = ' '; 353 | i--; 354 | } 355 | output[k++] = '9'; 356 | output[k++] = '0'; 357 | output[k++] = ' '; 358 | i--; 359 | } 360 | output[k++] = 0; 361 | } else { 362 | cencoding_to_string(&link->encoding, output); 363 | } 364 | for (size = (int)strlen(output); size < (maxsize) * 3; ) 365 | output[size++] = ' '; 366 | output[size] = 0; 367 | if (length == 0) fprintf(fp, " "); 368 | else fprintf(fp, 
"%08X:", pos); 369 | pos += length; 370 | fprintf(fp, " %s\t%s\n", output, self->line); 371 | } 372 | } 373 | } 374 | 375 | return 0; 376 | } 377 | 378 | 379 | -------------------------------------------------------------------------------- /source/casmpure.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // casmpure.h - assembly pure compiler 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #ifndef __CASMPURE_H__ 10 | #define __CASMPURE_H__ 11 | 12 | #include "cloader.h" 13 | #include "cparser.h" 14 | 15 | 16 | //--------------------------------------------------------------------- 17 | // CAssembler Definition 18 | //--------------------------------------------------------------------- 19 | struct CAssembler 20 | { 21 | CParser *parser; 22 | CLoader *loader; 23 | int srcblock; 24 | int srcsize; 25 | char *line; 26 | char *source; 27 | char *error; 28 | int errcode; 29 | int lineno; 30 | }; 31 | 32 | typedef struct CAssembler CAssembler; 33 | 34 | 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | //--------------------------------------------------------------------- 40 | // Interface 41 | //--------------------------------------------------------------------- 42 | 43 | // create assembler 44 | CAssembler *casm_create(void); 45 | 46 | // delete assembler 47 | void casm_release(CAssembler *self); 48 | 49 | // reset compiler state and clean source buffer 50 | void casm_reset(CAssembler *self); 51 | 52 | // add source to assembler source buffer 53 | int casm_source(CAssembler *self, const char *text); 54 | 55 | // compile source buffer 56 | // if (code == NULL) returns compiled code size 57 | // if (code != NULL) and (maxsize >= codesize) compile and returns codesize 58 | // if (code != NULL) and (maxsize < codesize) 
returns error 59 | int casm_compile(CAssembler *self, unsigned char *code, long maxsize); 60 | 61 | // get error 62 | const char *casm_geterror(const CAssembler *self, int *errcode); 63 | 64 | 65 | // HIGH LEVEL interface: 66 | 67 | // add a single line to assembly 68 | int casm_pushline(CAssembler *self, const char *fmt, ...); 69 | 70 | // compile and write execode into a memory block 71 | // you can call free() when you need to dispose 72 | void *casm_callable(CAssembler *self, long *codesize); 73 | 74 | 75 | // load assembly source file (will reset source buffer) 76 | int casm_loadfile(CAssembler *self, const char *filename); 77 | 78 | // save compiled code into file 79 | int casm_savefile(CAssembler *self, const char *filename); 80 | 81 | // dump instructions and source line 82 | int casm_dumpinst(CAssembler *self, FILE *fp); 83 | 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | 89 | #endif 90 | 91 | 92 | -------------------------------------------------------------------------------- /source/cencoding.c: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cencoding.c - x86 instruction encoding 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | #include "cencoding.h" 10 | 11 | #ifdef _MSC_VER 12 | #pragma warning(disable: 4996) 13 | #pragma warning(disable: 4311) 14 | #endif 15 | 16 | void cencoding_reset(CEncoding *self) 17 | { 18 | if (self->label) free(self->label); 19 | self->label = NULL; 20 | if (self->reference) free(self->reference); 21 | self->reference = NULL; 22 | if (self->data) free(self->data); 23 | self->data = NULL; 24 | 25 | self->format.P1 = 0; 26 | self->format.P2 = 0; 27 | self->format.P3 = 0; 28 | self->format.P4 = 0; 29 | self->format.REX = 0; 30 | self->format.O3 = 0; 31 | self->format.O2 = 0; 32 | self->format.O1 = 0; 33 | self->format.modRM = 0; 34 | self->format.SIB = 0; 35 | self->format.D1 = 0; 36 | self->format.D2 = 0; 37 | self->format.D3 = 0; 38 | self->format.D4 = 0; 39 | self->format.I1 = 0; 40 | self->format.I2 = 0; 41 | self->format.I3 = 0; 42 | self->format.I4 = 0; 43 | 44 | self->P1 = 0; 45 | self->P2 = 0; 46 | self->P3 = 0; 47 | self->P4 = 0; 48 | self->REX.b = 0; 49 | self->O3 = 0; 50 | self->O2 = 0; 51 | self->O1 = 0; 52 | self->modRM.b = 0; 53 | self->SIB.b = 0; 54 | self->D1 = 0; 55 | self->D2 = 0; 56 | self->D3 = 0; 57 | self->D4 = 0; 58 | self->I1 = 0; 59 | self->I2 = 0; 60 | self->I3 = 0; 61 | self->I4 = 0; 62 | 63 | self->immediate = 0; 64 | self->displacement = 0; 65 | self->message = (char*)""; 66 | self->size = 0; 67 | self->align = 0; 68 | self->relative = 0; 69 | } 70 | 71 | void cencoding_init(CEncoding *self) 72 | { 73 | self->label = 0; 74 | self->reference = 0; 75 | self->data = 0; 76 | self->size = 0; 77 | cencoding_reset(self); 78 | self->O1 = 0xCC; // breakpoint 79 | self->format.O1 = 1; 80 | } 81 | 82 | void cencoding_destroy(CEncoding *self) 83 | { 84 | cencoding_reset(self); 85 | } 86 | 87 | const char *cencoding_get_label(const CEncoding *self) 88 | { 89 | return self->label; 90 | } 91 | 92 | const char *cencoding_get_reference(const CEncoding *self) 93 | { 
94 | return self->reference; 95 | } 96 | 97 | int cencoding_length(const CEncoding *self) 98 | { 99 | int length = 0; 100 | if (self->data && self->size > 0) 101 | return self->size; 102 | if (self->align > 0) 103 | return self->align; 104 | if (self->format.P1) length++; 105 | if (self->format.P2) length++; 106 | if (self->format.P3) length++; 107 | if (self->format.P4) length++; 108 | if (self->format.REX) length++; 109 | if (self->format.O3) length++; 110 | if (self->format.O2) length++; 111 | if (self->format.O1) length++; 112 | if (self->format.modRM) length++; 113 | if (self->format.SIB) length++; 114 | if (self->format.D1) length++; 115 | if (self->format.D2) length++; 116 | if (self->format.D3) length++; 117 | if (self->format.D4) length++; 118 | if (self->format.I1) length++; 119 | if (self->format.I2) length++; 120 | if (self->format.I3) length++; 121 | if (self->format.I4) length++; 122 | 123 | return length; 124 | } 125 | 126 | int cencoding_new_copy(CEncoding *self, const CEncoding *src) 127 | { 128 | *self = *src; 129 | if (src->label) { 130 | long size = (long)strlen(src->label); 131 | self->label = (char*)malloc(size + 1); 132 | assert(self->label); 133 | memcpy(self->label, src->label, size + 1); 134 | self->label[size] = 0; 135 | } 136 | if (src->reference) { 137 | long size = (long)strlen(src->reference); 138 | self->reference = (char*)malloc(size + 1); 139 | assert(self->reference); 140 | memcpy(self->reference, src->reference, size + 1); 141 | self->reference[size] = 0; 142 | } 143 | if (src->data) { 144 | self->data = (char*)malloc(src->size + 1); 145 | assert(self->data); 146 | memcpy(self->data, src->data, src->size); 147 | self->size = src->size; 148 | } 149 | return 0; 150 | } 151 | 152 | int cencoding_add_prefix(CEncoding *self, cbyte p) 153 | { 154 | if (!self->format.P1) { 155 | self->P1 = p; 156 | self->format.P1 = 1; 157 | } 158 | else if (!self->format.P2) { 159 | self->P2 = p; 160 | self->format.P2 = 1; 161 | } 162 | else if 
(!self->format.P3) { 163 | self->P3 = p; 164 | self->format.P3 = 1; 165 | } 166 | else if (!self->format.P4) { 167 | self->P4 = p; 168 | self->format.P4 = 1; 169 | } else { 170 | return -1; 171 | } 172 | return 0; 173 | } 174 | 175 | int cencoding_set_immediate(CEncoding *self, int immediate) 176 | { 177 | self->immediate = immediate; 178 | return 0; 179 | } 180 | 181 | int cencoding_set_jump_offset(CEncoding *self, int offset) 182 | { 183 | if ((char)offset != offset && self->format.I2 == 0) { 184 | self->message = (char*)"Jump offset range too big"; 185 | return -1; 186 | } 187 | self->immediate = offset; 188 | return 0; 189 | } 190 | 191 | void cencoding_set_label(CEncoding *self, const char *label) 192 | { 193 | int size = (int)strlen(label); 194 | if (self->label) free(self->label); 195 | self->label = (char*)malloc(size + 1); 196 | assert(self->label); 197 | memcpy(self->label, label, size + 1); 198 | } 199 | 200 | void cencoding_set_reference(CEncoding *self, const char *ref) 201 | { 202 | int size = (int)strlen(ref); 203 | if (self->reference) free(self->reference); 204 | self->reference = (char*)malloc(size + 1); 205 | assert(self->reference); 206 | memcpy(self->reference, ref, size + 1); 207 | } 208 | 209 | void cencoding_set_data(CEncoding *self, const void *data, int size) 210 | { 211 | if (self->data) free(self->data); 212 | self->data = NULL; 213 | self->size = 0; 214 | if (data && size > 0) { 215 | self->data = (char*)malloc(size + 1); 216 | assert(self->data); 217 | self->size = size; 218 | memcpy(self->data, data, size); 219 | } 220 | } 221 | 222 | int cencoding_check_format(const CEncoding *self) 223 | { 224 | // Bytes cannot be changed without updating format, 225 | // except immediate and displacement 226 | if ((self->P1 && !self->format.P1) || 227 | (self->P2 && !self->format.P2) || 228 | (self->P3 && !self->format.P3) || 229 | (self->P4 && !self->format.P4) || 230 | (self->REX.b && !self->format.REX) || 231 | (self->O2 && !self->format.O2) || 
232 | (self->O1 && !self->format.O1) || 233 | (self->modRM.b && !self->format.modRM) || 234 | (self->SIB.b && !self->format.SIB)) { 235 | return -1; 236 | } 237 | 238 | if ((self->format.P4 && !self->format.P3) || 239 | (self->format.P3 && !self->format.P2) || 240 | (self->format.P2 && !self->format.P1)) { 241 | return -2; 242 | } 243 | 244 | if (self->format.O2 && 245 | (self->O2 != 0x0F && 246 | self->O2 != 0xD8 && 247 | self->O2 != 0xD9 && 248 | self->O2 != 0xDA && 249 | self->O2 != 0xDB && 250 | self->O2 != 0xDC && 251 | self->O2 != 0xDD && 252 | self->O2 != 0xDE && 253 | self->O2 != 0xDF)) { 254 | return -3; 255 | } 256 | 257 | if (self->format.SIB) { 258 | if(!self->format.modRM) { 259 | return -4; 260 | } 261 | if(self->modRM.r_m != E_ESP) { 262 | return -5; 263 | } 264 | } 265 | 266 | // Byte, word or doubleword 267 | if ((self->format.D4 && !self->format.D3) || 268 | (self->format.D3 && !self->format.D4) || 269 | (self->format.D3 && !self->format.D2) || 270 | (self->format.D2 && !self->format.D1)) { 271 | return -6; 272 | } 273 | 274 | // Byte, word or doubleword 275 | if ((self->format.I4 && !self->format.I3) || 276 | (self->format.I3 && !self->format.I4) || 277 | (self->format.I3 && !self->format.I2) || 278 | (self->format.I2 && !self->format.I1)) { 279 | return -7; 280 | } 281 | 282 | return 0; 283 | } 284 | 285 | int cencoding_write_code(const CEncoding *self, unsigned char *output) 286 | { 287 | unsigned char *start = output; 288 | 289 | #define cencoding_output(b) { if (start) *output = (b); output++; } 290 | 291 | if (self->data && self->size > 0) { 292 | if (output) memcpy(output, self->data, self->size); 293 | return (int)self->size; 294 | } 295 | 296 | if (self->align > 0) { 297 | unsigned long linear = (((unsigned long)output) & 0xfffffffful); 298 | int size = self->align - (linear % self->align); 299 | for (; size >= 2; size -= 2) { 300 | cencoding_output(0x66); 301 | cencoding_output(0x90); 302 | } 303 | for (; size > 0; size--) { 304 | 
cencoding_output(0x90); 305 | } 306 | return (int)(output - start); 307 | } 308 | 309 | if (self->format.P1) cencoding_output(self->P1); 310 | if (self->format.P2) cencoding_output(self->P2); 311 | if (self->format.P3) cencoding_output(self->P3); 312 | if (self->format.P4) cencoding_output(self->P4); 313 | if (self->format.REX) cencoding_output(self->REX.b); 314 | if (self->format.O3) cencoding_output(self->O3); 315 | if (self->format.O2) cencoding_output(self->O2); 316 | if (self->format.O1) cencoding_output(self->O1); 317 | if (self->format.modRM) cencoding_output(self->modRM.b); 318 | if (self->format.SIB) cencoding_output(self->SIB.b); 319 | if (self->format.D1) cencoding_output(self->D1); 320 | if (self->format.D2) cencoding_output(self->D2); 321 | if (self->format.D3) cencoding_output(self->D3); 322 | if (self->format.D4) cencoding_output(self->D4); 323 | if (self->format.I1) cencoding_output(self->I1); 324 | if (self->format.I2) cencoding_output(self->I2); 325 | if (self->format.I3) cencoding_output(self->I3); 326 | if (self->format.I4) cencoding_output(self->I4); 327 | 328 | #undef cencoding_output 329 | 330 | return (int)(output - start); 331 | } 332 | 333 | 334 | void cencoding_to_string(const CEncoding *self, char *output) 335 | { 336 | const char *fmt = "0123456789ABCDEF"; 337 | int hr = cencoding_check_format(self); 338 | 339 | assert(hr == 0); 340 | 341 | #define cencoding_format(data) { \ 342 | if (output) { \ 343 | unsigned char ch = (unsigned char)(data & 0xff); \ 344 | *output++ = fmt[ch / 16]; \ 345 | *output++ = fmt[ch % 16]; \ 346 | *output++ = ' '; \ 347 | } \ 348 | } 349 | 350 | if (self->data) { 351 | long i; 352 | for (i = 0; i < self->size; i++) { 353 | unsigned int bb = (unsigned char)self->data[i]; 354 | cencoding_format(bb); 355 | } 356 | *output++ = '\0'; 357 | return; 358 | } 359 | 360 | if (self->align > 0) { 361 | *output++ = '\0'; 362 | return; 363 | } 364 | 365 | if (self->format.P1) cencoding_format(self->P1); 366 | if 
(self->format.P2) cencoding_format(self->P2); 367 | if (self->format.P3) cencoding_format(self->P3); 368 | if (self->format.P4) cencoding_format(self->P4); 369 | if (self->format.REX) cencoding_format(self->REX.b); 370 | if (self->format.O3) cencoding_format(self->O3); 371 | if (self->format.O2) cencoding_format(self->O2); 372 | if (self->format.O1) cencoding_format(self->O1); 373 | if (self->format.modRM) cencoding_format(self->modRM.b); 374 | if (self->format.SIB) cencoding_format(self->SIB.b); 375 | if (self->format.D1) cencoding_format(self->D1); 376 | if (self->format.D2) cencoding_format(self->D2); 377 | if (self->format.D3) cencoding_format(self->D3); 378 | if (self->format.D4) cencoding_format(self->D4); 379 | if (self->format.I1) cencoding_format(self->I1); 380 | if (self->format.I2) cencoding_format(self->I2); 381 | if (self->format.I3) cencoding_format(self->I3); 382 | if (self->format.I4) cencoding_format(self->I4); 383 | 384 | #undef cencoding_format 385 | 386 | *output++ = '\0'; 387 | } 388 | 389 | 390 | void cencoding_to_stdout(const CEncoding *self) 391 | { 392 | static char text[8192]; 393 | cencoding_to_string(self, text); 394 | printf("%s\n", text); 395 | } 396 | 397 | -------------------------------------------------------------------------------- /source/cencoding.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cencoding.h - x86 instruction encoding 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | 10 | #ifndef __CENCODING_H__ 11 | #define __CENCODING_H__ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | //--------------------------------------------------------------------- 20 | // Platform Word Size Detect 21 | //--------------------------------------------------------------------- 22 | #if (!defined(__CUINT32_DEFINED)) && (!defined(__CINT32_DEFINED)) 23 | #define __CUINT32_DEFINED 24 | #define __CINT32_DEFINED 25 | #if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) \ 26 | || defined(__i386__) || defined(__i386) || defined(_M_X86) 27 | typedef unsigned int cuint32; 28 | typedef int cint32; 29 | #elif defined(__MACOS__) 30 | typedef UInt32 cuint32; 31 | typedef Int32 cint32; 32 | #elif defined(__APPLE__) && defined(__MACH__) 33 | #include 34 | typedef u_int32_t cuint32; 35 | typedef int32_t cint32; 36 | #elif defined(__BEOS__) 37 | #include 38 | typedef u_int32_t cuint32; 39 | typedef int32_t cint32; 40 | #elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || \ 41 | defined(__amd64) || defined(_M_IA64) || defined(_M_AMD64) 42 | typedef unsigned int cuint32; 43 | typedef int cint32; 44 | #elif defined(_MSC_VER) || defined(__BORLANDC__) 45 | typedef unsigned __int32 cuint32; 46 | typedef __int32 cint32; 47 | #elif defined(__GNUC__) 48 | #include 49 | typedef uint32_t cuint32; 50 | typedef int32_t cint32; 51 | #else 52 | typedef unsigned long cuint32; 53 | typedef long cint32; 54 | #endif 55 | #endif 56 | 57 | #ifndef __CINT8_DEFINED 58 | #define __CINT8_DEFINED 59 | typedef char cint8; 60 | #endif 61 | 62 | #ifndef __CUINT8_DEFINED 63 | #define __CUINT8_DEFINED 64 | typedef unsigned char cuint8; 65 | #endif 66 | 67 | #ifndef __CUINT16_DEFINED 68 | #define __CUINT16_DEFINED 69 | typedef unsigned short cuint16; 70 | #endif 71 | 72 | #ifndef __CINT16_DEFINED 73 | #define __CINT16_DEFINED 74 | typedef 
short CINT16; 75 | #endif 76 | 77 | #ifndef __CINT64_DEFINED 78 | #define __CINT64_DEFINED 79 | #if defined(_MSC_VER) || defined(__BORLANDC__) 80 | typedef __int64 cint64; 81 | #else 82 | typedef long long cint64; 83 | #endif 84 | #endif 85 | 86 | #ifndef __CUINT64_DEFINED 87 | #define __CUINT64_DEFINED 88 | #if defined(_MSC_VER) || defined(__BORLANDC__) 89 | typedef unsigned __int64 cuint64; 90 | #else 91 | typedef unsigned long long cuint64; 92 | #endif 93 | #endif 94 | 95 | #ifndef INLINE 96 | #ifdef __GNUC__ 97 | 98 | #if __GNUC_MINOE__ >= 1 && __GNUC_MINOE__ < 4 99 | #define INLINE __inline__ __attribute__((always_inline)) 100 | #else 101 | #define INLINE __inline__ 102 | #endif 103 | 104 | #elif (defined(_MSC_VER) || defined(__BORLANDC__) || defined(__WATCOMC__)) 105 | #define INLINE __inline 106 | #else 107 | #define INLINE 108 | #endif 109 | #endif 110 | 111 | #ifndef inline 112 | #define inline INLINE 113 | #endif 114 | 115 | typedef cuint8 cbyte; 116 | 117 | 118 | //--------------------------------------------------------------------- 119 | // CReg 120 | //--------------------------------------------------------------------- 121 | enum CRegID 122 | { 123 | REG_UNKNOWN = -1, 124 | E_AL = 0, E_AX = 0, E_EAX = 0, E_ST0 = 0, E_MM0 = 0, E_XMM0 = 0, 125 | E_CL = 1, E_CX = 1, E_ECX = 1, E_ST1 = 1, E_MM1 = 1, E_XMM1 = 1, 126 | E_DL = 2, E_DX = 2, E_EDX = 2, E_ST2 = 2, E_MM2 = 2, E_XMM2 = 2, 127 | E_BL = 3, E_BX = 3, E_EBX = 3, E_ST3 = 3, E_MM3 = 3, E_XMM3 = 3, 128 | E_AH = 4, E_SP = 4, E_ESP = 4, E_ST4 = 4, E_MM4 = 4, E_XMM4 = 4, 129 | E_CH = 5, E_BP = 5, E_EBP = 5, E_ST5 = 5, E_MM5 = 5, E_XMM5 = 5, 130 | E_DH = 6, E_SI = 6, E_ESI = 6, E_ST6 = 6, E_MM6 = 6, E_XMM6 = 6, 131 | E_BH = 7, E_DI = 7, E_EDI = 7, E_ST7 = 7, E_MM7 = 7, E_XMM7 = 7, 132 | E_R0 = 0, E_R1 = 1, E_R2 = 2, E_R3 = 3, E_R4 = 4, E_R5 = 5, 133 | E_R6 = 6, E_R7 = 7, E_R8 = 8, E_R9 = 9, E_R10 = 10, E_R11 = 11, 134 | E_R12 = 12, E_R13 = 13, E_R14 = 14, E_R15 = 15 135 | }; 136 | 137 | enum CSMod 138 | { 
139 | MOD_NO_DISP = 0, 140 | MOD_BYTE_DISP = 1, 141 | MOD_DWORD_DISP = 2, 142 | MOD_REG = 3 143 | }; 144 | 145 | enum CScale 146 | { 147 | SCALE_UNKNOWN = 0, 148 | SCALE_1 = 0, 149 | SCALE_2 = 1, 150 | SCALE_4 = 2, 151 | SCALE_8 = 3 152 | }; 153 | 154 | 155 | //--------------------------------------------------------------------- 156 | // CEncoding 157 | //--------------------------------------------------------------------- 158 | struct CEncoding 159 | { 160 | char *label; 161 | char *reference; 162 | char *message; 163 | char *data; 164 | int size; 165 | int align; 166 | int relative; 167 | 168 | struct { 169 | unsigned char P1 : 1; 170 | unsigned char P2 : 1; 171 | unsigned char P3 : 1; 172 | unsigned char P4 : 1; 173 | unsigned char REX : 1; 174 | unsigned char O3 : 1; 175 | unsigned char O2 : 1; 176 | unsigned char O1 : 1; 177 | unsigned char modRM : 1; 178 | unsigned char SIB : 1; 179 | unsigned char D1 : 1; 180 | unsigned char D2 : 1; 181 | unsigned char D3 : 1; 182 | unsigned char D4 : 1; 183 | unsigned char I1 : 1; 184 | unsigned char I2 : 1; 185 | unsigned char I3 : 1; 186 | unsigned char I4 : 1; 187 | } format; 188 | 189 | unsigned char P1; // Prefixes 190 | unsigned char P2; 191 | unsigned char P3; 192 | unsigned char P4; 193 | 194 | struct { 195 | union { 196 | struct { 197 | unsigned char B : 1; 198 | unsigned char X : 1; 199 | unsigned char R : 1; 200 | unsigned char W : 1; 201 | unsigned char prefix : 4; 202 | }; 203 | unsigned char b; 204 | }; 205 | } REX; 206 | 207 | unsigned char O1; // Opcode 208 | unsigned char O2; 209 | unsigned char O3; 210 | 211 | struct { 212 | union { 213 | struct { 214 | unsigned char r_m : 3; 215 | unsigned char reg : 3; 216 | unsigned char mod : 2; 217 | }; 218 | unsigned char b; 219 | }; 220 | } modRM; 221 | 222 | struct { 223 | union { 224 | struct { 225 | unsigned char base : 3; 226 | unsigned char index : 3; 227 | unsigned char scale : 2; 228 | }; 229 | unsigned char b; 230 | }; 231 | } SIB; 232 | 233 | union { 234 
| cint32 displacement; 235 | struct { 236 | unsigned char D1; 237 | unsigned char D2; 238 | unsigned char D3; 239 | unsigned char D4; 240 | }; 241 | }; 242 | 243 | union { 244 | cint32 immediate; 245 | struct { 246 | unsigned char I1; 247 | unsigned char I2; 248 | unsigned char I3; 249 | unsigned char I4; 250 | }; 251 | }; 252 | }; 253 | 254 | typedef struct CEncoding CEncoding; 255 | 256 | #ifdef __cplusplus 257 | extern "C" { 258 | #endif 259 | 260 | 261 | //--------------------------------------------------------------------- 262 | // CEncoding 263 | //--------------------------------------------------------------------- 264 | void cencoding_init(CEncoding *self); 265 | void cencoding_reset(CEncoding *self); 266 | void cencoding_destroy(CEncoding *self); 267 | 268 | const char *cencoding_get_label(const CEncoding *self); 269 | const char *cencoding_get_reference(const CEncoding *self); 270 | 271 | int cencoding_length(const CEncoding *self); 272 | int cencoding_new_copy(CEncoding *self, const CEncoding *src); 273 | 274 | int cencoding_add_prefix(CEncoding *self, unsigned char prefix); 275 | int cencoding_set_immediate(CEncoding *self, int immediate); 276 | int cencoding_set_jump_offset(CEncoding *self, int offset); 277 | void cencoding_set_label(CEncoding *self, const char *label); 278 | void cencoding_set_reference(CEncoding *self, const char *ref); 279 | 280 | void cencoding_set_data(CEncoding *self, const void *data, int size); 281 | 282 | int cencoding_check_format(const CEncoding *self); 283 | int cencoding_write_code(const CEncoding *self, unsigned char *output); 284 | 285 | void cencoding_to_string(const CEncoding *self, char *output); 286 | void cencoding_to_stdout(const CEncoding *self); 287 | 288 | 289 | #ifdef __cplusplus 290 | } 291 | #endif 292 | 293 | #endif 294 | 295 | 296 | -------------------------------------------------------------------------------- /source/cinstruct.c: 
-------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cinstruct.c - 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #include "cinstruct.h" 10 | 11 | #ifdef _MSC_VER 12 | #pragma warning(disable: 4996) 13 | #endif 14 | 15 | static void cinst_extract_operands(CInstruction *self, const char *syntax); 16 | 17 | CInstruction *cinst_create(const CInstSyntax *syntax) 18 | { 19 | CInstruction *self; 20 | self = (CInstruction*)malloc(sizeof(CInstruction)); 21 | assert(self); 22 | self->syntax = syntax; 23 | cinst_extract_operands(self, syntax->operands); 24 | self->syntaxMnemonic = 0; 25 | self->syntaxSpecifier = 0; 26 | self->syntaxFirstOperand = 0; 27 | self->syntaxSecondOperand = 0; 28 | self->syntaxThirdOperand = 0; 29 | self->flags = syntax->flags; 30 | self->next = NULL; 31 | return self; 32 | } 33 | 34 | void cinst_release(CInstruction *self) 35 | { 36 | if (self->next) cinst_release(self->next); 37 | self->next = NULL; 38 | memset(self, 0, sizeof(CInstruction)); 39 | free(self); 40 | } 41 | 42 | static void cinst_extract_operands(CInstruction *self, const char *syntax) 43 | { 44 | char *token; 45 | char *string; 46 | char *sep; 47 | 48 | assert(syntax && self); 49 | 50 | self->specifier = CS_UNKNOWN; 51 | self->firstOperand = O_VOID; 52 | self->secondOperand = O_VOID; 53 | self->thirdOperand = O_VOID; 54 | 55 | string = strdup(syntax); 56 | cstring_strip(string); 57 | 58 | sep = string; 59 | token = cstring_strsep(&sep, " ,"); 60 | 61 | if (token == NULL) return; 62 | 63 | cstring_strip(token); 64 | self->specifier = cspecifier_scan(token); 65 | 66 | if (self->specifier != CS_UNKNOWN) { 67 | token = cstring_strsep(&sep, " ,"); 68 | if (token == 0) { 69 | free(string); 70 | return; 71 | } 72 | } 73 | 74 | cstring_strip(token); 75 | 
self->firstOperand = coperand_scan_syntax(token); 76 | 77 | if (self->firstOperand != O_UNKNOWN) { 78 | token = cstring_strsep(&sep, " ,"); 79 | if (token == 0) { 80 | free(string); 81 | return; 82 | } 83 | } 84 | 85 | cstring_strip(token); 86 | self->secondOperand = coperand_scan_syntax(token); 87 | 88 | if (self->secondOperand != O_UNKNOWN) { 89 | token = cstring_strsep(&sep, " ,"); 90 | if (token == 0) { 91 | free(string); 92 | return; 93 | } 94 | } 95 | 96 | cstring_strip(token); 97 | self->thirdOperand = coperand_scan_syntax(token); 98 | 99 | if (self->thirdOperand != O_UNKNOWN) { 100 | token = cstring_strsep(&sep, " ,"); 101 | if (token == 0) { 102 | free(string); 103 | return; 104 | } 105 | } 106 | 107 | if (token != 0) { 108 | fprintf(stderr, "casm: Invalid operand encoding '%s'\n", syntax); 109 | fflush(stderr); 110 | assert(0); 111 | return; 112 | } 113 | 114 | free(string); 115 | } 116 | 117 | CInstruction *cinst_get_next(CInstruction *self) 118 | { 119 | assert(self); 120 | return self->next; 121 | } 122 | 123 | void cinst_attach_new(CInstruction *self, const CInstSyntax *instruction) 124 | { 125 | if (!self->next) { 126 | self->next = cinst_create(instruction); 127 | } else { 128 | cinst_attach_new(self->next, instruction); 129 | } 130 | } 131 | 132 | void cinst_reset_match(CInstruction *self) 133 | { 134 | self->syntaxMnemonic = 0; 135 | self->syntaxSpecifier = 0; 136 | self->syntaxFirstOperand = 0; 137 | self->syntaxSecondOperand = 0; 138 | self->syntaxThirdOperand = 0; 139 | 140 | if (self->next) { 141 | cinst_reset_match(self->next); 142 | } 143 | } 144 | 145 | int cinst_match_syntax(CInstruction *self) 146 | { 147 | return self->syntaxMnemonic != 0 && 148 | self->syntaxSpecifier != 0 && 149 | self->syntaxFirstOperand != 0 && 150 | self->syntaxSecondOperand != 0 && 151 | self->syntaxThirdOperand != 0; 152 | } 153 | 154 | void cinst_match_mnemonic(CInstruction *self, const char *mnemonic) 155 | { 156 | if (stricmp(self->syntax->mnemonic, mnemonic) 
== 0) { 157 | self->syntaxMnemonic = 1; 158 | } 159 | if (self->next) { 160 | cinst_match_mnemonic(self->next, mnemonic); 161 | } 162 | } 163 | 164 | void cinst_match_specifier(CInstruction *self, enum CSpecifierType specifier) 165 | { 166 | if (self->specifier == CS_UNKNOWN) 167 | { 168 | if (self->specifier != CS_UNKNOWN) { 169 | if (self->firstOperand == O_R_M8 || 170 | self->secondOperand == O_R_M8) { 171 | self->syntaxSpecifier = self->specifier == CS_BYTE; 172 | } 173 | else if (self->firstOperand == O_R_M16 || 174 | self->secondOperand == O_R_M16) { 175 | self->syntaxSpecifier = self->specifier == CS_WORD; 176 | } 177 | else if (self->firstOperand == O_R_M32 || 178 | self->secondOperand == O_R_M32) { 179 | self->syntaxSpecifier = self->specifier == CS_DWORD; 180 | } 181 | else if (self->firstOperand == O_R_M64 || 182 | self->secondOperand == O_R_M64) { 183 | self->syntaxSpecifier = 184 | (self->specifier == CS_QWORD || 185 | self->specifier == CS_MMWORD); 186 | } 187 | else if (self->firstOperand == O_R_M128 || 188 | self->secondOperand == O_R_M128) { 189 | self->syntaxSpecifier = self->specifier == CS_XMMWORD; 190 | } 191 | else { 192 | self->syntaxSpecifier = 1; 193 | } 194 | } else { 195 | self->syntaxSpecifier = 1; 196 | } 197 | } 198 | else if (self->specifier != CS_UNKNOWN) // Explicit specifier 199 | { 200 | if (self->specifier == specifier) { 201 | self->syntaxSpecifier = 1; 202 | } 203 | else if (specifier == CS_UNKNOWN) { 204 | self->syntaxSpecifier = 1; // Specifiers are optional 205 | } 206 | else { 207 | self->syntaxSpecifier = 0; 208 | } 209 | } 210 | 211 | if (self->next) { 212 | cinst_match_specifier(self->next, specifier); 213 | } 214 | } 215 | 216 | void cinst_match_first_operand(CInstruction *self, const COperand *operand) 217 | { 218 | if (coperand_is_subtype_of(operand, self->firstOperand)) { 219 | self->syntaxFirstOperand = 1; 220 | } 221 | else if (operand->type == O_MEM && self->firstOperand & O_MEM) { 222 | if(self->syntaxSpecifier) 
{ // Explicit size specfier 223 | self->syntaxFirstOperand = 1; 224 | } 225 | else if(self->secondOperand != O_UNKNOWN) { //Implicit size specifier 226 | self->syntaxFirstOperand = 1; 227 | } 228 | } 229 | 230 | if (self->next) { 231 | cinst_match_first_operand(self->next, operand); 232 | } 233 | } 234 | 235 | void cinst_match_second_operand(CInstruction *self, const COperand *operand) 236 | { 237 | if (coperand_is_subtype_of(operand, self->secondOperand)) { 238 | self->syntaxSecondOperand = 1; 239 | } 240 | else if (operand->type == O_MEM && self->secondOperand & O_MEM) { 241 | if (self->syntaxSpecifier) { // Explicit size specfier 242 | self->syntaxSecondOperand = 1; 243 | } 244 | else if (self->firstOperand != O_UNKNOWN) { 245 | self->syntaxSecondOperand = 1; 246 | } 247 | } 248 | if (self->next) { 249 | cinst_match_second_operand(self->next, operand); 250 | } 251 | } 252 | 253 | void cinst_match_third_operand(CInstruction *self, const COperand *operand) 254 | { 255 | if (coperand_is_subtype_of(operand, self->thirdOperand)) { 256 | self->syntaxThirdOperand = 1; 257 | } 258 | if (self->next) { 259 | cinst_match_third_operand(self->next, operand); 260 | } 261 | } 262 | 263 | enum COperandType cinst_getFirstOperand(CInstruction *self) 264 | { 265 | return self->firstOperand; 266 | } 267 | 268 | enum COperandType cinst_getSecondOperand(CInstruction *self) 269 | { 270 | return self->secondOperand; 271 | } 272 | 273 | enum COperandType cinst_getThirdOperand(CInstruction *self) 274 | { 275 | return self->thirdOperand; 276 | } 277 | 278 | const char *cinst_getMnemonic(CInstruction *self) 279 | { 280 | return self->syntax->mnemonic; 281 | } 282 | 283 | const char *cinst_getOperandSyntax(CInstruction *self) 284 | { 285 | return self->syntax->operands; 286 | } 287 | 288 | const char *cinst_getEncoding(CInstruction *self) 289 | { 290 | return self->syntax->encoding; 291 | } 292 | 293 | int cinst_is_32bit(CInstruction *self) 294 | { 295 | return (self->flags & CT_CPU_386) == 
CT_CPU_386; 296 | } 297 | 298 | 299 | 300 | -------------------------------------------------------------------------------- /source/cinstruct.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cinstruct.h - 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #ifndef __CINSTRUCT_H__ 10 | #define __CINSTRUCT_H__ 11 | 12 | #include "ckeywords.h" 13 | 14 | //--------------------------------------------------------------------- 15 | // CInstructionType 16 | //--------------------------------------------------------------------- 17 | enum CInstructionType 18 | { 19 | CT_CPU_UNKNOWN = 0x00000000, 20 | 21 | CT_CPU_8086 = 0x00000001, 22 | CT_CPU_186 = 0x00000002, 23 | CT_CPU_286 = 0x00000004, 24 | CT_CPU_386 = 0x00000008, 25 | CT_CPU_486 = 0x00000010, 26 | CT_CPU_PENT = 0x00000020, // Pentium 27 | CT_CPU_P6 = 0x00000040, // Pentium Pro 28 | 29 | CT_CPU_FPU = 0x00000080, 30 | CT_CPU_MMX = 0x00000100, 31 | CT_CPU_KATMAI = 0x00000200, 32 | CT_CPU_SSE = 0x00000400, 33 | 34 | // CT_CPU_AMD = 0x00000800, // AMD specific system calls 35 | CT_CPU_CYRIX = 0x00001000, 36 | CT_CPU_3DNOW = 0x00002000, 37 | CT_CPU_ATHLON = 0x00004000, 38 | // CT_CPU_SMM = 0x00008000, // System Management Mode, standby mode 39 | 40 | CT_CPU_P7 = 0x00010000 | CT_CPU_SSE, 41 | CT_CPU_WILLAMETTE = CT_CPU_P7, 42 | CT_CPU_SSE2 = 0x00020000 | CT_CPU_WILLAMETTE, 43 | CT_CPU_PNI = 0x00040000, 44 | CT_CPU_SSE3 = 0x00080000, 45 | 46 | // Undocumented, also not supported by inline assembler 47 | // CT_CPU_UNDOC = 0x00010000, 48 | // Priviledged, run-time compiled OS kernel anyone? 
49 | // CT_CPU_PRIV = 0x00020000 50 | }; 51 | 52 | 53 | //--------------------------------------------------------------------- 54 | // CInstructionSyntax 55 | //--------------------------------------------------------------------- 56 | struct CInstSyntax 57 | { 58 | const char *mnemonic; 59 | const char *operands; 60 | const char *encoding; 61 | int flags; 62 | }; 63 | 64 | typedef struct CInstSyntax CInstSyntax; 65 | 66 | 67 | //--------------------------------------------------------------------- 68 | // CInstruction 69 | //--------------------------------------------------------------------- 70 | struct CInstruction 71 | { 72 | int syntaxMnemonic : 1; 73 | int syntaxSpecifier : 1; 74 | int syntaxFirstOperand : 1; 75 | int syntaxSecondOperand : 1; 76 | int syntaxThirdOperand : 1; 77 | 78 | const struct CInstSyntax *syntax; 79 | enum CSpecifierType specifier; 80 | enum COperandType firstOperand; 81 | enum COperandType secondOperand; 82 | enum COperandType thirdOperand; 83 | int flags; 84 | 85 | struct CInstruction *next; 86 | }; 87 | 88 | typedef struct CInstruction CInstruction; 89 | 90 | 91 | #ifdef __cplusplus 92 | extern "C" { 93 | #endif 94 | 95 | 96 | CInstruction *cinst_create(const CInstSyntax *syntax); 97 | void cinst_release(CInstruction *self); 98 | 99 | CInstruction *cinst_get_next(CInstruction *self); 100 | 101 | void cinst_attach_new(CInstruction *self, const CInstSyntax *instruction); 102 | 103 | void cinst_reset_match(CInstruction *self); 104 | int cinst_match_syntax(CInstruction *self); 105 | void cinst_match_mnemonic(CInstruction *self, const char *mnemonic); 106 | void cinst_match_specifier(CInstruction *self, enum CSpecifierType sizeSpec); 107 | void cinst_match_first_operand(CInstruction *self, const COperand *operand); 108 | void cinst_match_second_operand(CInstruction *self, const COperand *operand); 109 | void cinst_match_third_operand(CInstruction *self, const COperand *operand); 110 | 111 | enum COperandType 
cinst_getFirstOperand(CInstruction *self); 112 | enum COperandType cinst_getSecondOperand(CInstruction *self); 113 | enum COperandType cinst_getThirdOperand(CInstruction *self); 114 | 115 | const char *cinst_getMnemonic(CInstruction *self); 116 | const char *cinst_getOperandSyntax(CInstruction *self); 117 | const char *cinst_getEncoding(CInstruction *self); 118 | 119 | int cinst_is_32bit(CInstruction *self); 120 | 121 | 122 | 123 | #ifdef __cplusplus 124 | } 125 | #endif 126 | 127 | #endif 128 | 129 | 130 | -------------------------------------------------------------------------------- /source/cinstset.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cinstset.h - 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #ifndef __CINSTSET_H__ 10 | #define __CINSTSET_H__ 11 | 12 | #include "cinstruct.h" 13 | 14 | 15 | struct CInstructionEntry 16 | { 17 | const char *mnemonic; 18 | CInstruction *instruction; 19 | }; 20 | 21 | typedef struct CInstructionEntry CInstructionEntry; 22 | 23 | struct CInstructionSet 24 | { 25 | CInstructionEntry *instructionMap; 26 | }; 27 | 28 | typedef struct CInstructionSet CInstructionSet; 29 | 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | extern CInstSyntax cinstruction_set[]; 36 | 37 | int cinstset_num_instructions(void); 38 | int cinstset_num_mnemonics(void); 39 | 40 | CInstructionSet *cinstset_create(void); 41 | void cinstset_release(CInstructionSet *self); 42 | 43 | CInstruction *cinstset_query(const CInstructionSet *self, const char *name); 44 | 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | 50 | #endif 51 | 52 | 53 | -------------------------------------------------------------------------------- /source/ckeywords.c: 
-------------------------------------------------------------------------------- 1 | #include "ckeywords.h" 2 | 3 | 4 | const CSpecifier cspecifier_set[] = 5 | { 6 | {CS_UNKNOWN, ""}, 7 | {CS_NEAR, "NEAR"}, 8 | {CS_SHORT, "SHORT"}, 9 | // {FAR, "FAR"}, 10 | {CS_BYTE, "BYTE"}, 11 | {CS_WORD, "WORD"}, 12 | {CS_DWORD, "DWORD"}, 13 | {CS_QWORD, "QWORD"}, 14 | {CS_MMWORD, "MMWORD"}, 15 | {CS_XMMWORD, "XMMWORD"}, 16 | }; 17 | 18 | enum CSpecifierType cspecifier_scan(const char *string) 19 | { 20 | if (string) { 21 | int i; 22 | for(i = 0; i < sizeof(cspecifier_set) / sizeof(CSpecifier); i++) { 23 | if(cstring_strcmp(string, cspecifier_set[i].notation, 1) == 0) { 24 | return cspecifier_set[i].type; 25 | } 26 | } 27 | } 28 | return CS_UNKNOWN; 29 | } 30 | 31 | 32 | int coperand_is_subtype_of(const COperand *self, enum COperandType baseType) 33 | { 34 | return (self->type & baseType) == self->type; 35 | } 36 | 37 | int coperand_type_is_void(enum COperandType type) 38 | { 39 | return type == O_VOID; 40 | } 41 | 42 | int coperand_type_is_imm(enum COperandType type) 43 | { 44 | return (type & O_IMM) == type; 45 | } 46 | 47 | int coperand_type_is_reg(enum COperandType type) 48 | { 49 | return (type & O_REG) == type; 50 | } 51 | 52 | int coperand_type_is_mem(enum COperandType type) 53 | { 54 | return (type & O_MEM) == type; 55 | } 56 | 57 | int coperand_type_is_R_M(enum COperandType type) 58 | { 59 | return (type & O_R_M) == type; 60 | } 61 | 62 | int coperand_is_void(const COperand *operand) 63 | { 64 | return coperand_type_is_void(operand->type); 65 | } 66 | 67 | int coperand_is_imm(const COperand *operand) 68 | { 69 | return coperand_type_is_imm(operand->type); 70 | } 71 | 72 | int coperand_is_reg(const COperand *operand) 73 | { 74 | return coperand_type_is_reg(operand->type); 75 | } 76 | 77 | int coperand_is_mem(const COperand *operand) 78 | { 79 | return coperand_type_is_mem(operand->type); 80 | } 81 | 82 | int coperand_is_R_M(const COperand *operand) 83 | { 84 | return 
coperand_type_is_R_M(operand->type); 85 | } 86 | 87 | const COperand cregister_set[] = 88 | { 89 | {O_VOID, ""}, 90 | 91 | {O_AL, "AL", { 0 } }, 92 | {O_CL, "CL", { 1 } }, 93 | {O_REG8, "DL", { 2 } }, 94 | {O_REG8, "BL", { 3 } }, 95 | {O_REG8, "AH", { 4 } }, 96 | {O_REG8, "CH", { 5 } }, 97 | {O_REG8, "DH", { 6 } }, 98 | {O_REG8, "BH", { 7 } }, 99 | 100 | {O_AX, "AX", { 0 } }, 101 | {O_CX, "CX", { 1 } }, 102 | {O_DX, "DX", { 2 } }, 103 | {O_REG16, "BX", { 3 } }, 104 | {O_REG16, "SP", { 4 } }, 105 | {O_REG16, "BP", { 5 } }, 106 | {O_REG16, "SI", { 6 } }, 107 | {O_REG16, "DI", { 7 } }, 108 | 109 | {O_EAX, "EAX", { 0 } }, 110 | {O_ECX, "ECX", { 1 } }, 111 | {O_REG32, "EDX", { 2 } }, 112 | {O_REG32, "EBX", { 3 } }, 113 | {O_REG32, "ESP", { 4 } }, 114 | {O_REG32, "EBP", { 5 } }, 115 | {O_REG32, "ESI", { 6 } }, 116 | {O_REG32, "EDI", { 7 } }, 117 | 118 | {O_ES, "ES", { 0 } }, 119 | {O_CS, "CS", { 1 } }, 120 | {O_SS, "SS", { 2 } }, 121 | {O_DS, "DS", { 3 } }, 122 | {O_FS, "FS", { 4 } }, 123 | {O_GS, "GS", { 5 } }, 124 | 125 | {O_ST0, "ST0", { 0 } }, 126 | {O_FPUREG, "ST1", { 1 } }, 127 | {O_FPUREG, "ST2", { 2 } }, 128 | {O_FPUREG, "ST3", { 3 } }, 129 | {O_FPUREG, "ST4", { 4 } }, 130 | {O_FPUREG, "ST5", { 5 } }, 131 | {O_FPUREG, "ST6", { 6 } }, 132 | {O_FPUREG, "ST7", { 7 } }, 133 | 134 | {O_MMREG, "MM0", { 0 } }, 135 | {O_MMREG, "MM1", { 1 } }, 136 | {O_MMREG, "MM2", { 2 } }, 137 | {O_MMREG, "MM3", { 3 } }, 138 | {O_MMREG, "MM4", { 4 } }, 139 | {O_MMREG, "MM5", { 5 } }, 140 | {O_MMREG, "MM6", { 6 } }, 141 | {O_MMREG, "MM7", { 7 } }, 142 | 143 | {O_XMMREG, "XMM0", { 0 } }, 144 | {O_XMMREG, "XMM1", { 1 } }, 145 | {O_XMMREG, "XMM2", { 2 } }, 146 | {O_XMMREG, "XMM3", { 3 } }, 147 | {O_XMMREG, "XMM4", { 4 } }, 148 | {O_XMMREG, "XMM5", { 5 } }, 149 | {O_XMMREG, "XMM6", { 6 } }, 150 | {O_XMMREG, "XMM7", { 7 } } 151 | }; 152 | 153 | const COperand csyntax_set[] = 154 | { 155 | {O_VOID, ""}, 156 | 157 | {O_ONE, "1"}, 158 | {O_IMM, "imm"}, 159 | {O_IMM8, "imm8"}, 160 | {O_IMM16, 
"imm16"}, 161 | {O_IMM32, "imm32"}, 162 | 163 | {O_AL, "AL"}, 164 | {O_AX, "AX"}, 165 | {O_EAX, "EAX"}, 166 | {O_DX, "DX"}, 167 | {O_CL, "CL"}, 168 | {O_CX, "CX"}, 169 | {O_ECX, "ECX"}, 170 | {O_CS, "CS"}, 171 | {O_DS, "DS"}, 172 | {O_ES, "ES"}, 173 | {O_SS, "SS"}, 174 | {O_FS, "FS"}, 175 | {O_GS, "GS"}, 176 | {O_ST0, "ST0"}, 177 | 178 | {O_REG8, "reg8"}, 179 | {O_REG16, "reg16"}, 180 | {O_REG32, "reg32"}, 181 | {O_SEGREG, "segreg"}, 182 | {O_FPUREG, "fpureg"}, 183 | {O_CR, "CR0/2/3/4"}, 184 | {O_DR, "DR0/1/2/3/6/7"}, 185 | {O_TR, "TR3/4/5/6/7"}, 186 | {O_MMREG, "mmreg"}, 187 | {O_XMMREG, "xmmreg"}, 188 | 189 | {O_MEM, "mem"}, 190 | {O_MEM8, "mem8"}, 191 | {O_MEM16, "mem16"}, 192 | {O_MEM32, "mem32"}, 193 | {O_MEM64, "mem64"}, 194 | {O_MEM80, "mem80"}, 195 | {O_MEM128, "mem128"}, 196 | 197 | {O_R_M8, "r/m8"}, 198 | {O_R_M16, "r/m16"}, 199 | {O_R_M32, "r/m32"}, 200 | {O_R_M64, "r/m64"}, 201 | {O_R_M128, "r/m128"}, 202 | 203 | {O_XMM32, "xmmreg/mem32"}, 204 | {O_XMM32, "xmmreg/mem64"}, 205 | {O_M512B, "m512byte"}, 206 | {O_MOFF8, "memoffs8"}, 207 | {O_MOFF16, "memoffs16"}, 208 | {O_MOFF32, "memoffs32"} 209 | }; 210 | 211 | const COperand CINIT = { O_VOID }; 212 | const COperand CNOT_FOUND = { O_UNKNOWN }; 213 | 214 | COperand coperand_scan_reg(const char *string) 215 | { 216 | if (string) { 217 | size_t i; 218 | for (i = 0; i < sizeof(cregister_set) / sizeof(COperand); i++) { 219 | if (cstring_strcmp(string, cregister_set[i].notation, 1) == 0) { 220 | return cregister_set[i]; 221 | } 222 | } 223 | } 224 | return CNOT_FOUND; 225 | } 226 | 227 | enum COperandType coperand_scan_syntax(const char *string) 228 | { 229 | if (string) { 230 | size_t i; 231 | for (i = 0; i < sizeof(csyntax_set) / sizeof(COperand); i++) { 232 | if (cstring_strcmp(string, csyntax_set[i].notation, 1) == 0) { 233 | return csyntax_set[i].type; 234 | } 235 | } 236 | } 237 | return O_UNKNOWN; 238 | } 239 | 240 | 241 | //--------------------------------------------------------------------- 242 | // 
// string operation
//---------------------------------------------------------------------

// Remove leading and trailing whitespace from `str` in place.
// Returns `str`. `str` must be a writable, NUL-terminated string.
char *cstring_strip(char *str)
{
    size_t size = strlen(str);
    char *p = str;

    // <ctype.h> classifiers require an unsigned char value (or EOF);
    // passing a negative plain char is undefined behaviour.
    while (size > 0 && isspace((unsigned char)str[size - 1])) {
        size--;
    }
    str[size] = '\0';

    while (*p != '\0' && isspace((unsigned char)*p)) {
        p++;
    }
    if (p != str) {
        memmove(str, p, strlen(p) + 1);     // regions overlap
    }
    return str;
}

// Compare `s1` and `s2` while ignoring leading and trailing whitespace.
// When `caseoff` is non-zero, ASCII lower-case letters are folded to upper
// case before comparing. Returns -1, 0 or 1; when one trimmed string is a
// prefix of the other, the shorter one orders first.
int cstring_strcmp(const char *s1, const char *s2, int caseoff)
{
    size_t k1, k2, i;

    while (isspace((unsigned char)*s1)) s1++;
    while (isspace((unsigned char)*s2)) s2++;

    k1 = strlen(s1);
    while (k1 > 0 && isspace((unsigned char)s1[k1 - 1])) k1--;
    k2 = strlen(s2);
    while (k2 > 0 && isspace((unsigned char)s2[k2 - 1])) k2--;

    for (i = 0; i < k1 && i < k2; i++) {
        char c1 = s1[i];
        char c2 = s2[i];
        if (caseoff) {
            if (c1 >= 'a' && c1 <= 'z') c1 -= 'a' - 'A';
            if (c2 >= 'a' && c2 <= 'z') c2 -= 'a' - 'A';
        }
        if (c1 < c2) return -1;
        if (c1 > c2) return 1;
    }
    if (k1 < k2) return -1;
    if (k1 > k2) return 1;
    return 0;
}

// Split off the next token of `*stringp` at the first character found in
// `delim`. The delimiter is overwritten with NUL and `*stringp` advances
// past it; when the end of the string is reached `*stringp` becomes NULL.
// Returns the token start, or NULL when `*stringp` is already NULL.
// Adjacent delimiters produce empty tokens.
char *cstring_strsep(char **stringp, const char *delim)
{
    char *tok = *stringp;
    char *s;

    if (tok == NULL) {
        return NULL;
    }
    for (s = tok; ; s++) {
        const char *d = delim;
        char c = *s;
        // The terminating NUL of `delim` takes part in the comparison so
        // that the end of the input also ends the token.
        do {
            if (*d == c) {
                if (c == '\0') {
                    *stringp = NULL;
                } else {
                    *s = '\0';
                    *stringp = s + 1;
                }
                return tok;
            }
        } while (*d++ != '\0');
    }
}



// --------------------------------------------------------------------------------
// /source/ckeywords.h:
// --------------------------------------------------------------------------------
//===================================================================== 2 | // 3 | // ckeywords.h - 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #ifndef __CKEYWORDS_H__ 10 | #define __CKEYWORDS_H__ 11 | 12 | #include "cencoding.h" 13 | 14 | //--------------------------------------------------------------------- 15 | // CSpecifierType 16 | //--------------------------------------------------------------------- 17 | enum CSpecifierType 18 | { 19 | CS_UNKNOWN = 0, 20 | CS_NEAR, 21 | CS_SHORT = CS_NEAR, 22 | // FAR, 23 | CS_BYTE, 24 | CS_WORD, 25 | CS_DWORD, 26 | CS_QWORD, 27 | CS_MMWORD = CS_QWORD, 28 | CS_XMMWORD 29 | }; 30 | 31 | //--------------------------------------------------------------------- 32 | // CSpecifier 33 | //--------------------------------------------------------------------- 34 | struct CSpecifier 35 | { 36 | enum CSpecifierType type; 37 | const char *notation; 38 | }; 39 | 40 | typedef struct CSpecifier CSpecifier; 41 | 42 | 43 | //--------------------------------------------------------------------- 44 | // COperandType 45 | //--------------------------------------------------------------------- 46 | enum COperandType 47 | { 48 | O_UNKNOWN = 0, 49 | 50 | O_VOID = 0x00000001, 51 | 52 | O_ONE = 0x00000002, 53 | O_IMM8 = 0x00000004 | O_ONE, 54 | O_IMM16 = 0x00000008 | O_IMM8 | O_ONE, 55 | O_IMM32 = 0x00000010 | O_IMM16 | O_IMM8 | O_ONE, 56 | O_IMM = O_IMM32 | O_IMM16 | O_IMM8 | O_ONE, 57 | 58 | O_AL = 0x00000020, 59 | O_CL = 0x00000040, 60 | O_REG8 = O_CL | O_AL, 61 | 62 | O_AX = 0x00000080, 63 | O_DX = 0x00000100, 64 | O_CX = 0x00000200, 65 | O_REG16 = O_CX | O_DX | O_AX, 66 | 67 | O_EAX = 0x00000400, 68 | O_ECX = 0x00000800, 69 | O_REG32 = O_ECX | O_EAX, 70 | 71 | // No need to touch these in 32-bit protected mode 72 | O_CS = O_UNKNOWN, 73 | O_DS = O_UNKNOWN, 74 | O_ES = O_UNKNOWN, 75 | O_SS = O_UNKNOWN, 76 | O_FS = O_UNKNOWN, 
77 | O_GS = O_UNKNOWN, 78 | O_SEGREG = O_GS | O_FS | O_SS | O_ES | O_DS | O_CS, 79 | 80 | O_ST0 = 0x00001000, 81 | O_FPUREG = 0x00002000 | O_ST0, 82 | 83 | // You won't need these in a JIT assembler 84 | O_CR = O_UNKNOWN, 85 | O_DR = O_UNKNOWN, 86 | O_TR = O_UNKNOWN, 87 | 88 | O_MMREG = 0x00004000, 89 | O_XMMREG = 0x00008000, 90 | 91 | O_REG = O_XMMREG | O_MMREG | O_TR | O_DR | O_CR | O_FPUREG | 92 | O_SEGREG | O_REG32 | O_REG16 | O_REG8, 93 | O_MEM8 = 0x00010000, 94 | O_MEM16 = 0x00020000, 95 | O_MEM32 = 0x00040000, 96 | O_MEM64 = 0x00080000, 97 | O_MEM80 = O_UNKNOWN, // Extended double not supported by NT 98 | O_MEM128 = 0x00100000, 99 | O_M512B = O_UNKNOWN, // Only for state save/restore instructions 100 | O_MEM = O_M512B | O_MEM128 | O_MEM80 | O_MEM64 | O_MEM32 | 101 | O_MEM16 | O_MEM8, 102 | 103 | O_XMM32 = O_MEM32 | O_XMMREG, 104 | O_XMM64 = O_MEM64 | O_XMMREG, 105 | 106 | O_R_M8 = O_MEM8 | O_REG8, 107 | O_R_M16 = O_MEM16 | O_REG16, 108 | O_R_M32 = O_MEM32 | O_REG32, 109 | O_R_M64 = O_MEM64 | O_MMREG, 110 | O_R_M128 = O_MEM128 | O_XMMREG, 111 | O_R_M = O_MEM | O_REG, 112 | 113 | O_MOFF8 = O_UNKNOWN, // Not supported 114 | O_MOFF16 = O_UNKNOWN, // Not supported 115 | O_MOFF32 = O_UNKNOWN // Not supported 116 | }; 117 | 118 | 119 | //--------------------------------------------------------------------- 120 | // COperand 121 | //--------------------------------------------------------------------- 122 | struct COperand 123 | { 124 | enum COperandType type; 125 | const char *notation; 126 | union 127 | { 128 | cint32 value; // For immediates 129 | enum CRegID reg; // For registers 130 | }; 131 | }; 132 | 133 | typedef struct COperand COperand; 134 | 135 | 136 | #ifdef __cplusplus 137 | extern "C" { 138 | #endif 139 | 140 | //--------------------------------------------------------------------- 141 | // interface 142 | //--------------------------------------------------------------------- 143 | extern const CSpecifier cspecifier_set[]; 144 | extern const COperand 
cregister_set[]; 145 | extern const COperand csyntax_set[]; 146 | extern const COperand CINIT; 147 | extern const COperand CNOT_FOUND; 148 | 149 | enum CSpecifierType cspecifier_scan(const char *string); 150 | 151 | int coperand_is_subtype_of(const COperand *self, enum COperandType baseType); 152 | 153 | int coperand_type_is_void(enum COperandType type); 154 | int coperand_type_is_imm(enum COperandType type); 155 | int coperand_type_is_reg(enum COperandType type); 156 | int coperand_type_is_mem(enum COperandType type); 157 | int coperand_type_is_R_M(enum COperandType type); 158 | 159 | int coperand_is_void(const COperand *operand); 160 | int coperand_is_imm(const COperand *operand); 161 | int coperand_is_reg(const COperand *operand); 162 | int coperand_is_mem(const COperand *operand); 163 | int coperand_is_R_M(const COperand *operand); 164 | 165 | COperand coperand_scan_reg(const char *string); 166 | enum COperandType coperand_scan_syntax(const char *string); 167 | 168 | 169 | char *cstring_strip(char *str); 170 | int cstring_strcmp(const char *s1, const char *s2, int caseoff); 171 | char *cstring_strsep(char **stringp, const char *delim); 172 | 173 | 174 | #ifdef __cplusplus 175 | } 176 | #endif 177 | 178 | 179 | #endif 180 | 181 | 182 | /* 183 | Encoding syntax: 184 | ---------------- 185 | +r Add register value to opcode 186 | /# Value for Mod R/M register field encoding 187 | /r Effective address encoding 188 | ib Byte immediate 189 | iw Word immediate 190 | id Dword immediate 191 | -b Byte relative address 192 | -i Word or dword relative address 193 | p0 LOCK instruction prefix (F0h) 194 | p2 REPNE/REPNZ instruction prefix (F2h) 195 | p3 REP/REPE/REPZ instruction prefix (F3h) (also SSE prefix) 196 | po Offset override prefix (66h) 197 | pa Address override prefix (67h) 198 | 199 | {"JMP", "imm", "E9 -i", CT_CPU_8086}, 200 | {"JMP", "SHORT imm", "EB -b", CT_CPU_8086}, 201 | // {"JMP", "imm:imm16", "po EA iw iw", CT_CPU_8086}, 202 | // {"JMP", "imm:imm32", "po 
EA id iw", CT_CPU_386}, 203 | {"JMP", "mem", "po FF /5", CT_CPU_8086}, 204 | // {"JMP", "FAR mem", "po FF /5", CT_CPU_386}, 205 | {"JMP", "WORD r/m16", "po FF /4", CT_CPU_8086}, 206 | {"JMP", "DWORD r/m32", "po FF /4", CT_CPU_386}, 207 | {"MOV", "r/m8,reg8", "88 /r", CT_CPU_8086}, 208 | {"MOV", "r/m16,reg16", "po 89 /r", CT_CPU_8086}, 209 | {"MOV", "r/m32,reg32", "po 89 /r", CT_CPU_386}, 210 | {"MOV", "reg8,r/m8", "8A /r", CT_CPU_8086}, 211 | {"MOV", "reg16,r/m16", "po 8B /r", CT_CPU_8086}, 212 | {"MOV", "reg32,r/m32", "po 8B /r", CT_CPU_386}, 213 | {"MOV", "reg8,imm8", "B0 +r ib", CT_CPU_8086}, 214 | {"MOV", "reg16,imm16", "po B8 +r iw", CT_CPU_8086}, 215 | {"MOV", "reg32,imm32", "po B8 +r id", CT_CPU_386}, 216 | {"MOV", "r/m8,imm8", "C6 /0 ib", CT_CPU_8086}, 217 | {"MOV", "r/m16,imm16", "po C7 /0 iw", CT_CPU_8086}, 218 | {"MOV", "r/m32,imm32", "po C7 /0 id", CT_CPU_386}, 219 | */ 220 | 221 | -------------------------------------------------------------------------------- /source/cloader.c: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cloader.c - source loader 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | #include "cloader.h" 10 | 11 | //--------------------------------------------------------------------- 12 | // CLink interface 13 | //--------------------------------------------------------------------- 14 | static CLink *clink_create(const CEncoding *encoding) 15 | { 16 | CLink *link; 17 | 18 | link = (CLink*)malloc(sizeof(CLink)); 19 | assert(link); 20 | 21 | iqueue_init(&link->head); 22 | cencoding_new_copy(&link->encoding, encoding); 23 | link->offset = 0; 24 | link->size = 0; 25 | 26 | return link; 27 | } 28 | 29 | static void clink_release(CLink *link) 30 | { 31 | assert(link); 32 | cencoding_destroy(&link->encoding); 33 | } 34 | 35 | 36 | //--------------------------------------------------------------------- 37 | // CLoader interface 38 | //--------------------------------------------------------------------- 39 | CLoader *cloader_create(void) 40 | { 41 | CLoader *loader; 42 | loader = (CLoader*)malloc(sizeof(CLoader)); 43 | assert(loader); 44 | iqueue_init(&loader->head); 45 | loader->error = (char*)malloc(1024); 46 | assert(loader->error); 47 | loader->error[0] = 0; 48 | loader->errcode = 0; 49 | loader->linear = 0; 50 | loader->output = NULL; 51 | loader->lineno = 0; 52 | return loader; 53 | } 54 | 55 | void cloader_reset(CLoader *loader) 56 | { 57 | assert(loader); 58 | while (!iqueue_is_empty(&loader->head)) { 59 | CLink *link = iqueue_entry(loader->head.next, CLink, head); 60 | iqueue_del(&link->head); 61 | clink_release(link); 62 | } 63 | loader->error[0] = 0; 64 | loader->errcode = 0; 65 | loader->linear = 0; 66 | loader->output = NULL; 67 | loader->lineno = 0; 68 | } 69 | 70 | void cloader_release(CLoader *loader) 71 | { 72 | assert(loader); 73 | cloader_reset(loader); 74 | if (loader->error) { 75 | free(loader->error); 76 | loader->error = NULL; 77 | } 78 | } 79 | 80 | int cloader_new_encoding(CLoader *loader, const CEncoding *encoding) 81 | { 82 | CLink *link; 
83 | link = clink_create(encoding); 84 | assert(link); 85 | link->lineno = ++loader->lineno; 86 | iqueue_add_tail(&link->head, &loader->head); 87 | return 0; 88 | } 89 | 90 | int cloader_get_codesize(CLoader *loader) 91 | { 92 | struct IQUEUEHEAD *p; 93 | int size = 0; 94 | assert(loader); 95 | for (p = loader->head.next; p != &loader->head; p = p->next) { 96 | CLink *link = iqueue_entry(p, CLink, head); 97 | size += cencoding_length(&link->encoding); 98 | } 99 | return size; 100 | } 101 | 102 | unsigned long cloader_resolve_label(CLoader *loader, const char *label) 103 | { 104 | struct IQUEUEHEAD *p; 105 | for (p = loader->head.next; p != &loader->head; p = p->next) { 106 | CLink *link = iqueue_entry(p, CLink, head); 107 | CEncoding *encoding = &link->encoding; 108 | if (cencoding_get_label(encoding)) { 109 | if (strcmp(encoding->label, label) == 0) { 110 | return (long)link->offset; 111 | } 112 | } 113 | } 114 | return 0; 115 | } 116 | 117 | int cloader_output(CLoader *loader, unsigned char *output) 118 | { 119 | struct IQUEUEHEAD *p; 120 | assert(loader); 121 | 122 | loader->output = output; 123 | loader->linear = (cuint32)output; 124 | 125 | // encoding instructions 126 | for (p = loader->head.next; p != &loader->head; p = p->next) { 127 | CLink *link = iqueue_entry(p, CLink, head); 128 | CEncoding *encoding = &link->encoding; 129 | int size; 130 | size = cencoding_write_code(encoding, loader->output); 131 | link->offset = loader->linear; 132 | link->size = size; 133 | loader->linear += size; 134 | loader->output += size; 135 | } 136 | 137 | // resolve labels 138 | for (p = loader->head.next; p != &loader->head; p = p->next) { 139 | CLink *link = iqueue_entry(p, CLink, head); 140 | CEncoding *encoding = &link->encoding; 141 | unsigned char *offset = (unsigned char*)link->offset; 142 | if (cencoding_get_reference(encoding)) { 143 | const char *label = cencoding_get_reference(encoding); 144 | long linear = cloader_resolve_label(loader, label); 145 | if (linear == 
0) { 146 | strncpy(loader->error, "not find label: ", 40); 147 | strncat(loader->error, label, 100); 148 | loader->errcode = link->lineno; 149 | return -1; 150 | } 151 | if (encoding->relative == 0) { 152 | cencoding_set_immediate(encoding, linear); 153 | } 154 | else { 155 | long diff = linear - (link->offset + link->size); 156 | cencoding_set_jump_offset(encoding, diff); 157 | } 158 | cencoding_write_code(encoding, offset); 159 | } 160 | } 161 | 162 | return 0; 163 | } 164 | 165 | 166 | void cloader_print(const CLoader *loader) 167 | { 168 | struct IQUEUEHEAD *p; 169 | static char line[400]; 170 | for (p = loader->head.next; p != &loader->head; p = p->next) { 171 | CLink *link = iqueue_entry(p, CLink, head); 172 | CEncoding *encoding = &link->encoding; 173 | cencoding_to_string(encoding, line); 174 | printf("%s\n", line); 175 | } 176 | } 177 | 178 | 179 | -------------------------------------------------------------------------------- /source/cloader.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cloader.h - source loader 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | #ifndef __CLOADER_H__ 10 | #define __CLOADER_H__ 11 | 12 | #include "cencoding.h" 13 | 14 | 15 | /*====================================================================*/ 16 | /* QUEUE DEFINITION */ 17 | /*====================================================================*/ 18 | #ifndef __IQUEUE_DEF__ 19 | #define __IQUEUE_DEF__ 20 | 21 | struct IQUEUEHEAD { 22 | struct IQUEUEHEAD *next, *prev; 23 | }; 24 | 25 | typedef struct IQUEUEHEAD iqueue_head; 26 | 27 | 28 | /*--------------------------------------------------------------------*/ 29 | /* queue init */ 30 | /*--------------------------------------------------------------------*/ 31 | #define IQUEUE_HEAD_INIT(name) { &(name), &(name) } 32 | #define IQUEUE_HEAD(name) \ 33 | struct IQUEUEHEAD name = IQUEUE_HEAD_INIT(name) 34 | 35 | #define IQUEUE_INIT(ptr) ( \ 36 | (ptr)->next = (ptr), (ptr)->prev = (ptr)) 37 | 38 | #define IOFFSETOF(TYPE, MEMBER) ((unsigned long) &((TYPE *)0)->MEMBER) 39 | 40 | #define ICONTAINEROF(ptr, type, member) ( \ 41 | (type*)( ((char*)((type*)ptr)) - IOFFSETOF(type, member)) ) 42 | 43 | #define IQUEUE_ENTRY(ptr, type, member) ICONTAINEROF(ptr, type, member) 44 | 45 | 46 | /*--------------------------------------------------------------------*/ 47 | /* queue operation */ 48 | /*--------------------------------------------------------------------*/ 49 | #define IQUEUE_ADD(node, head) ( \ 50 | (node)->prev = (head), (node)->next = (head)->next, \ 51 | (head)->next->prev = (node), (head)->next = (node)) 52 | 53 | #define IQUEUE_ADD_TAIL(node, head) ( \ 54 | (node)->prev = (head)->prev, (node)->next = (head), \ 55 | (head)->prev->next = (node), (head)->prev = (node)) 56 | 57 | #define IQUEUE_DEL_BETWEEN(p, n) ((n)->prev = (p), (p)->next = (n)) 58 | 59 | #define IQUEUE_DEL(entry) (\ 60 | (entry)->next->prev = (entry)->prev, \ 61 | (entry)->prev->next = (entry)->next, \ 62 | (entry)->next = 0, (entry)->prev 
= 0) 63 | 64 | #define IQUEUE_DEL_INIT(entry) do { \ 65 | IQUEUE_DEL(entry); IQUEUE_INIT(entry); } while (0) 66 | 67 | #define IQUEUE_IS_EMPTY(entry) ((entry) == (entry)->next) 68 | 69 | #define iqueue_init IQUEUE_INIT 70 | #define iqueue_entry IQUEUE_ENTRY 71 | #define iqueue_add IQUEUE_ADD 72 | #define iqueue_add_tail IQUEUE_ADD_TAIL 73 | #define iqueue_del IQUEUE_DEL 74 | #define iqueue_del_init IQUEUE_DEL_INIT 75 | #define iqueue_is_empty IQUEUE_IS_EMPTY 76 | 77 | #define IQUEUE_FOREACH(iterator, head, TYPE, MEMBER) \ 78 | for ((iterator) = iqueue_entry((head)->next, TYPE, MEMBER); \ 79 | &((iterator)->MEMBER) != (head); \ 80 | (iterator) = iqueue_entry((iterator)->MEMBER.next, TYPE, MEMBER)) 81 | 82 | #define iqueue_foreach(iterator, head, TYPE, MEMBER) \ 83 | IQUEUE_FOREACH(iterator, head, TYPE, MEMBER) 84 | 85 | #define iqueue_foreach_entry(pos, head) \ 86 | for( (pos) = (head)->next; (pos) != (head) ; (pos) = (pos)->next ) 87 | 88 | 89 | #define __iqueue_splice(list, head) do { \ 90 | iqueue_head *first = (list)->next, *last = (list)->prev; \ 91 | iqueue_head *at = (head)->next; \ 92 | (first)->prev = (head), (head)->next = (first); \ 93 | (last)->next = (at), (at)->prev = (last); } while (0) 94 | 95 | #define iqueue_splice(list, head) do { \ 96 | if (!iqueue_is_empty(list)) __iqueue_splice(list, head); } while (0) 97 | 98 | #define iqueue_splice_init(list, head) do { \ 99 | iqueue_splice(list, head); iqueue_init(list); } while (0) 100 | 101 | 102 | #ifdef _MSC_VER 103 | #pragma warning(disable:4311) 104 | #pragma warning(disable:4312) 105 | #pragma warning(disable:4996) 106 | #endif 107 | 108 | #endif 109 | 110 | 111 | //--------------------------------------------------------------------- 112 | // CLink 113 | //--------------------------------------------------------------------- 114 | struct CLink 115 | { 116 | struct IQUEUEHEAD head; 117 | CEncoding encoding; 118 | unsigned long offset; 119 | int size; 120 | int lineno; 121 | }; 122 | 123 | typedef 
struct CLink CLink; 124 | 125 | 126 | //--------------------------------------------------------------------- 127 | // CLoader Structure 128 | //--------------------------------------------------------------------- 129 | struct CLoader 130 | { 131 | struct IQUEUEHEAD head; // link head; 132 | char *error; 133 | int errcode; 134 | int lineno; 135 | unsigned long linear; 136 | unsigned char *output; 137 | }; 138 | 139 | typedef struct CLoader CLoader; 140 | 141 | #ifdef __cplusplus 142 | extern "C" { 143 | #endif 144 | //--------------------------------------------------------------------- 145 | // CLoader interface 146 | //--------------------------------------------------------------------- 147 | CLoader *cloader_create(void); 148 | 149 | void cloader_release(CLoader *loader); 150 | 151 | void cloader_reset(CLoader *loader); 152 | 153 | int cloader_new_encoding(CLoader *loader, const CEncoding *encoding); 154 | 155 | int cloader_get_codesize(CLoader *loader); 156 | 157 | int cloader_output(CLoader *loader, unsigned char *output); 158 | 159 | void cloader_print(const CLoader *loader); 160 | 161 | 162 | #ifdef __cplusplus 163 | } 164 | #endif 165 | 166 | #endif 167 | 168 | 169 | -------------------------------------------------------------------------------- /source/cparser.c: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cparser.c - source parser 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | #include "cparser.h" 10 | 11 | #ifdef _MSC_VER 12 | #pragma warning(disable: 4996) 13 | #endif 14 | 15 | #define IMAX_DATA 65536 16 | 17 | CParser *cparser_create(void) 18 | { 19 | CParser *parser; 20 | parser = (CParser*)malloc(sizeof(CParser)); 21 | assert(parser); 22 | parser->token = cscanner_create(); 23 | assert(parser->token); 24 | parser->instruction = NULL; 25 | parser->instructionset = cinstset_create(); 26 | csynth_init(&parser->synthesizer); 27 | parser->data = (char*)malloc(IMAX_DATA); 28 | assert(parser->data); 29 | parser->error = (char*)malloc(1024); 30 | assert(parser->error); 31 | parser->error[0] = 0; 32 | parser->errcode = 0; 33 | parser->vars = NULL; 34 | parser->inproc = 0; 35 | parser->stack = 0; 36 | return parser; 37 | } 38 | 39 | void cparser_release(CParser *parser) 40 | { 41 | assert(parser); 42 | if (parser->token) { 43 | cscanner_release(parser->token); 44 | parser->token = NULL; 45 | } 46 | if (parser->instructionset) { 47 | cinstset_release(parser->instructionset); 48 | parser->instructionset = NULL; 49 | } 50 | if (parser->error) { 51 | free(parser->error); 52 | parser->error = NULL; 53 | } 54 | if (parser->data) { 55 | free(parser->data); 56 | parser->data = NULL; 57 | } 58 | while (parser->vars) { 59 | CVariable *var = parser->vars; 60 | parser->vars = parser->vars->next; 61 | free(var->name); 62 | free(var); 63 | } 64 | csynth_destroy(&parser->synthesizer); 65 | free(parser); 66 | } 67 | 68 | void cparser_reset(CParser *parser) 69 | { 70 | cscanner_macro_reset(parser->token); 71 | while (parser->vars) { 72 | CVariable *var = parser->vars; 73 | parser->vars = parser->vars->next; 74 | free(var->name); 75 | free(var); 76 | } 77 | parser->inproc = 0; 78 | parser->stack = 0; 79 | } 80 | 81 | static int cparser_parse_label(CParser *parser); 82 | static int cparser_parse_mnemonic(CParser *parser); 83 | static int cparser_parse_specifier(CParser 
*parser); 84 | static int cparser_parse_first_operand(CParser *parser); 85 | static int cparser_parse_second_operand(CParser *parser); 86 | static int cparser_parse_third_operand(CParser *parser); 87 | static COperand cparser_parse_immediate(CParser *parser); 88 | static COperand cparser_parse_register(CParser *parser); 89 | static COperand cparser_parse_memory_reference(CParser *parser); 90 | 91 | static int cparser_parse_data(CParser *parser); 92 | static int cparser_parse_align(CParser *parser); 93 | static int cparser_parse_prefix(CParser *parser); 94 | 95 | static int cparser_parse_proc(CParser *parser); 96 | 97 | static void cparser_error(CParser *parser, const char *error, int code) 98 | { 99 | strncpy(parser->error, error, 100); 100 | parser->errcode = code; 101 | } 102 | 103 | const CEncoding *cparser_parse_line(CParser *parser, const char *source) 104 | { 105 | int retval; 106 | 107 | if (source == NULL) { 108 | cparser_error(parser, "empty source line", 1); 109 | return NULL; 110 | } 111 | 112 | retval = cscanner_set_source(parser->token, source); 113 | 114 | if (retval != 0) { 115 | cparser_error(parser, parser->token->error, 2); 116 | return NULL; 117 | } 118 | 119 | parser->instruction = NULL; 120 | csynth_reset(&parser->synthesizer); 121 | 122 | parser->error[0] = 0; 123 | parser->errcode = 0; 124 | 125 | // parse label 126 | if (!cscanner_is_endl(parser->token)) { 127 | if (cparser_parse_label(parser)) { 128 | cparser_error(parser, "label error", 3); 129 | return NULL; 130 | } 131 | } 132 | 133 | // parse inline data 134 | if (!cscanner_is_endl(parser->token)) { 135 | if (cparser_parse_data(parser)) { 136 | return NULL; 137 | } 138 | if (parser->synthesizer.encoding.data != NULL) { 139 | return &parser->synthesizer.encoding; 140 | } 141 | } 142 | 143 | // parse align 144 | if (!cscanner_is_endl(parser->token)) { 145 | if (cparser_parse_align(parser)) { 146 | return NULL; 147 | } 148 | if (parser->synthesizer.encoding.align != 0) { 149 | return 
&parser->synthesizer.encoding; 150 | } 151 | } 152 | 153 | // parse proc 154 | if (!cscanner_is_endl(parser->token)) { 155 | if (cparser_parse_proc(parser)) { 156 | return NULL; 157 | } 158 | } 159 | 160 | // parse repnz, repz 161 | if (!cscanner_is_endl(parser->token)) { 162 | if (cparser_parse_prefix(parser)) { 163 | return NULL; 164 | } 165 | } 166 | 167 | // parse mnemonic 168 | if (!cscanner_is_endl(parser->token)) { 169 | if (cparser_parse_mnemonic(parser)) { 170 | cparser_error(parser, "mnemonic syntax error", 4); 171 | return NULL; 172 | } 173 | 174 | if (!parser->instruction) { 175 | cparser_error(parser, "mnemonic error", 5); 176 | return NULL; 177 | } 178 | 179 | if (cparser_parse_first_operand(parser)) { 180 | if (parser->errcode == 0) 181 | cparser_error(parser, "first operand error", 6); 182 | return NULL; 183 | } 184 | 185 | if (cparser_parse_second_operand(parser)) { 186 | if (parser->errcode == 0) 187 | cparser_error(parser, "second operand error", 7); 188 | return NULL; 189 | } 190 | 191 | if (cparser_parse_third_operand(parser)) { 192 | if (parser->errcode == 0) 193 | cparser_error(parser, "third operand error", 8); 194 | return NULL; 195 | } 196 | } 197 | 198 | if (parser->instruction) { 199 | do { 200 | if (cinst_match_syntax(parser->instruction)) { 201 | break; 202 | } 203 | parser->instruction = parser->instruction->next; 204 | } while (parser->instruction); 205 | 206 | if (parser->instruction == NULL) { 207 | cparser_error(parser, "operands mismatch", 9); 208 | return NULL; 209 | } 210 | #if 0 211 | printf("%s (%s) (%s) specifier=%d\n", 212 | parser->instruction->syntax->mnemonic, 213 | parser->instruction->syntax->operands, 214 | parser->instruction->syntax->encoding, 215 | parser->instruction->specifier); 216 | #endif 217 | } 218 | 219 | return csynth_encode_instruction(&parser->synthesizer, 220 | parser->instruction); 221 | } 222 | 223 | 224 | static int cparser_parse_label(CParser *parser) 225 | { 226 | const CTOKEN *current = 
cscanner_token_current(parser->token); 227 | const CTOKEN *next = cscanner_token_lookahead(parser->token); 228 | if (ctoken_is_ident(current) && ctoken_get_char(next) == ':') { 229 | csynth_define_label(&parser->synthesizer, current->str); 230 | cscanner_token_advance(parser->token, 2); 231 | } 232 | else if (ctoken_get_char(current) == '.' && ctoken_is_ident(next)) { 233 | csynth_define_label(&parser->synthesizer, next->str); 234 | cscanner_token_advance(parser->token, 2); 235 | } 236 | return 0; 237 | } 238 | 239 | static int cparser_parse_mnemonic(CParser *parser) 240 | { 241 | const char *name = cscanner_get_string(parser->token); 242 | parser->instruction = cinstset_query(parser->instructionset, name); 243 | if (parser->instruction) { 244 | cinst_match_mnemonic(parser->instruction, name); 245 | cscanner_token_advance(parser->token, 1); 246 | } else { 247 | cparser_error(parser, "Mnemonic not recognised", 10); 248 | return -1; 249 | } 250 | return 0; 251 | } 252 | 253 | static int cparser_parse_specifier(CParser *parser) 254 | { 255 | enum CSpecifierType type = CS_UNKNOWN; 256 | 257 | if (cscanner_is_ident(parser->token)) { 258 | type = cspecifier_scan(cscanner_get_string(parser->token)); 259 | } 260 | 261 | cinst_match_specifier(parser->instruction, type); 262 | 263 | if (type != CS_UNKNOWN) { 264 | cscanner_token_advance(parser->token, 1); 265 | } 266 | 267 | return 0; 268 | } 269 | 270 | static int cparser_parse_first_operand(CParser *parser) 271 | { 272 | COperand firstOperand = CINIT; 273 | 274 | assert(parser->instruction); 275 | 276 | cparser_parse_specifier(parser); 277 | 278 | if (cscanner_is_endl(parser->token)) { 279 | } 280 | else if (cscanner_is_operator(parser->token)) { 281 | switch (cscanner_get_char(parser->token)) { 282 | case '[': 283 | firstOperand = cparser_parse_memory_reference(parser); 284 | if (parser->errcode) return -5; 285 | break; 286 | case '+': 287 | case '-': 288 | case '~': 289 | firstOperand = cparser_parse_immediate(parser); 
290 | break; 291 | default: 292 | cparser_error(parser, "Unexpected punctuator after mnemonic", 1); 293 | return -1; 294 | break; 295 | } 296 | } 297 | else if (cscanner_is_int(parser->token)) { 298 | firstOperand = cparser_parse_immediate(parser); 299 | if (parser->errcode) return -5; 300 | } 301 | else if (cscanner_is_ident(parser->token)) { 302 | firstOperand = cparser_parse_register(parser); 303 | if (parser->errcode) return -5; 304 | } 305 | else { 306 | cparser_error(parser, "Invalid destination operand", 11); 307 | return -2; 308 | } 309 | 310 | cinst_match_first_operand(parser->instruction, &firstOperand); 311 | csynth_encode_first_operand(&parser->synthesizer, &firstOperand); 312 | 313 | return 0; 314 | } 315 | 316 | static int cparser_parse_second_operand(CParser *parser) 317 | { 318 | COperand secondOperand = CINIT; 319 | assert(parser->instruction); 320 | 321 | if (cscanner_get_char(parser->token) == ',') { 322 | cscanner_token_advance(parser->token, 1); 323 | } 324 | else if (!cscanner_is_endl(parser->token)) { 325 | cparser_error(parser, "Operands must be separated by comma", 12); 326 | return -3; 327 | } 328 | else { 329 | cinst_match_second_operand(parser->instruction, &secondOperand); 330 | return 0; 331 | } 332 | 333 | cparser_parse_specifier(parser); 334 | 335 | if (cscanner_is_endl(parser->token)) { 336 | } 337 | else if (cscanner_is_operator(parser->token)) { 338 | switch (cscanner_get_char(parser->token)) { 339 | case '[': 340 | secondOperand = cparser_parse_memory_reference(parser); 341 | if (parser->errcode) return -5; 342 | break; 343 | case '+': 344 | case '-': 345 | case '~': 346 | secondOperand = cparser_parse_immediate(parser); 347 | if (parser->errcode) return -5; 348 | break; 349 | default: 350 | cparser_error(parser, "Unexpected punctuator after mnemonic", 1); 351 | return -1; 352 | break; 353 | } 354 | } 355 | else if (cscanner_is_int(parser->token)) { 356 | secondOperand = cparser_parse_immediate(parser); 357 | if (parser->errcode) 
return -5; 358 | } 359 | else if (cscanner_is_ident(parser->token)) { 360 | secondOperand = cparser_parse_register(parser); 361 | if (parser->errcode) return -5; 362 | } 363 | else { 364 | cparser_error(parser, "Invalid source operand", 13); 365 | return -2; 366 | } 367 | 368 | cinst_match_second_operand(parser->instruction, &secondOperand); 369 | csynth_encode_second_operand(&parser->synthesizer, &secondOperand); 370 | 371 | return 0; 372 | } 373 | 374 | static int cparser_parse_third_operand(CParser *parser) 375 | { 376 | COperand thirdOperand = CINIT; 377 | 378 | assert(parser->instruction); 379 | 380 | if (cscanner_get_char(parser->token) == ',') { 381 | cscanner_token_advance(parser->token, 1); 382 | } 383 | else if (!cscanner_is_endl(parser->token)) { 384 | cparser_error(parser, "Operands must be separated by comma", 14); 385 | return -3; 386 | } 387 | else { 388 | cinst_match_third_operand(parser->instruction, &thirdOperand); 389 | return 0; 390 | } 391 | 392 | if (cscanner_is_endl(parser->token)) { 393 | } 394 | else if (cscanner_is_operator(parser->token)) { 395 | switch (cscanner_get_char(parser->token)) { 396 | case '+': 397 | case '-': 398 | case '~': 399 | thirdOperand = cparser_parse_immediate(parser); 400 | if (parser->errcode) return -5; 401 | break; 402 | default: 403 | cparser_error(parser, "Unexpected punctuator after mnemonic", 1); 404 | return -1; 405 | break; 406 | } 407 | } 408 | else if (cscanner_is_int(parser->token)) { 409 | thirdOperand = cparser_parse_immediate(parser); 410 | if (parser->errcode) return -5; 411 | } 412 | else { 413 | cparser_error(parser, "Too many operands", 15); 414 | return -2; 415 | } 416 | 417 | cinst_match_third_operand(parser->instruction, &thirdOperand); 418 | csynth_encode_third_operand(&parser->synthesizer, &thirdOperand); 419 | 420 | return 0; 421 | } 422 | 423 | static COperand cparser_parse_immediate(CParser *parser) 424 | { 425 | COperand imm = CINIT; 426 | if (cscanner_is_operator(parser->token)) { 427 | 
int ch = cscanner_get_char(parser->token); 428 | if (ch == '+') { 429 | cscanner_token_advance(parser->token, 1); 430 | imm.value = +cscanner_get_value(parser->token); 431 | } 432 | else if (ch == '-') { 433 | cscanner_token_advance(parser->token, 1); 434 | imm.value = -cscanner_get_value(parser->token); 435 | } 436 | else if (ch == '~') { 437 | cscanner_token_advance(parser->token, 1); 438 | imm.value = ~cscanner_get_value(parser->token); 439 | } 440 | else { 441 | cparser_error(parser, "error operator", 16); 442 | return imm; 443 | } 444 | } 445 | else if (cscanner_is_int(parser->token)) { 446 | imm.value = cscanner_get_value(parser->token); 447 | } 448 | else { 449 | cparser_error(parser, "immediate error", 17); 450 | return imm; 451 | } 452 | 453 | if ((unsigned char)imm.value == imm.value) { 454 | imm.type = O_IMM8; 455 | } 456 | else if ((unsigned short)imm.value == imm.value) { 457 | imm.type = O_IMM16; 458 | } 459 | else { 460 | imm.type = O_IMM32; 461 | } 462 | 463 | cscanner_token_advance(parser->token, 1); 464 | 465 | return imm; 466 | } 467 | 468 | static COperand cparser_parse_register(CParser *parser) 469 | { 470 | COperand reg = CINIT; 471 | const char *name; 472 | 473 | name = cscanner_get_string(parser->token); 474 | reg = coperand_scan_reg(name); 475 | 476 | // It's not a register, so it must be a reference 477 | if (reg.type == O_UNKNOWN) { 478 | csynth_reference_label(&parser->synthesizer, name); 479 | // first operand should be immediate 480 | reg.type = O_IMM8; // also matchs IMM32 481 | cinst_match_first_operand(parser->instruction, ®); 482 | } 483 | 484 | cscanner_token_advance(parser->token, 1); 485 | 486 | return reg; 487 | } 488 | 489 | static COperand cparser_parse_memory_reference(CParser *parser) 490 | { 491 | COperand mem = CINIT; 492 | 493 | for (; ; ) { 494 | const CTOKEN *next; 495 | const CTOKEN *prev; 496 | const CTOKEN *token; 497 | int type; 498 | 499 | type = cscanner_token_lookahead(parser->token)->type; 500 | if (type == 
CTokenENDL || type == CTokenENDF) break; 501 | 502 | prev = cscanner_token_current(parser->token); 503 | cscanner_token_advance(parser->token, 1); 504 | next = cscanner_token_lookahead(parser->token); 505 | token = cscanner_token_current(parser->token); 506 | 507 | if (token->type == CTokenIDENT) { 508 | COperand reg = coperand_scan_reg(token->str); 509 | if (reg.type == O_UNKNOWN) { 510 | cparser_error(parser, "unknow reg reference", 18); 511 | return reg; 512 | } 513 | if (ctoken_get_char(prev) == '*' || ctoken_get_char(next) == '*') 514 | { 515 | csynth_encode_index(&parser->synthesizer, ®); 516 | } 517 | else 518 | { 519 | csynth_encode_base(&parser->synthesizer, ®); 520 | } 521 | } 522 | else if (token->type == CTokenOPERATOR) { 523 | switch (ctoken_get_char(token)) { 524 | case ']': 525 | mem.type = O_MEM; 526 | cscanner_token_advance(parser->token, 1); 527 | return mem; 528 | break; 529 | case '+': 530 | if ((prev->type != CTokenINT && prev->type != CTokenIDENT) || 531 | (next->type != CTokenINT && next->type != CTokenIDENT)) { 532 | cparser_error(parser, 533 | "Syntax error '+' in memory reference", 19); 534 | return mem; 535 | } 536 | break; 537 | case '-': 538 | if ((prev->type != CTokenINT && prev->type != CTokenIDENT && 539 | ctoken_get_char(prev) != '[') || 540 | next->type != CTokenINT) { 541 | cparser_error(parser, 542 | "Syntax error '-' in memory reference", 20); 543 | return mem; 544 | } 545 | break; 546 | case '*': 547 | if ((prev->type != CTokenINT || next->type != CTokenIDENT) && 548 | (next->type != CTokenINT || prev->type != CTokenIDENT)) { 549 | cparser_error(parser, 550 | "Syntax error '*' in memory reference", 21); 551 | return mem; 552 | } 553 | break; 554 | default: 555 | cparser_error(parser, 556 | "Unexpected punctuator in memory reference", 22); 557 | return mem; 558 | break; 559 | } 560 | } 561 | else if (token->type == CTokenINT) { 562 | int prevch = ctoken_get_char(prev); 563 | int nextch = ctoken_get_char(next); 564 | int value = 
ctoken_get_int(token); 565 | if (prevch == '*' || nextch == '*') { 566 | if (value == 1 || value == 2 || value == 4 || value == 8) { 567 | csynth_encode_scale(&parser->synthesizer, value); 568 | } else { 569 | cparser_error(parser, 570 | "Invalid scale in memory reference", 23); 571 | return mem; 572 | } 573 | } 574 | else if (prevch == '-') { 575 | csynth_encode_displacement(&parser->synthesizer, -value); 576 | } 577 | else if (prevch == '+' || nextch == '+') { 578 | csynth_encode_displacement(&parser->synthesizer, value); 579 | } 580 | else if (prevch == '[' && nextch == ']') { 581 | cparser_error(parser, 582 | "Invalid number in memory reference", 30); 583 | return mem; 584 | } 585 | else { 586 | cparser_error(parser, 587 | "Invalid number in memory reference", 24); 588 | return mem; 589 | } 590 | } 591 | else { 592 | cparser_error(parser, 593 | "Unexpected token in memory reference", 25); 594 | return mem; 595 | } 596 | } 597 | 598 | cparser_error(parser, "Unexpected end of line in memory reference", 26); 599 | 600 | return mem; 601 | } 602 | 603 | 604 | static int cparser_parse_data(CParser *parser) 605 | { 606 | const char *name; 607 | long pos = 0; 608 | int size = -1; 609 | 610 | if (cscanner_is_ident(parser->token) == 0) { 611 | return 0; 612 | } 613 | 614 | name = cscanner_get_string(parser->token); 615 | 616 | if (stricmp(name, "DB") == 0) size = 1; 617 | else if (stricmp(name, "DW") == 0) size = 2; 618 | else if (stricmp(name, "DD") == 0) size = 4; 619 | 620 | if (size < 0) return 0; 621 | 622 | cscanner_token_advance(parser->token, 1); 623 | 624 | for (pos = 0; ; ) { 625 | unsigned char *ptr = (unsigned char*)parser->data; 626 | const CTOKEN *token; 627 | 628 | if (cscanner_is_endl(parser->token)) break; 629 | 630 | token = cscanner_token_current(parser->token); 631 | 632 | if (token->type == CTokenINT) { 633 | cuint32 value = (cuint32)token->intval; 634 | if (pos + size >= IMAX_DATA) { 635 | cparser_error(parser, "data too long", 41); 636 | return -1; 
637 | } 638 | if (size == 1) { 639 | ptr[pos++] = (unsigned char)((value >> 0) & 0xff); 640 | } 641 | else if (size == 2) { 642 | ptr[pos++] = (unsigned char)((value >> 0) & 0xff); 643 | ptr[pos++] = (unsigned char)((value >> 8) & 0xff); 644 | } 645 | else if (size == 4) { 646 | ptr[pos++] = (unsigned char)((value >> 0) & 0xff); 647 | ptr[pos++] = (unsigned char)((value >> 8) & 0xff); 648 | ptr[pos++] = (unsigned char)((value >> 16) & 0xff); 649 | ptr[pos++] = (unsigned char)((value >> 24) & 0xff); 650 | } 651 | } 652 | else if (token->type == CTokenSTR) { 653 | const char *text = token->str; 654 | long size, i, c; 655 | char hex[3]; 656 | size = (long)strlen(text); 657 | for (i = 0; i < size; ) { 658 | if (i + 1 >= IMAX_DATA) { 659 | cparser_error(parser, "data too long", 41); 660 | return -2; 661 | } 662 | if (text[i] == '\\') { 663 | switch (text[i + 1]) 664 | { 665 | case '\\': ptr[pos++] = '\\'; i += 2; break; 666 | case 'n' : ptr[pos++] = '\n'; i += 2; break; 667 | case 'r' : ptr[pos++] = '\r'; i += 2; break; 668 | case 't' : ptr[pos++] = '\t'; i += 2; break; 669 | case '0' : ptr[pos++] = '\0'; i += 2; break; 670 | case '?' 
: ptr[pos++] = '?'; i += 2; break; 671 | case '\'': ptr[pos++] = '\''; i += 2; break; 672 | case '\"': ptr[pos++] = '\"'; i += 2; break; 673 | case 'a' : ptr[pos++] = '\a'; i += 2; break; 674 | case 'b' : ptr[pos++] = '\b'; i += 2; break; 675 | case 'f' : ptr[pos++] = '\f'; i += 2; break; 676 | case 'v' : ptr[pos++] = '\v'; i += 2; break; 677 | case 'x' : 678 | i += 2; 679 | hex[0] = text[i++]; 680 | hex[1] = text[i++]; 681 | hex[2] = 0; 682 | c = strtol(hex, NULL, 16); 683 | ptr[pos++] = (unsigned char)(c & 255); 684 | break; 685 | default: 686 | cparser_error(parser, "string format error", 42); 687 | return -3; 688 | break; 689 | } 690 | } 691 | else if (text[i] == '\'') { 692 | if (text[i + 1] == '\'') { 693 | ptr[pos++] = '\''; 694 | i += 2; 695 | } else { 696 | ptr[pos++] = '\''; 697 | i += 1; 698 | } 699 | } 700 | else if (text[i] == '\"') { 701 | if (text[i + 1] == '\"') { 702 | ptr[pos++] = '\"'; 703 | i += 2; 704 | } else { 705 | ptr[pos++] = '\"'; 706 | i += 1; 707 | } 708 | } 709 | else { 710 | ptr[pos++] = (unsigned char)text[i++]; 711 | } 712 | } 713 | } 714 | else { 715 | cparser_error(parser, "unrecongnize data", 43); 716 | return -4; 717 | } 718 | 719 | cscanner_token_advance(parser->token, 1); 720 | 721 | if (!cscanner_is_endl(parser->token)) { 722 | if (cscanner_get_char(parser->token) != ',') { 723 | cparser_error(parser, "expected comma", 40); 724 | return -3; 725 | } 726 | cscanner_token_advance(parser->token, 1); 727 | } 728 | } 729 | 730 | if (pos > 0) { 731 | cencoding_set_data(&parser->synthesizer.encoding, parser->data, pos); 732 | } 733 | 734 | return 0; 735 | } 736 | 737 | 738 | static int cparser_parse_prefix(CParser *parser) 739 | { 740 | const char *name; 741 | 742 | if (cscanner_is_ident(parser->token) == 0) { 743 | return 0; 744 | } 745 | 746 | name = cscanner_get_string(parser->token); 747 | 748 | if (stricmp(name, "REP") == 0 || 749 | stricmp(name, "REPE") == 0 || 750 | stricmp(name, "REPZ") == 0) { 751 | if 
(csynth_encode_prefix(&parser->synthesizer, 0xf3)) { 752 | cparser_error(parser, parser->synthesizer.error, 70); 753 | return -1; 754 | } 755 | cscanner_token_advance(parser->token, 1); 756 | } 757 | else if (stricmp(name, "REPNE") == 0 || stricmp(name, "REPNZ") == 0) { 758 | parser->synthesizer.prefix = 0xf2; 759 | if (csynth_encode_prefix(&parser->synthesizer, 0xf2)) { 760 | cparser_error(parser, parser->synthesizer.error, 71); 761 | return -2; 762 | } 763 | cscanner_token_advance(parser->token, 1); 764 | } 765 | else if (stricmp(name, "LOCK") == 0) { 766 | if (csynth_encode_prefix(&parser->synthesizer, 0xf0)) { 767 | cparser_error(parser, parser->synthesizer.error, 72); 768 | return -3; 769 | } 770 | cscanner_token_advance(parser->token, 1); 771 | } 772 | 773 | return 0; 774 | } 775 | 776 | static int cparser_parse_align(CParser *parser) 777 | { 778 | const char *name; 779 | 780 | if (cscanner_is_ident(parser->token) == 0) { 781 | return 0; 782 | } 783 | 784 | name = cscanner_get_string(parser->token); 785 | 786 | if (stricmp(name, "ALIGN") == 0) { 787 | int align = 4; 788 | cscanner_token_advance(parser->token, 1); 789 | if (cscanner_is_int(parser->token)) { 790 | align = cscanner_get_value(parser->token); 791 | } 792 | while (!cscanner_is_endf(parser->token)) { 793 | cscanner_token_advance(parser->token, 1); 794 | } 795 | if (align < 1) { 796 | cparser_error(parser, "error align size", 80); 797 | return -1; 798 | } 799 | parser->synthesizer.encoding.align = align; 800 | } 801 | 802 | return 0; 803 | } 804 | 805 | static int cparser_parse_size(CParser *parser) 806 | { 807 | const CTOKEN *token = cscanner_token_current(parser->token); 808 | cscanner_token_advance(parser->token, 1); 809 | if (token->type == CTokenIDENT) { 810 | if (stricmp(token->str, "BYTE") == 0) return 1; 811 | if (stricmp(token->str, "CHAR") == 0) return 1; 812 | if (stricmp(token->str, "INT8") == 0) return 1; 813 | if (stricmp(token->str, "UINT8") == 0) return 1; 814 | if 
(stricmp(token->str, "WORD") == 0) return 2; 815 | if (stricmp(token->str, "SHORT") == 0) return 2; 816 | if (stricmp(token->str, "USHORT") == 0) return 2; 817 | if (stricmp(token->str, "INT16") == 0) return 2; 818 | if (stricmp(token->str, "UINT16") == 0) return 2; 819 | if (stricmp(token->str, "DWORD") == 0) return 4; 820 | if (stricmp(token->str, "INT") == 0) return 4; 821 | if (stricmp(token->str, "UINT") == 0) return 4; 822 | if (stricmp(token->str, "LONG") == 0) return 4; 823 | if (stricmp(token->str, "ULONG") == 0) return 4; 824 | if (stricmp(token->str, "INT32") == 0) return 4; 825 | if (stricmp(token->str, "UINT32") == 0) return 4; 826 | } 827 | return 0; 828 | } 829 | 830 | static int cparser_parse_newvar(CParser *parser, const char *name, int stack) 831 | { 832 | char *macro = (char*)parser->data; 833 | CVariable *var; 834 | 835 | if (stricmp(name, "RET") == 0) { 836 | sprintf(macro, "'%s' conflicted with keyword", name); 837 | cparser_error(parser, macro, 96); 838 | return -1; 839 | } 840 | 841 | for (var = parser->vars; var; var = var->next) { 842 | if (strcmp(var->name, name) == 0) { 843 | sprintf(macro, "'%s' redefined", name); 844 | cparser_error(parser, macro, 97); 845 | return -2; 846 | } 847 | } 848 | 849 | if (stack >= 0) sprintf(macro, "[EBP + %d]", stack); 850 | else sprintf(macro, "[EBP - %d]", -stack); 851 | 852 | if (cscanner_macro_set(parser->token, name, macro)) { 853 | sprintf(macro, "name '%s' redefined", name); 854 | cparser_error(parser, macro, 95); 855 | return -3; 856 | } 857 | 858 | var = (CVariable*)malloc(sizeof(CVariable)); 859 | assert(var); 860 | var->name = strdup(name); 861 | assert(var->name); 862 | var->pos = stack; 863 | 864 | var->next = parser->vars; 865 | parser->vars = var; 866 | 867 | return 0; 868 | } 869 | 870 | static int cparser_parse_proc(CParser *parser) 871 | { 872 | unsigned char instruction[20]; 873 | const char *name; 874 | 875 | if (cscanner_is_ident(parser->token) == 0) { 876 | return 0; 877 | } 878 | 879 
| name = cscanner_get_string(parser->token); 880 | 881 | if (stricmp(name, "PROC") == 0) { 882 | const char *replace = "DB 0x8B, 0xE5, 0x5D, 0xC3\n"; 883 | int stack = 8; 884 | 885 | if (parser->inproc) { 886 | cparser_error(parser, "cannot define proc in a proc block", 90); 887 | return -1; 888 | } 889 | parser->inproc = 1; 890 | parser->stack = 0; 891 | 892 | // replace ret to "mov esp, ebp; pop ebp; ret" 893 | cscanner_macro_set(parser->token, "ret", replace); 894 | cscanner_macro_set(parser->token, "RET", replace); 895 | cscanner_macro_set(parser->token, "Ret", replace); 896 | cscanner_macro_set(parser->token, "rEt", replace); 897 | cscanner_macro_set(parser->token, "reT", replace); 898 | cscanner_macro_set(parser->token, "rET", replace); 899 | cscanner_macro_set(parser->token, "ReT", replace); 900 | cscanner_macro_set(parser->token, "REt", replace); 901 | 902 | cscanner_token_advance(parser->token, 1); 903 | 904 | for (stack = 8; !cscanner_is_endl(parser->token); ) { 905 | const CTOKEN *token = cscanner_token_current(parser->token); 906 | const CTOKEN *next = cscanner_token_lookahead(parser->token); 907 | char *macro = (char*)parser->data; 908 | if (ctoken_get_char(token) == ',') { 909 | cscanner_token_advance(parser->token, 1); 910 | } 911 | else if (token->type == CTokenIDENT && next->ch == ':') { 912 | int size; 913 | cscanner_token_advance(parser->token, 2); 914 | size = cparser_parse_size(parser); 915 | if (size == 0) { 916 | cparser_error(parser, "variable type unknown", 93); 917 | return -1; 918 | } 919 | if (cparser_parse_newvar(parser, token->str, stack)) { 920 | return -4; 921 | } 922 | stack += size; 923 | } 924 | else { 925 | if (token->type == CTokenIDENT) { 926 | sprintf(macro, "parameter '%s' error", token->str); 927 | } else { 928 | sprintf(macro, "parameter error"); 929 | } 930 | cparser_error(parser, macro, 93); 931 | return -3; 932 | } 933 | } 934 | 935 | instruction[0] = 0x55; // push ebp 936 | instruction[1] = 0x8B; // mov ebp, esp 937 | 
instruction[2] = 0xEC; 938 | 939 | cencoding_set_data(&parser->synthesizer.encoding, instruction, 3); 940 | } 941 | else if (stricmp(name, "LOCAL") == 0) { 942 | int localsize = 0; 943 | int IS; 944 | 945 | cscanner_token_advance(parser->token, 1); 946 | 947 | if (parser->inproc == 0) { 948 | cparser_error(parser, "local is forbbiden outside a proc", 90); 949 | return -5; 950 | } 951 | 952 | for (; !cscanner_is_endl(parser->token); ) { 953 | const CTOKEN *token = cscanner_token_current(parser->token); 954 | const CTOKEN *next = cscanner_token_lookahead(parser->token); 955 | char *macro = (char*)parser->data; 956 | if (ctoken_get_char(token) == ',') { 957 | cscanner_token_advance(parser->token, 1); 958 | } 959 | else if (token->type == CTokenIDENT && next->ch == ':') { 960 | int pos, size; 961 | cscanner_token_advance(parser->token, 2); 962 | size = cparser_parse_size(parser); 963 | if (size == 0) { 964 | cparser_error(parser, "variable type unknown", 93); 965 | return -1; 966 | } 967 | pos = -(parser->stack + size); 968 | if (cparser_parse_newvar(parser, token->str, pos)) { 969 | return -6; 970 | } 971 | parser->stack += size; 972 | localsize += size; 973 | //printf("LOCAL %s=[EBP+(%d)]\n", token->str, pos); 974 | } 975 | else { 976 | if (token->type == CTokenIDENT) { 977 | sprintf(macro, "parameter '%s' error", token->str); 978 | } else { 979 | sprintf(macro, "parameter error"); 980 | } 981 | cparser_error(parser, macro, 92); 982 | return -3; 983 | } 984 | } 985 | 986 | if (localsize <= 127) { 987 | instruction[0] = 0x83; // sub esp, imm8 988 | instruction[1] = 0xEC; 989 | instruction[2] = (unsigned char)(localsize & 0xff); 990 | IS = 3; 991 | } else { 992 | instruction[0] = 0x81; // sub esp, imm32 993 | instruction[1] = 0xEC; 994 | instruction[2] = (unsigned char)((localsize >> 0) & 0xff); 995 | instruction[3] = (unsigned char)((localsize >> 8) & 0xff); 996 | instruction[4] = (unsigned char)((localsize >> 16) & 0xff); 997 | instruction[5] = (unsigned 
char)((localsize >> 24) & 0xff); 998 | IS = 6; 999 | } 1000 | 1001 | cencoding_set_data(&parser->synthesizer.encoding, instruction, IS); 1002 | } 1003 | else if (stricmp(name, "ENDP") == 0) { 1004 | if (parser->inproc == 0) { 1005 | cparser_error(parser, "not find proc definition", 91); 1006 | return -2; 1007 | } 1008 | parser->inproc = 0; 1009 | parser->stack = 0; 1010 | while (parser->vars) { 1011 | CVariable *var = parser->vars; 1012 | parser->vars = parser->vars->next; 1013 | cscanner_macro_del(parser->token, var->name); 1014 | free(var->name); 1015 | free(var); 1016 | } 1017 | cscanner_macro_del(parser->token, "ret"); 1018 | cscanner_macro_del(parser->token, "RET"); 1019 | cscanner_macro_del(parser->token, "Ret"); 1020 | cscanner_macro_del(parser->token, "rEt"); 1021 | cscanner_macro_del(parser->token, "reT"); 1022 | cscanner_macro_del(parser->token, "rET"); 1023 | cscanner_macro_del(parser->token, "ReT"); 1024 | cscanner_macro_del(parser->token, "REt"); 1025 | } 1026 | else { 1027 | return 0; 1028 | } 1029 | 1030 | while (!cscanner_is_endf(parser->token)) { 1031 | cscanner_token_advance(parser->token, 1); 1032 | } 1033 | 1034 | return 0; 1035 | } 1036 | 1037 | -------------------------------------------------------------------------------- /source/cparser.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cparser.h - source parser 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
//---------------------------------------------------------------------
// CVariable
//---------------------------------------------------------------------
// One named variable registered while a procedure is being parsed.
// Entries form a singly linked list headed by CParser::vars; the ENDP
// handler walks that list, removes each name from the scanner's macro
// table and frees both the name and the node.
struct CVariable
{
	char *name;                 // heap-allocated identifier, freed at ENDP
	int pos;                    // offset handed to cparser_parse_newvar; LOCAL
	                            // passes -(stack + size) -- presumably relative
	                            // to EBP, TODO confirm against the parser
	struct CVariable *next;     // next variable, NULL at the end of the list
};

typedef struct CVariable CVariable;


//---------------------------------------------------------------------
// CParser
//---------------------------------------------------------------------
// State of the line-oriented source parser.
struct CParser
{
	char *data;                     // scratch text buffer; the LOCAL handler
	                                // formats error messages into it
	char *error;                    // NOTE(review): presumably the last error
	int errcode;                    // message/code set by cparser_error -- confirm
	int inproc;                     // non-zero while inside a procedure; LOCAL and
	                                // ENDP are rejected when it is 0
	int stack;                      // bytes of locals declared so far in the
	                                // current procedure, reset to 0 at ENDP
	CScanner *token;                // token stream of the line being parsed
	CVariable *vars;                // variables of the current procedure
	CInstruction *instruction;
	CInstructionSet *instructionset;
	CSynthesizer synthesizer;       // its encoding receives the emitted bytes
};

typedef struct CParser CParser;
// Case-insensitive comparison of two NUL-terminated strings.
// Only the ASCII letters 'A'..'Z' are folded to lower case; every other
// byte is compared by its unsigned value.
// Returns 0 when the strings are equal, otherwise the difference between
// the first pair of folded bytes that differ (negative when dst sorts
// before src, positive when it sorts after).
int cstricmp(const char *dst, const char *src)
{
	for (;;) {
		int lhs = (unsigned char)*dst++;
		int rhs = (unsigned char)*src++;
		if (lhs >= 'A' && lhs <= 'Z') lhs += 'a' - 'A';
		if (rhs >= 'A' && rhs <= 'Z') rhs += 'a' - 'A';
		// stop at the end of dst or at the first mismatch
		if (lhs == 0 || lhs != rhs) return lhs - rhs;
	}
}
77 | reader->unch = -1; 78 | } else { 79 | reader->saved = reader->ch; 80 | reader->ch = reader->readch(reader->fp); 81 | if (reader->ch == '\n') reader->lineno++, reader->colno = 1; 82 | else if (reader->ch >= 0) reader->colno++; 83 | } 84 | return reader->ch; 85 | } 86 | 87 | static int ctoken_reader_ungetch(CTokenReader *reader, int ch) 88 | { 89 | assert(reader->unch < 0); 90 | reader->unch = ch; 91 | return 0; 92 | } 93 | 94 | static int ctoken_reader_skip_space(CTokenReader *reader) 95 | { 96 | while (1) { 97 | int skip = 0; 98 | for (; isspace(reader->ch) && reader->ch != '\n'; skip++) 99 | ctoken_reader_getch(reader); 100 | if (reader->ch == ';' || reader->ch == '#') { 101 | skip++; 102 | while (reader->ch != '\n' && reader->ch >= 0) { 103 | ctoken_reader_getch(reader); 104 | skip++; 105 | } 106 | } 107 | else if (reader->ch == '/') { 108 | ctoken_reader_getch(reader); 109 | if (reader->ch == '/') { 110 | skip++; 111 | while (reader->ch != '\n' && reader->ch >= 0) { 112 | ctoken_reader_getch(reader); 113 | skip++; 114 | } 115 | } else { 116 | ctoken_reader_ungetch(reader, reader->ch); 117 | reader->ch = '/'; 118 | } 119 | } 120 | if (skip == 0) break; 121 | } 122 | return 0; 123 | } 124 | 125 | static CTOKEN *ctoken_reader_read_string(CTokenReader *reader, int *state) 126 | { 127 | CTOKEN *token = NULL; 128 | 129 | if (reader->ch == '\'' || reader->ch == '\"') { 130 | int mode = (reader->ch == '\"')? 
0 : 1; 131 | reader->pos = 0; 132 | while (1) { 133 | int ch = ctoken_reader_getch(reader); 134 | if (ch == '\\') { 135 | ctoken_reader_getch(reader); 136 | reader->buffer[reader->pos++] = '\\'; 137 | reader->buffer[reader->pos++] = (char)reader->ch; 138 | } 139 | else if (mode == 0 && ch == '\'') { 140 | reader->buffer[reader->pos++] = '\''; 141 | } 142 | else if (mode == 1 && ch == '\"') { 143 | reader->buffer[reader->pos++] = '\"'; 144 | } 145 | else if (mode == 0 && ch == '\"') { 146 | ch = ctoken_reader_getch(reader); 147 | if (ch == '\"') { 148 | reader->buffer[reader->pos++] = '\"'; 149 | reader->buffer[reader->pos++] = '\"'; 150 | } else { 151 | *state = 1; 152 | reader->buffer[reader->pos] = 0; 153 | token = ctoken_new_string(reader->buffer); 154 | break; 155 | } 156 | } 157 | else if (mode == 1 && ch == '\'') { 158 | ch = ctoken_reader_getch(reader); 159 | if (ch == '\'') { 160 | reader->buffer[reader->pos++] = '\''; 161 | reader->buffer[reader->pos++] = '\''; 162 | } else { 163 | *state = 1; 164 | reader->buffer[reader->pos] = 0; 165 | token = ctoken_new_string(reader->buffer); 166 | break; 167 | } 168 | } 169 | else if (ch == '\n') { 170 | *state = -1; 171 | break; 172 | } 173 | else if (ch >= 0) { 174 | reader->buffer[reader->pos++] = (char)ch; 175 | } 176 | else { // ch < 0 177 | *state = -2; 178 | break; 179 | } 180 | 181 | if (reader->pos >= CMAX_IDENT) { 182 | *state = -3; 183 | strncpy(reader->error, "string too long", 64); 184 | break; 185 | } 186 | } 187 | } 188 | 189 | return token; 190 | } 191 | 192 | static CTOKEN *ctoken_reader_read_number(CTokenReader *reader, int *state) 193 | { 194 | int lineno = reader->lineno; 195 | char *text = reader->buffer; 196 | CTOKEN *token; 197 | int ec1, ec2, pos; 198 | long value; 199 | 200 | if (reader->ch < '0' || reader->ch > '9') { 201 | *state = 0; 202 | return NULL; 203 | } 204 | 205 | for (reader->pos = 0; isalnum(reader->ch) || reader->ch == '.'; ) { 206 | reader->buffer[reader->pos++] = 
(char)reader->ch; 207 | ctoken_reader_getch(reader); 208 | if (reader->pos >= CMAX_IDENT) { 209 | strncpy(reader->error, "number too long", 64); 210 | *state = -1; 211 | reader->errcode = 1; 212 | return NULL; 213 | } 214 | } 215 | 216 | reader->buffer[reader->pos] = 0; 217 | for (pos = reader->pos; pos > 0; pos--) { 218 | if (isdigit(text[pos - 1]) || text[pos - 1] == '.') { 219 | break; 220 | } 221 | else if (text[pos - 1] >= 'a' && text[pos - 1] <= 'f') { 222 | break; 223 | } 224 | else if (text[pos - 1] >= 'A' && text[pos - 1] <= 'F') { 225 | break; 226 | } 227 | } 228 | 229 | if (reader->pos - pos > 2) { 230 | strncpy(reader->error, "number format error", 64); 231 | *state = -2; 232 | reader->errcode = 2; 233 | return NULL; 234 | } 235 | 236 | if (reader->pos - pos == 2) ec1 = text[pos], ec2 = text[pos + 1]; 237 | else if (reader->pos - pos == 1) ec1 = text[pos], ec2 = 0; 238 | else ec1 = ec2 = 0; 239 | text[pos] = 0; 240 | 241 | // hex 242 | if (text[0] == '0' && (text[1] == 'x' || text[1] == 'X')) { 243 | value = (long)strtoul(text + 2, NULL, 16); 244 | token = ctoken_new_int(value); 245 | } // hex 246 | else if (ec1 == 'h' && ec2 == 0) { 247 | value = (long)strtoul(text, NULL, 16); 248 | token = ctoken_new_int(value); 249 | } // binary 250 | else if (ec1 == 'b' && ec2 == 0) { 251 | value = (long)strtoul(text, NULL, 2); 252 | token = ctoken_new_int(value); 253 | } // octal 254 | else if (ec1 == 'q' && ec2 == 0) { 255 | value = (long)strtol(text, NULL, 8); 256 | token = ctoken_new_int(value); 257 | } // decimal or float 258 | else { 259 | int decimal = 1; 260 | int i; 261 | for (i = 0; text[i]; i++) 262 | if (text[i] == '.') decimal = 0; 263 | if (decimal) { 264 | value = (long)strtoul(text, NULL, 10); 265 | token = ctoken_new_int(value); 266 | } else { 267 | float ff; 268 | sscanf(text, "%f", &ff); 269 | token = ctoken_new_float(ff); 270 | } 271 | } 272 | token->lineno = lineno; 273 | *state = 0; 274 | return token; 275 | } 276 | 277 | CTOKEN 
*ctoken_reader_read(CTokenReader *reader) 278 | { 279 | CTOKEN *token = NULL; 280 | 281 | assert(reader); 282 | 283 | // skip memo and space 284 | ctoken_reader_skip_space(reader); 285 | 286 | // this is a endl 287 | if (reader->ch == '\n') { 288 | int lineno = reader->lineno - 1; 289 | token = ctoken_new_endl(); 290 | token->lineno = lineno; 291 | ctoken_reader_getch(reader); 292 | return token; 293 | } 294 | 295 | // this is a endf 296 | if (reader->ch < 0) { 297 | if (reader->eof++) return NULL; 298 | token = ctoken_new_endf(); 299 | token->lineno = reader->lineno; 300 | return token; 301 | } 302 | 303 | // this is a string 304 | if (reader->ch == '\'' || reader->ch == '\"') { 305 | int lineno = reader->lineno; 306 | int state; 307 | token = ctoken_reader_read_string(reader, &state); 308 | if (state < 0) { 309 | strncpy(reader->error, "expected closing quotation mark", 100); 310 | reader->errcode = 3; 311 | return NULL; 312 | } 313 | token->lineno = lineno; 314 | return token; 315 | } 316 | 317 | #define issym2f(c) ((c) == '_' || isalpha(c) || (c) == '$' || (c) == '@') 318 | #define issym2x(c) ((c) == '_' || isalnum(c) || (c) == '$' || (c) == '@') 319 | 320 | // this is a identity or a keyword 321 | if (issym2f(reader->ch)) { 322 | int lineno = reader->lineno; 323 | for (reader->pos = 0; issym2x(reader->ch); ) { 324 | reader->buffer[reader->pos++] = (char)reader->ch; 325 | ctoken_reader_getch(reader); 326 | if (reader->pos >= CMAX_IDENT) { 327 | strncpy(reader->error, "ident too long", 100); 328 | reader->errcode = 4; 329 | return NULL; 330 | } 331 | } 332 | reader->buffer[reader->pos] = 0; 333 | if (reader->keywords) { 334 | int i; 335 | for (i = 0; reader->keywords[i]; i++) { 336 | if (stricmp(reader->buffer, reader->keywords[i]) == 0) { 337 | token = ctoken_new_keyword(i); 338 | token->lineno = lineno; 339 | return token; 340 | } 341 | } 342 | } 343 | token = ctoken_new_ident(reader->buffer); 344 | token->lineno = lineno; 345 | //printf("{%s:%d:%d}\n", 
token->str, token->lineno, reader->lineno); 346 | return token; 347 | } 348 | 349 | #undef issym2f 350 | #undef issym2x 351 | 352 | // this is a number 353 | if (reader->ch >= '0' && reader->ch <= '9') { 354 | int lineno = reader->lineno; 355 | int state; 356 | //printf("number\n"); 357 | token = ctoken_reader_read_number(reader, &state); 358 | if (state < 0) { 359 | strncpy(reader->error, "number format error", 100); 360 | reader->errcode = 5; 361 | return NULL; 362 | } 363 | token->lineno = lineno; 364 | return token; 365 | } 366 | 367 | // operators 368 | token = ctoken_new_operator(reader->ch); 369 | assert(token); 370 | token->lineno = reader->lineno; 371 | ctoken_reader_getch(reader); 372 | 373 | return token; 374 | } 375 | 376 | 377 | //--------------------------------------------------------------------- 378 | // token stream 379 | //--------------------------------------------------------------------- 380 | static int ctoken_stream_text_getch(void *fp) { 381 | char **ptr = (char**)fp; 382 | if (**ptr == 0) return -1; 383 | return *((*ptr)++); 384 | } 385 | 386 | static void ctoken_stream_free(CTOKEN *root) { 387 | assert(root); 388 | while (!ctoken_list_is_empty(root)) { 389 | CTOKEN *token = root->next; 390 | ctoken_list_del(token); 391 | ctoken_delete(token); 392 | } 393 | ctoken_delete(root); 394 | } 395 | 396 | static CTOKEN *ctoken_stream_load(const char *text, char *error) 397 | { 398 | CTokenReader *reader; 399 | CTOKEN *root; 400 | char *string; 401 | int retval; 402 | 403 | string = (char*)text; 404 | reader = ctoken_reader_create(ctoken_stream_text_getch, &string); 405 | assert(reader); 406 | 407 | root = ctoken_new_endf(); 408 | assert(root); 409 | 410 | for (retval = 0, *error = 0; ; ) { 411 | CTOKEN *token; 412 | token = ctoken_reader_read(reader); 413 | if (token == NULL) { 414 | if (error) strncpy(error, reader->error, 100); 415 | retval = -1; 416 | break; 417 | } 418 | if (token->type == CTokenENDF) { 419 | break; 420 | } 421 | 
ctoken_list_add_tail(token, root); 422 | } 423 | 424 | ctoken_reader_release(reader); 425 | 426 | if (retval != 0) { 427 | ctoken_stream_free(root); 428 | return NULL; 429 | } 430 | 431 | return root; 432 | } 433 | 434 | 435 | //--------------------------------------------------------------------- 436 | // Scanner 437 | //--------------------------------------------------------------------- 438 | CScanner *cscanner_create(void) 439 | { 440 | CScanner *scan; 441 | scan = (CScanner*)malloc(sizeof(CScanner)); 442 | assert(scan); 443 | scan->root = NULL; 444 | scan->link = NULL; 445 | scan->source = NULL; 446 | scan->length = 0; 447 | scan->position = 0; 448 | scan->reader = NULL; 449 | scan->error = (char*)malloc(1024); 450 | assert(scan->error); 451 | scan->errcode = 0; 452 | scan->macros = NULL; 453 | scan->endf.type = CTokenENDF; 454 | scan->endf.lineno = 0; 455 | scan->endf.fileno = 0; 456 | scan->jmplabel = 0; 457 | return scan; 458 | } 459 | 460 | static void cscanner_token_reset(CScanner *scan) 461 | { 462 | assert(scan); 463 | if (scan->root) { 464 | while (!ctoken_list_is_empty(scan->root)) { 465 | CTOKEN *token = scan->root->next; 466 | ctoken_list_del(token); 467 | ctoken_delete(token); 468 | } 469 | ctoken_delete(scan->root); 470 | scan->root = NULL; 471 | scan->link = NULL; 472 | } 473 | if (scan->reader) { 474 | ctoken_reader_release(scan->reader); 475 | scan->reader = NULL; 476 | } 477 | scan->source = NULL; 478 | scan->length = 0; 479 | scan->position = 0; 480 | scan->errcode = 0; 481 | } 482 | 483 | void cscanner_macro_reset(CScanner *scan) 484 | { 485 | while (scan->macros) { 486 | CMacro *macro = scan->macros; 487 | scan->macros = scan->macros->next; 488 | free(macro->ident); 489 | free(macro->value); 490 | free(macro); 491 | } 492 | scan->jmplabel = 0; 493 | } 494 | 495 | void cscanner_release(CScanner *scan) 496 | { 497 | cscanner_token_reset(scan); 498 | cscanner_macro_reset(scan); 499 | if (scan->error) { 500 | free(scan->error); 501 | 
scan->error = NULL; 502 | } 503 | free(scan); 504 | } 505 | 506 | int cscanner_macro_set(CScanner *scan, const char *name, const char *value) 507 | { 508 | CMacro *macro; 509 | 510 | for (macro = scan->macros; macro; macro = macro->next) { 511 | if (strcmp(macro->ident, name) == 0) { 512 | return -1; 513 | } 514 | } 515 | 516 | macro = (CMacro*)malloc(sizeof(CMacro)); 517 | assert(macro); 518 | 519 | macro->ident = strdup(name); 520 | macro->value = strdup(value); 521 | 522 | assert(macro->ident); 523 | assert(macro->value); 524 | 525 | macro->next = scan->macros; 526 | scan->macros = macro; 527 | 528 | return 0; 529 | } 530 | 531 | int cscanner_macro_del(CScanner *scan, const char *name) 532 | { 533 | CMacro *macro; 534 | CMacro *prev; 535 | 536 | for (macro = scan->macros, prev = NULL; macro; ) { 537 | if (strcmp(macro->ident, name) == 0) { 538 | break; 539 | } 540 | prev = macro; 541 | macro = macro->next; 542 | } 543 | 544 | if (macro == NULL) { // not find macro 545 | return -1; 546 | } 547 | 548 | if (prev) prev->next = macro->next; 549 | else scan->macros = macro->next; 550 | 551 | free(macro->ident); 552 | free(macro->value); 553 | free(macro); 554 | 555 | return 0; 556 | } 557 | 558 | const char *cscanner_macro_search(CScanner *scan, const char *name) 559 | { 560 | CMacro *macro; 561 | for (macro = scan->macros; macro; macro = macro->next) { 562 | if (strcmp(macro->ident, name) == 0) { 563 | return macro->value; 564 | } 565 | } 566 | 567 | return NULL; 568 | } 569 | 570 | static int cscanner_reader_getch(void *fp) 571 | { 572 | CScanner *scan = (CScanner*)fp; 573 | if (scan->source == NULL) return -1; 574 | if (scan->position >= scan->length) return -1; 575 | return scan->source[scan->position++]; 576 | } 577 | 578 | int cscanner_set_source(CScanner *scan, const char *source) 579 | { 580 | int retval = 0; 581 | 582 | cscanner_token_reset(scan); 583 | scan->source = source; 584 | scan->length = (int)strlen(source); 585 | scan->position = 0; 586 | 
scan->reader = ctoken_reader_create(cscanner_reader_getch, scan); 587 | 588 | scan->root = ctoken_new_endf(); 589 | scan->link = scan->root; 590 | scan->error[0] = 0; 591 | scan->errcode = 0; 592 | 593 | for (; ; ) { 594 | CTOKEN *token; 595 | 596 | token = ctoken_reader_read(scan->reader); 597 | 598 | if (token == NULL) { 599 | scan->lineno = scan->reader->lineno; 600 | scan->errcode = scan->reader->errcode; 601 | strncpy(scan->error, scan->reader->error, 80); 602 | retval = -1; 603 | break; 604 | } 605 | 606 | if (token->type == CTokenIDENT) { 607 | const char *macro = cscanner_macro_search(scan, token->str); 608 | if (macro != NULL) { 609 | CTOKEN *ts = ctoken_stream_load(macro, scan->error); 610 | if (ts == NULL) { 611 | scan->lineno = scan->reader->lineno; 612 | scan->errcode = 88; 613 | retval = -2; 614 | break; 615 | } 616 | while (!ctoken_list_is_empty(ts)) { 617 | CTOKEN *next = ts->next; 618 | if (next->type == CTokenENDF) break; 619 | ctoken_list_del(next); 620 | next->lineno = scan->lineno; 621 | ctoken_list_add_tail(next, scan->root); 622 | } 623 | ctoken_stream_free(ts); 624 | continue; 625 | } 626 | else { 627 | if (strcmp(token->str, "@@") == 0) { 628 | scan->jmplabel++; 629 | free(token->str); 630 | token->str = (char*)malloc(20); 631 | assert(token->str); 632 | sprintf(token->str, "@@%d", scan->jmplabel); 633 | //printf("label %d\n", scan->jmplabel); 634 | } 635 | else if (stricmp(token->str, "@b") == 0) { 636 | free(token->str); 637 | token->str = (char*)malloc(20); 638 | assert(token->str); 639 | sprintf(token->str, "@@%d", scan->jmplabel); 640 | } 641 | else if (stricmp(token->str, "@f") == 0) { 642 | free(token->str); 643 | token->str = (char*)malloc(20); 644 | assert(token->str); 645 | sprintf(token->str, "@@%d", scan->jmplabel + 1); 646 | } 647 | } 648 | } 649 | 650 | ctoken_list_add_tail(token, scan->root); 651 | if (token->type == CTokenENDF) { 652 | break; 653 | } 654 | } 655 | 656 | if (retval != 0) { 657 | cscanner_token_reset(scan); 
658 | return retval; 659 | } 660 | 661 | scan->link = scan->root->next; 662 | scan->root->lineno = scan->root->prev->lineno; 663 | 664 | return 0; 665 | } 666 | 667 | const CTOKEN *cscanner_token_current(const CScanner *scan) 668 | { 669 | if (scan->root == NULL || scan->link == NULL) { 670 | return &(scan->endf); 671 | } 672 | return scan->link; 673 | } 674 | 675 | const CTOKEN *cscanner_token_lookahead(const CScanner *scan) 676 | { 677 | if (scan->root == NULL || scan->link == NULL) { 678 | return &(scan->endf); 679 | } 680 | if (scan->link == scan->root) { 681 | return &(scan->endf); 682 | } 683 | return scan->link->next; 684 | } 685 | 686 | const CTOKEN *cscanner_token_advance(CScanner *scan, int n) 687 | { 688 | if (n < 0) n = 0; 689 | if (scan->root == NULL || scan->link == NULL) { 690 | return &(scan->endf); 691 | } 692 | while (n--) { 693 | if (scan->link == scan->root) { 694 | return &(scan->endf); 695 | } 696 | scan->link = scan->link->next; 697 | } 698 | return scan->link; 699 | } 700 | 701 | int cscanner_get_type(const CScanner *scan) 702 | { 703 | return cscanner_token_current(scan)->type; 704 | } 705 | 706 | const char *cscanner_get_string(const CScanner *scan) 707 | { 708 | const CTOKEN *token = cscanner_token_current(scan); 709 | if (token->type != CTokenIDENT && token->type != CTokenSTR) { 710 | return ""; 711 | } 712 | return token->str; 713 | } 714 | 715 | int cscanner_get_char(const CScanner *scan) 716 | { 717 | const CTOKEN *token = cscanner_token_current(scan); 718 | if (token->type != CTokenOPERATOR) { 719 | return '\0'; 720 | } 721 | return token->ch; 722 | } 723 | 724 | int cscanner_get_value(const CScanner *scan) 725 | { 726 | const CTOKEN *token = cscanner_token_current(scan); 727 | if (token->type != CTokenINT) { 728 | return 0; 729 | } 730 | return token->intval; 731 | } 732 | 733 | int cscanner_get_lineno(const CScanner *scan) 734 | { 735 | const CTOKEN *token = cscanner_token_current(scan); 736 | return token->lineno; 737 | } 738 | 
739 | int cscanner_is_endl(const CScanner *scan) { 740 | const CTOKEN *token = cscanner_token_current(scan); 741 | return (token->type == CTokenENDF || token->type == CTokenENDL); 742 | } 743 | 744 | int cscanner_is_endf(const CScanner *scan) { 745 | return cscanner_get_type(scan) == CTokenENDF; 746 | } 747 | 748 | int cscanner_is_ident(const CScanner *scan) { 749 | return cscanner_get_type(scan) == CTokenIDENT; 750 | } 751 | 752 | int cscanner_is_operator(const CScanner *scan) { 753 | return cscanner_get_type(scan) == CTokenOPERATOR; 754 | } 755 | 756 | int cscanner_is_int(const CScanner *scan) { 757 | return cscanner_get_type(scan) == CTokenINT; 758 | } 759 | 760 | int cscanner_is_string(const CScanner *scan) { 761 | return cscanner_get_type(scan) == CTokenSTR; 762 | } 763 | 764 | -------------------------------------------------------------------------------- /source/cscanner.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // cscanner.h - source scanner 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
// Character-level tokenizer state. The reader pulls characters through
// the readch callback and turns them into CTOKEN objects, one per call
// to ctoken_reader_read.
struct CTokenReader
{
	int (*readch)(void *fp);    // character source, invoked with fp; a
	                            // negative result is treated as end of input
	void *fp;                   // opaque argument forwarded to readch
	int ch;                     // current character (starts as ' ')
	int unch;                   // one pushed-back character, -1 when empty
	int saved;                  // value of ch before the latest fresh read
	char *buffer;               // token text being collected
	                            // (CMAX_IDENT * 2 bytes)
	char *error;                // message of the last failure (8192 bytes)
	char **keywords;            // optional NULL-terminated keyword list;
	                            // matching identifiers (case-insensitive)
	                            // become keyword tokens holding the index
	int state;                  // set to 0 on creation
	int pos;                    // number of characters stored in buffer
	int lineno;                 // current line, starts at 1, bumped on '\n'
	int eof;                    // how often end of input was reported: the
	                            // first time yields an ENDF token, later
	                            // reads return NULL
	int colno;                  // column counter: 1 after '\n', otherwise
	                            // incremented for every character read
	int errcode;                // numeric code of the last failure, 0 if none
};
typedef struct CScanner CScanner; 89 | 90 | 91 | #ifdef __cplusplus 92 | extern "C" { 93 | #endif 94 | //--------------------------------------------------------------------- 95 | // Scanner 96 | //--------------------------------------------------------------------- 97 | CScanner *cscanner_create(void); 98 | 99 | void cscanner_release(CScanner *scan); 100 | 101 | void cscanner_macro_reset(CScanner *scan); 102 | 103 | int cscanner_macro_set(CScanner *scan, const char *name, const char *value); 104 | int cscanner_macro_del(CScanner *scan, const char *name); 105 | 106 | int cscanner_set_source(CScanner *scan, const char *source); 107 | 108 | const CTOKEN *cscanner_token_current(const CScanner *scan); 109 | const CTOKEN *cscanner_token_lookahead(const CScanner *scan); 110 | const CTOKEN *cscanner_token_advance(CScanner *scan, int n); 111 | 112 | int cscanner_get_type(const CScanner *scan); 113 | const char *cscanner_get_string(const CScanner *scan); 114 | int cscanner_get_char(const CScanner *scan); 115 | int cscanner_get_value(const CScanner *scan); 116 | int cscanner_get_lineno(const CScanner *scan); 117 | 118 | int cscanner_is_endl(const CScanner *scan); 119 | int cscanner_is_endf(const CScanner *scan); 120 | int cscanner_is_ident(const CScanner *scan); 121 | int cscanner_is_operator(const CScanner *scan); 122 | int cscanner_is_int(const CScanner *scan); 123 | int cscanner_is_string(const CScanner *scan); 124 | 125 | 126 | #ifdef __cplusplus 127 | } 128 | #endif 129 | 130 | #endif 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /source/csynthesis.c: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // csynthesis.c - source scanner 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | 10 | #include "csynthesis.h" 11 | 12 | #ifdef _MSC_VER 13 | #pragma warning(disable: 4996) 14 | #endif 15 | 16 | void csynth_init(CSynthesizer *synth) 17 | { 18 | cencoding_init(&synth->encoding); 19 | synth->error = (char*)malloc(1024); 20 | assert(synth->error); 21 | csynth_reset(synth); 22 | } 23 | 24 | void csynth_destroy(CSynthesizer *synth) 25 | { 26 | cencoding_destroy(&synth->encoding); 27 | if (synth->error) free(synth->error); 28 | synth->error = NULL; 29 | synth->errcode = 0; 30 | } 31 | 32 | void csynth_reset(CSynthesizer *synth) 33 | { 34 | cencoding_reset(&synth->encoding); 35 | synth->firstType = O_UNKNOWN; 36 | synth->secondType = O_UNKNOWN; 37 | synth->firstReg = REG_UNKNOWN; 38 | synth->secondReg = REG_UNKNOWN; 39 | synth->baseReg = REG_UNKNOWN; 40 | synth->indexReg = REG_UNKNOWN; 41 | synth->scale = 0; 42 | synth->prefix = 0; 43 | synth->error[0] = 0; 44 | synth->errcode = 0; 45 | } 46 | 47 | static void csynth_error(CSynthesizer *synth, const char *error, int code) 48 | { 49 | strncpy(synth->error, error, 100); 50 | synth->errcode = code; 51 | } 52 | 53 | int csynth_define_label(CSynthesizer *synth, const char *label) 54 | { 55 | if (synth->encoding.label != NULL) { 56 | csynth_error(synth, "Instruction can't have multiple label", 1); 57 | return -1; 58 | } 59 | cencoding_set_label(&synth->encoding, label); 60 | return 0; 61 | } 62 | 63 | int csynth_reference_label(CSynthesizer *synth, const char *label) 64 | { 65 | if (synth->encoding.reference != NULL) { 66 | csynth_error(synth, "Instruction can't have multiple refrence", 2); 67 | return -1; 68 | } 69 | cencoding_set_reference(&synth->encoding, label); 70 | return 0; 71 | } 72 | 73 | int csynth_encode_first_operand(CSynthesizer *synth, 74 | const COperand *firstOperand) 75 | { 76 | if (synth->firstType != O_UNKNOWN) { 77 | csynth_error(synth, "Instrucition destination already set", 3); 78 | return -1; 79 | } 
80 | 81 | synth->firstType = firstOperand->type; 82 | 83 | if (coperand_is_reg(firstOperand) || coperand_is_mem(firstOperand)) { 84 | synth->firstReg = firstOperand->reg; 85 | } 86 | else if (coperand_is_imm(firstOperand)) { 87 | csynth_encode_immediate(synth, firstOperand->value); 88 | } 89 | else if (!coperand_is_void(firstOperand)) { 90 | csynth_error(synth, "csynth_encode_first_operand: error", 4); 91 | return -2; 92 | } 93 | 94 | return 0; 95 | } 96 | 97 | int csynth_encode_second_operand(CSynthesizer *synth, 98 | const COperand *secondOperand) 99 | { 100 | if (synth->secondType != O_UNKNOWN) { 101 | csynth_error(synth, "Instrucition source already set", 4); 102 | return -1; 103 | } 104 | 105 | synth->secondType = secondOperand->type; 106 | 107 | if (coperand_is_reg(secondOperand) || coperand_is_mem(secondOperand)) { 108 | synth->secondReg = secondOperand->reg; 109 | } 110 | else if (coperand_is_imm(secondOperand)) { 111 | csynth_encode_immediate(synth, secondOperand->value); 112 | } 113 | else if (!coperand_is_void(secondOperand)) { 114 | csynth_error(synth, "csynth_encode_second_operand: error", 5); 115 | return -2; 116 | } 117 | 118 | return 0; 119 | } 120 | 121 | int csynth_encode_third_operand(CSynthesizer *synth, 122 | const COperand *thirdOperand) 123 | { 124 | if (coperand_is_imm(thirdOperand)) { 125 | csynth_encode_immediate(synth, thirdOperand->value); 126 | } 127 | else if (!coperand_is_void(thirdOperand)) { 128 | csynth_error(synth, "csynth_encode_third_operand: error", 6); 129 | return -3; 130 | } 131 | return 0; 132 | } 133 | 134 | int csynth_encode_base(CSynthesizer *synth, const COperand *base) 135 | { 136 | if (synth->baseReg != REG_UNKNOWN) { 137 | int retval; 138 | // base already set, use as index with scale = 1 139 | retval = csynth_encode_index(synth, base); 140 | if (retval != 0) return -1; 141 | retval = csynth_encode_scale(synth, 1); 142 | if (retval != 0) return -2; 143 | return 0; 144 | } 145 | 146 | synth->baseReg = base->reg; 147 | 
return 0; 148 | } 149 | 150 | int csynth_encode_index(CSynthesizer *synth, const COperand *index) 151 | { 152 | if (synth->indexReg != REG_UNKNOWN) { 153 | csynth_error(synth, 154 | "Memory reference can't have multiple index registers", 7); 155 | return -1; 156 | } 157 | synth->indexReg = index->reg; 158 | return 0; 159 | } 160 | 161 | int csynth_encode_scale(CSynthesizer *synth, int scale) 162 | { 163 | if (synth->scale != 0) { 164 | csynth_error(synth, 165 | "Memory reference can't have multiple scale factors", 8); 166 | return -1; 167 | } 168 | if (scale != 1 && scale != 2 && scale != 4 && scale != 8) { 169 | csynth_error(synth, "Invalid scale value", 9); 170 | return -2; 171 | } 172 | synth->scale = scale; 173 | return 0; 174 | } 175 | 176 | int csynth_encode_immediate(CSynthesizer *synth, long immediate) 177 | { 178 | if (synth->encoding.immediate != 0) { 179 | csynth_error(synth, 180 | "Instruction can't have multiple immediate operands", 10); 181 | return -1; 182 | } 183 | synth->encoding.immediate = (cint32)immediate; 184 | return 0; 185 | } 186 | 187 | int csynth_encode_displacement(CSynthesizer *synth, long displacement) 188 | { 189 | synth->encoding.displacement += (cint32)displacement; 190 | return 0; 191 | } 192 | 193 | static int csynth_encode_mod_field(CSynthesizer *synth) 194 | { 195 | synth->encoding.format.modRM = 1; 196 | if (coperand_type_is_reg(synth->firstType) && ( 197 | coperand_type_is_reg(synth->secondType) || 198 | coperand_type_is_imm(synth->secondType) || 199 | coperand_type_is_void(synth->secondType))) { 200 | synth->encoding.modRM.mod = MOD_REG; 201 | } 202 | else if ((coperand_type_is_mem(synth->firstType) || 203 | coperand_type_is_mem(synth->secondType)) && 204 | (coperand_type_is_reg(synth->firstType) || 205 | coperand_type_is_reg(synth->secondType))) { 206 | if (!synth->encoding.displacement) { 207 | synth->encoding.modRM.mod = MOD_NO_DISP; 208 | } 209 | else if ((char)synth->encoding.displacement == 210 | 
synth->encoding.displacement) { 211 | synth->encoding.modRM.mod = MOD_BYTE_DISP; 212 | synth->encoding.format.D1 = 1; 213 | } 214 | else { 215 | synth->encoding.modRM.mod = MOD_DWORD_DISP; 216 | synth->encoding.format.D1 = 1; 217 | synth->encoding.format.D2 = 1; 218 | synth->encoding.format.D3 = 1; 219 | synth->encoding.format.D4 = 1; 220 | } 221 | } 222 | else { 223 | csynth_error(synth, "mod field error", 30); 224 | return -1; 225 | } 226 | return 0; 227 | } 228 | 229 | static int csynth_encode_sib_byte(CSynthesizer *synth) 230 | { 231 | if (synth->scale == 0 && synth->indexReg == REG_UNKNOWN) { 232 | if (synth->baseReg == REG_UNKNOWN || ( 233 | synth->encoding.modRM.r_m != E_ESP && 234 | synth->encoding.modRM.r_m != E_EBP)) { 235 | if (synth->encoding.format.SIB) { 236 | csynth_error(synth, "SIB byte error", 31); 237 | return -1; 238 | } 239 | // No SIB byte needed 240 | return 0; 241 | } 242 | } 243 | 244 | // Indicates use of SIB in mod R/M 245 | synth->encoding.format.SIB = 1; 246 | synth->encoding.modRM.r_m = E_ESP; 247 | 248 | if (synth->baseReg == E_EBP && synth->encoding.modRM.mod == MOD_NO_DISP) 249 | { 250 | synth->encoding.modRM.mod = MOD_BYTE_DISP; 251 | synth->encoding.format.D1 = 1; 252 | } 253 | 254 | if (synth->indexReg == E_ESP) { 255 | if (synth->scale != 1) { 256 | csynth_error(synth, 257 | "ESP can't be scaled index in memory reference", 32); 258 | return -2; 259 | } 260 | else { 261 | enum CRegID tempReg; 262 | tempReg = synth->indexReg; 263 | synth->indexReg = synth->baseReg; 264 | synth->baseReg = tempReg; 265 | } 266 | } 267 | 268 | if (synth->baseReg == REG_UNKNOWN) { 269 | synth->encoding.SIB.base = E_EBP; 270 | synth->encoding.modRM.mod = MOD_NO_DISP; 271 | synth->encoding.format.D1 = 1; 272 | synth->encoding.format.D2 = 1; 273 | synth->encoding.format.D3 = 1; 274 | synth->encoding.format.D4 = 1; 275 | } else { 276 | synth->encoding.SIB.base = synth->baseReg; 277 | } 278 | 279 | if (synth->indexReg != REG_UNKNOWN) { 280 | 
synth->encoding.SIB.index = synth->indexReg; 281 | } else { 282 | synth->encoding.SIB.index = E_ESP; 283 | } 284 | 285 | switch (synth->scale) 286 | { 287 | case 0: 288 | case 1: 289 | synth->encoding.SIB.scale = SCALE_1; 290 | break; 291 | case 2: 292 | synth->encoding.SIB.scale = SCALE_2; 293 | break; 294 | case 4: 295 | synth->encoding.SIB.scale = SCALE_4; 296 | break; 297 | case 8: 298 | synth->encoding.SIB.scale = SCALE_8; 299 | break; 300 | default: 301 | csynth_error(synth, "scale number error", 33); 302 | return -3; 303 | break; 304 | } 305 | 306 | return 0; 307 | } 308 | 309 | int csynth_encode_prefix(CSynthesizer *synth, int code) 310 | { 311 | if (cencoding_add_prefix(&synth->encoding, code)) { 312 | csynth_error(synth, "cannot add prefix", 90); 313 | return -1; 314 | } 315 | return 0; 316 | } 317 | 318 | const CEncoding *csynth_encode_instruction(CSynthesizer *synth, 319 | CInstruction *instruction) 320 | { 321 | enum COperandType p1, p2; 322 | const char *format; 323 | unsigned long O; 324 | 325 | if (!instruction) { 326 | return &synth->encoding; 327 | } 328 | 329 | format = cinst_getEncoding(instruction); 330 | 331 | if (!format) { 332 | csynth_error(synth, "csynth_encode_instruction: internal error", 11); 333 | return NULL; 334 | } 335 | 336 | #define IFORMAT_WORD(x, y) ( (((short)(x)) << 8) | ((short)(y)) ) 337 | 338 | while (*format) { 339 | int head = ((short)format[1] | format[0] << 8); 340 | switch (head) 341 | { 342 | case IFORMAT_WORD('p', '0'): 343 | if (cencoding_add_prefix(&synth->encoding, 0xf0)) { 344 | csynth_error(synth, "prefix error 0xf0", 40); 345 | return NULL; 346 | } 347 | break; 348 | case IFORMAT_WORD('p', '2'): 349 | if (cencoding_add_prefix(&synth->encoding, 0xf2)) { 350 | csynth_error(synth, "prefix error 0xf2", 41); 351 | return NULL; 352 | } 353 | break; 354 | case IFORMAT_WORD('p', '3'): 355 | if (cencoding_add_prefix(&synth->encoding, 0xf3)) { 356 | csynth_error(synth, "prefix error 0xf3", 42); 357 | return NULL; 358 | } 
359 | break; 360 | case IFORMAT_WORD('p', 'o'): 361 | if (!cinst_is_32bit(instruction)) { 362 | if (cencoding_add_prefix(&synth->encoding, 0x66)) { 363 | csynth_error(synth, "prefix error 0x66", 43); 364 | return NULL; 365 | } 366 | } 367 | break; 368 | case IFORMAT_WORD('p', 'a'): 369 | if (!cinst_is_32bit(instruction)) { 370 | if (cencoding_add_prefix(&synth->encoding, 0x67)) { 371 | csynth_error(synth, "prefix error 0x67", 44); 372 | return NULL; 373 | } 374 | } 375 | break; 376 | case IFORMAT_WORD('+', 'r'): 377 | if (synth->encoding.format.O1) { 378 | if (coperand_type_is_reg(synth->firstType)) { 379 | synth->encoding.O1 += synth->firstReg; 380 | } 381 | else if(coperand_type_is_reg(synth->secondType)) { 382 | synth->encoding.O1 += synth->secondReg; 383 | } 384 | else { 385 | csynth_error(synth, 386 | "'+r' not compatible with operands", 12); 387 | return NULL; 388 | } 389 | } 390 | else { 391 | csynth_error(synth, "'+r' needs first opcode byte", 13); 392 | return NULL; 393 | } 394 | break; 395 | case IFORMAT_WORD('/', 'r'): 396 | if (csynth_encode_mod_field(synth) != 0) { 397 | return NULL; 398 | } 399 | p1 = cinst_getFirstOperand(instruction); 400 | p2 = cinst_getSecondOperand(instruction); 401 | if (coperand_type_is_reg(p1) && coperand_type_is_R_M(p2)) { 402 | if (coperand_type_is_mem(synth->secondType)) { 403 | synth->encoding.modRM.r_m = synth->baseReg; 404 | } 405 | else if (coperand_type_is_reg(synth->secondType)) { 406 | synth->encoding.modRM.r_m = synth->secondReg; 407 | } 408 | else { 409 | csynth_error(synth, "syntax error", 14); 410 | return NULL; 411 | } 412 | synth->encoding.modRM.reg = synth->firstReg; 413 | } 414 | else if (coperand_type_is_R_M(p1) && coperand_type_is_reg(p2)) { 415 | if (coperand_type_is_mem(synth->firstType)) { 416 | synth->encoding.modRM.r_m = synth->baseReg; 417 | } 418 | else if (coperand_type_is_reg(synth->firstType)) { 419 | synth->encoding.modRM.r_m = synth->firstReg; 420 | } 421 | else { 422 | csynth_error(synth, 
"syntax error", 15); 423 | return NULL; 424 | } 425 | synth->encoding.modRM.reg = synth->secondReg; 426 | } 427 | else { 428 | csynth_error(synth, "format error", 16); 429 | return NULL; 430 | } 431 | if (csynth_encode_sib_byte(synth) != 0) { 432 | return NULL; 433 | } 434 | break; 435 | case IFORMAT_WORD('/', '0'): 436 | case IFORMAT_WORD('/', '1'): 437 | case IFORMAT_WORD('/', '2'): 438 | case IFORMAT_WORD('/', '3'): 439 | case IFORMAT_WORD('/', '4'): 440 | case IFORMAT_WORD('/', '5'): 441 | case IFORMAT_WORD('/', '6'): 442 | case IFORMAT_WORD('/', '7'): 443 | if (csynth_encode_mod_field(synth) != 0) { 444 | return NULL; 445 | } 446 | synth->encoding.modRM.reg = format[1] - '0'; 447 | if (coperand_type_is_mem(synth->firstType)) { 448 | synth->encoding.modRM.r_m = synth->baseReg; 449 | } 450 | else if (coperand_type_is_reg(synth->firstType)) { 451 | synth->encoding.modRM.r_m = synth->firstReg; 452 | } 453 | else { 454 | csynth_error(synth, "syntax error", 17); 455 | return NULL; 456 | } 457 | if (csynth_encode_sib_byte(synth) != 0) { 458 | return NULL; 459 | } 460 | break; 461 | case IFORMAT_WORD('i', 'd'): 462 | synth->encoding.format.I1 = 1; 463 | synth->encoding.format.I2 = 1; 464 | synth->encoding.format.I3 = 1; 465 | synth->encoding.format.I4 = 1; 466 | synth->encoding.relative = 0; 467 | break; 468 | case IFORMAT_WORD('i', 'w'): 469 | synth->encoding.format.I1 = 1; 470 | synth->encoding.format.I2 = 1; 471 | synth->encoding.relative = 0; 472 | break; 473 | case IFORMAT_WORD('i', 'b'): 474 | synth->encoding.format.I1 = 1; 475 | synth->encoding.relative = 0; 476 | break; 477 | case IFORMAT_WORD('-', 'b'): 478 | synth->encoding.format.I1 = 1; 479 | synth->encoding.relative = 1; 480 | break; 481 | case IFORMAT_WORD('-', 'i'): 482 | synth->encoding.format.I1 = 1; 483 | synth->encoding.format.I2 = 1; 484 | synth->encoding.format.I3 = 1; 485 | synth->encoding.format.I4 = 1; 486 | synth->encoding.relative = 1; 487 | break; 488 | default: 489 | O = strtoul(format, 0, 
16); 490 | if (O > 0xFF) { 491 | csynth_error(synth, "format error", 18); 492 | return NULL; 493 | } 494 | if (!synth->encoding.format.O1) { 495 | synth->encoding.O1 = (cbyte)O; 496 | synth->encoding.format.O1 = 1; 497 | } 498 | else if (synth->encoding.format.O2 == 0 && 499 | (synth->encoding.O1 == 0x0f || 500 | synth->encoding.O1 == 0xd8 || 501 | synth->encoding.O1 == 0xd9 || 502 | synth->encoding.O1 == 0xda || 503 | synth->encoding.O1 == 0xdb || 504 | synth->encoding.O1 == 0xdc || 505 | synth->encoding.O1 == 0xde || 506 | synth->encoding.O1 == 0xdf)) { 507 | synth->encoding.O2 = synth->encoding.O1; 508 | synth->encoding.O1 = (cbyte)O; 509 | synth->encoding.format.O2 = 1; 510 | } 511 | else { 512 | csynth_error(synth, "synth error", 19); 513 | return NULL; 514 | } 515 | break; 516 | } 517 | 518 | format += 2; 519 | if (*format == ' ') { 520 | format++; 521 | } 522 | else if (*format == '\0') { 523 | break; 524 | } 525 | else { 526 | csynth_error(synth, "instruction error", 20); 527 | return NULL; 528 | } 529 | #undef IFORMAT_WORD 530 | } 531 | 532 | return &synth->encoding; 533 | } 534 | 535 | -------------------------------------------------------------------------------- /source/csynthesis.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // csynthesis.h - source scanner 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 
7 | // 8 | //===================================================================== 9 | #ifndef __CSYNTHESIS_H__ 10 | #define __CSYNTHESIS_H__ 11 | 12 | #include "cencoding.h" 13 | #include "ckeywords.h" 14 | #include "cinstruct.h" 15 | 16 | 17 | //--------------------------------------------------------------------- 18 | // CSynthesizer 19 | //--------------------------------------------------------------------- 20 | struct CSynthesizer 21 | { 22 | CEncoding encoding; 23 | enum COperandType firstType; 24 | enum COperandType secondType; 25 | enum CRegID firstReg; 26 | enum CRegID secondReg; 27 | enum CRegID baseReg; 28 | enum CRegID indexReg; 29 | int scale; 30 | int prefix; 31 | char *error; 32 | int errcode; 33 | }; 34 | 35 | typedef struct CSynthesizer CSynthesizer; 36 | 37 | 38 | 39 | #ifdef __cplusplus 40 | extern "C" { 41 | #endif 42 | //--------------------------------------------------------------------- 43 | // interface 44 | //--------------------------------------------------------------------- 45 | void csynth_init(CSynthesizer *synth); 46 | void csynth_destroy(CSynthesizer *synth); 47 | void csynth_reset(CSynthesizer *synth); 48 | 49 | int csynth_define_label(CSynthesizer *synth, const char *label); 50 | int csynth_reference_label(CSynthesizer *synth, const char *label); 51 | 52 | int csynth_encode_first_operand(CSynthesizer *synth, const COperand *); 53 | int csynth_encode_second_operand(CSynthesizer *synth, const COperand *); 54 | int csynth_encode_third_operand(CSynthesizer *synth, const COperand *); 55 | 56 | int csynth_encode_base(CSynthesizer *synth, const COperand *base); 57 | int csynth_encode_index(CSynthesizer *synth, const COperand *index); 58 | 59 | int csynth_encode_scale(CSynthesizer *synth, int scale); 60 | int csynth_encode_immediate(CSynthesizer *synth, long immediate); 61 | int csynth_encode_displacement(CSynthesizer *synth, long displacement); 62 | 63 | int csynth_encode_prefix(CSynthesizer *synth, int code); 64 | 65 | const CEncoding 
*csynth_encode_instruction(CSynthesizer *, CInstruction*); 66 | 67 | 68 | #ifdef __cplusplus 69 | } 70 | #endif 71 | 72 | #endif 73 | 74 | 75 | -------------------------------------------------------------------------------- /source/ctoken.c: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // ctoken.c - token definition 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "ctoken.h" 16 | 17 | 18 | //--------------------------------------------------------------------- 19 | // create a new token 20 | //--------------------------------------------------------------------- 21 | CTOKEN *ctoken_new(enum CTokenType type, const void *str, int size) 22 | { 23 | CTOKEN *token; 24 | 25 | token = (CTOKEN*)malloc(sizeof(CTOKEN)); 26 | assert(token); 27 | 28 | token->type = type; 29 | token->str = NULL; 30 | token->size = 0; 31 | token->lineno = -1; 32 | token->fileno = -1; 33 | token->keyword = -1; 34 | 35 | switch (type) 36 | { 37 | case CTokenENDL: 38 | break; 39 | case CTokenENDF: 40 | break; 41 | case CTokenSTR: 42 | case CTokenIDENT: 43 | token->str = (char*)malloc(size + 1); 44 | assert(token->str); 45 | memcpy(token->str, str, size); 46 | token->str[size] = 0; 47 | token->size = size; 48 | break; 49 | case CTokenKEYWORD: 50 | token->keyword = *(int*)str; 51 | break; 52 | case CTokenOPERATOR: 53 | token->ch = *(int*)str; 54 | break; 55 | case CTokenINT: 56 | token->intval = *(long*)str; 57 | break; 58 | case CTokenFLOAT: 59 | token->fltval = *(double*)str; 60 | break; 61 | default: 62 | token->type = CTokenERROR; 63 | token->errcode = *(int*)str; 64 | break; 65 | } 66 | 67 | token->next = token; 68 | token->prev = token; 69 | 70 | return token; 71 | } 72 | 73 | 
//--------------------------------------------------------------------- 74 | // release token 75 | //--------------------------------------------------------------------- 76 | void ctoken_delete(CTOKEN *token) 77 | { 78 | if (token->type == CTokenSTR || token->type == CTokenIDENT) { 79 | if (token->str) free(token->str); 80 | token->str = NULL; 81 | } 82 | token->type = CTokenERROR; 83 | free(token); 84 | } 85 | 86 | //--------------------------------------------------------------------- 87 | // create a new endl 88 | //--------------------------------------------------------------------- 89 | CTOKEN *ctoken_new_endl(void) { 90 | return ctoken_new(CTokenENDL, NULL, 0); 91 | } 92 | 93 | //--------------------------------------------------------------------- 94 | // create a new endf 95 | //--------------------------------------------------------------------- 96 | CTOKEN *ctoken_new_endf(void) { 97 | return ctoken_new(CTokenENDF, NULL, 0); 98 | } 99 | 100 | //--------------------------------------------------------------------- 101 | // create a new identity 102 | //--------------------------------------------------------------------- 103 | CTOKEN *ctoken_new_ident(const char *ident) { 104 | return ctoken_new(CTokenIDENT, ident, (int)strlen(ident)); 105 | } 106 | 107 | //--------------------------------------------------------------------- 108 | // create a new keyword 109 | //--------------------------------------------------------------------- 110 | CTOKEN *ctoken_new_keyword(int keyid) { 111 | return ctoken_new(CTokenKEYWORD, &keyid, sizeof(int)); 112 | } 113 | 114 | //--------------------------------------------------------------------- 115 | // create a new string 116 | //--------------------------------------------------------------------- 117 | CTOKEN *ctoken_new_string(const char *string) { 118 | return ctoken_new(CTokenSTR, string, (int)strlen(string)); 119 | } 120 | 121 | //--------------------------------------------------------------------- 122 | // 
create a new integer 123 | //--------------------------------------------------------------------- 124 | CTOKEN *ctoken_new_int(long intval) { 125 | return ctoken_new(CTokenINT, &intval, sizeof(long)); 126 | } 127 | 128 | //--------------------------------------------------------------------- 129 | // create a new float 130 | //--------------------------------------------------------------------- 131 | CTOKEN *ctoken_new_float(double fltval) { 132 | return ctoken_new(CTokenFLOAT, &fltval, sizeof(double)); 133 | } 134 | 135 | //--------------------------------------------------------------------- 136 | // create a new operator 137 | //--------------------------------------------------------------------- 138 | CTOKEN *ctoken_new_operator(int op) { 139 | return ctoken_new(CTokenOPERATOR, &op, sizeof(int)); 140 | } 141 | 142 | //--------------------------------------------------------------------- 143 | // create a new error 144 | //--------------------------------------------------------------------- 145 | CTOKEN *ctoken_new_error(int code) { 146 | return ctoken_new(CTokenERROR, &code, sizeof(int)); 147 | } 148 | 149 | //--------------------------------------------------------------------- 150 | // token copy 151 | //--------------------------------------------------------------------- 152 | CTOKEN *ctoken_new_copy(const CTOKEN *token) { 153 | CTOKEN *newtoken; 154 | 155 | newtoken = (CTOKEN*)malloc(sizeof(CTOKEN)); 156 | assert(newtoken); 157 | 158 | newtoken->type = token->type; 159 | newtoken->str = NULL; 160 | newtoken->size = 0; 161 | newtoken->lineno = token->lineno; 162 | newtoken->fileno = token->fileno; 163 | 164 | switch (newtoken->type) 165 | { 166 | case CTokenSTR: 167 | case CTokenIDENT: 168 | newtoken->str = (char*)malloc(token->size + 1); 169 | assert(newtoken->str); 170 | memcpy(newtoken->str, token->str, token->size); 171 | newtoken->str[token->size] = 0; 172 | newtoken->size = token->size; 173 | break; 174 | case CTokenKEYWORD: 175 | 
newtoken->keyword = token->keyword; 176 | break; 177 | case CTokenOPERATOR: 178 | newtoken->ch = token->ch; 179 | break; 180 | case CTokenINT: 181 | newtoken->intval = token->intval; 182 | break; 183 | case CTokenFLOAT: 184 | newtoken->fltval = token->fltval; 185 | break; 186 | default: 187 | break; 188 | } 189 | 190 | newtoken->next = newtoken; 191 | newtoken->prev = newtoken; 192 | 193 | return newtoken; 194 | } 195 | 196 | 197 | //--------------------------------------------------------------------- 198 | // get string 199 | //--------------------------------------------------------------------- 200 | const char *ctoken_get_string(const CTOKEN *token) 201 | { 202 | if (token->type != CTokenIDENT && token->type != CTokenSTR) { 203 | return ""; 204 | } 205 | return token->str; 206 | } 207 | 208 | //--------------------------------------------------------------------- 209 | // get integer 210 | //--------------------------------------------------------------------- 211 | long ctoken_get_int(const CTOKEN *token) 212 | { 213 | if (token->type != CTokenINT) { 214 | return 0; 215 | } 216 | return token->intval; 217 | } 218 | 219 | //--------------------------------------------------------------------- 220 | // get char 221 | //--------------------------------------------------------------------- 222 | int ctoken_get_char(const CTOKEN *token) 223 | { 224 | if (token->type != CTokenOPERATOR) { 225 | return '\0'; 226 | } 227 | return token->ch; 228 | } 229 | 230 | //--------------------------------------------------------------------- 231 | // get float 232 | //--------------------------------------------------------------------- 233 | double ctoken_get_float(const CTOKEN *token) 234 | { 235 | if (token->type != CTokenFLOAT) { 236 | return 0.0; 237 | } 238 | return token->fltval; 239 | } 240 | 241 | //--------------------------------------------------------------------- 242 | // get keyword 243 | //--------------------------------------------------------------------- 244 
| int ctoken_get_keyword(const CTOKEN *token) 245 | { 246 | if (token->type != CTokenKEYWORD) { 247 | return -1; 248 | } 249 | return token->keyword; 250 | } 251 | 252 | 253 | int ctoken_is_endl(const CTOKEN *token) { 254 | return token->type == CTokenENDL; 255 | } 256 | 257 | int ctoken_is_endf(const CTOKEN *token) { 258 | return token->type == CTokenENDF; 259 | } 260 | 261 | int ctoken_is_ident(const CTOKEN *token) { 262 | return token->type == CTokenIDENT; 263 | } 264 | 265 | int ctoken_is_keyword(const CTOKEN *token) { 266 | return token->type == CTokenKEYWORD; 267 | } 268 | 269 | int ctoken_is_string(const CTOKEN *token) { 270 | return token->type == CTokenSTR; 271 | } 272 | 273 | int ctoken_is_int(const CTOKEN *token) { 274 | return token->type == CTokenINT; 275 | } 276 | 277 | int ctoken_is_float(const CTOKEN *token) { 278 | return token->type == CTokenFLOAT; 279 | } 280 | 281 | int ctoken_is_operator(const CTOKEN *token) { 282 | return token->type == CTokenOPERATOR; 283 | } 284 | 285 | int ctoken_is_error(const CTOKEN *token) { 286 | return token->type == CTokenERROR; 287 | } 288 | 289 | 290 | //--------------------------------------------------------------------- 291 | // add node to head 292 | //--------------------------------------------------------------------- 293 | void ctoken_list_add(CTOKEN *node, CTOKEN *head) 294 | { 295 | (node)->prev = (head), (node)->next = (head)->next; 296 | (head)->next->prev = (node), (head)->next = (node); 297 | } 298 | 299 | //--------------------------------------------------------------------- 300 | // add node to head's tail 301 | //--------------------------------------------------------------------- 302 | void ctoken_list_add_tail(CTOKEN *node, CTOKEN *head) 303 | { 304 | (node)->prev = (head)->prev, (node)->next = (head); 305 | (head)->prev->next = (node), (head)->prev = (node); 306 | } 307 | 308 | //--------------------------------------------------------------------- 309 | // delete between 310 | 
//--------------------------------------------------------------------- 311 | void ctoken_list_del_between(CTOKEN *p, CTOKEN *n) 312 | { 313 | (n)->prev = (p), (p)->next = (n); 314 | } 315 | 316 | //--------------------------------------------------------------------- 317 | // remove self 318 | //--------------------------------------------------------------------- 319 | void ctoken_list_del(CTOKEN *entry) 320 | { 321 | (entry)->next->prev = (entry)->prev; 322 | (entry)->prev->next = (entry)->next; 323 | (entry)->next = 0, (entry)->prev = 0; 324 | (entry)->next = entry; 325 | (entry)->prev = entry; 326 | } 327 | 328 | //--------------------------------------------------------------------- 329 | // check if empty 330 | //--------------------------------------------------------------------- 331 | int ctoken_list_is_empty(const CTOKEN *entry) 332 | { 333 | return (entry) == (entry)->next; 334 | } 335 | 336 | //--------------------------------------------------------------------- 337 | // print to file 338 | //--------------------------------------------------------------------- 339 | void ctoken_print(FILE *fp, const CTOKEN *token) 340 | { 341 | if (fp == NULL) fp = stdout; 342 | if (token->type == CTokenIDENT) { 343 | fprintf(fp, "(%s)", token->str); 344 | } 345 | else if (token->type == CTokenSTR) { 346 | fprintf(fp, "(\"%s\")", token->str); 347 | } 348 | else if (token->type == CTokenENDL) { 349 | fprintf(fp, "ENDL"); 350 | } 351 | else if (token->type == CTokenENDF) { 352 | fprintf(fp, "ENDF"); 353 | } 354 | else if (token->type == CTokenKEYWORD) { 355 | fprintf(fp, "<%d>", token->keyword); 356 | } 357 | else if (token->type == CTokenINT) { 358 | fprintf(fp, "[%ld]", token->intval); 359 | } 360 | else if (token->type == CTokenFLOAT) { 361 | fprintf(fp, "[%f]", token->fltval); 362 | } 363 | else if (token->type == CTokenOPERATOR) { 364 | fprintf(fp, "[%c]", (char)token->ch); 365 | } 366 | else if (token->type == CTokenERROR) { 367 | fprintf(fp, "ERROR"); 368 | } 
369 | fflush(fp); 370 | } 371 | 372 | -------------------------------------------------------------------------------- /source/ctoken.h: -------------------------------------------------------------------------------- 1 | //===================================================================== 2 | // 3 | // ctoken.h - token definition 4 | // 5 | // NOTE: 6 | // for more information, please see the readme file. 7 | // 8 | //===================================================================== 9 | #ifndef __CTOKEN_H__ 10 | #define __CTOKEN_H__ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | //--------------------------------------------------------------------- 19 | // TOKEN Type 20 | //--------------------------------------------------------------------- 21 | enum CTokenType 22 | { 23 | CTokenENDL = 0, 24 | CTokenENDF = 1, 25 | CTokenIDENT = 2, 26 | CTokenKEYWORD = 3, 27 | CTokenSTR = 4, 28 | CTokenOPERATOR = 5, 29 | CTokenINT = 6, 30 | CTokenFLOAT = 7, 31 | CTokenERROR = 8, 32 | }; 33 | 34 | 35 | //--------------------------------------------------------------------- 36 | // CTOKEN DEFINITION 37 | //--------------------------------------------------------------------- 38 | struct CTOKEN 39 | { 40 | enum CTokenType type; 41 | union { 42 | long intval; 43 | double fltval; 44 | int keyword; 45 | int ch; 46 | int errcode; 47 | char *str; 48 | }; 49 | long size; 50 | int lineno; 51 | int fileno; 52 | struct CTOKEN *next; 53 | struct CTOKEN *prev; 54 | }; 55 | 56 | typedef struct CTOKEN CTOKEN; 57 | 58 | #define ctoken_type(token) ((token)->type) 59 | #define ctoken_int(token) ((token)->intval) 60 | #define ctoken_str(token) ((token)->str) 61 | #define ctoken_chr(token) ((token)->ch) 62 | #define ctoken_len(token) ((token)->size) 63 | #define ctoken_key(token) ((token)->keyword) 64 | 65 | 66 | #ifdef __cplusplus 67 | extern "C" { 68 | #endif 69 | 70 | 71 | //--------------------------------------------------------------------- 72 | // 
BASIC INTERFACE 73 | //--------------------------------------------------------------------- 74 | // create a new token 75 | CTOKEN *ctoken_new(enum CTokenType type, const void *data, int size); 76 | 77 | // delete and free memory 78 | void ctoken_delete(CTOKEN *token); 79 | 80 | 81 | CTOKEN *ctoken_new_endl(void); // create a new endl 82 | CTOKEN *ctoken_new_endf(void); // create a new endf 83 | CTOKEN *ctoken_new_ident(const char *ident); // create a new identity 84 | CTOKEN *ctoken_new_keyword(int keyid); // create a new keyword 85 | CTOKEN *ctoken_new_string(const char *string); // create a new string 86 | CTOKEN *ctoken_new_int(long intval); // create a new integer 87 | CTOKEN *ctoken_new_float(double fltval); // create a new float 88 | CTOKEN *ctoken_new_operator(int op); // create a new operator 89 | CTOKEN *ctoken_new_error(int errcode); // create a new errcode 90 | CTOKEN *ctoken_new_copy(const CTOKEN *token); // create a new copy 91 | 92 | 93 | //--------------------------------------------------------------------- 94 | // type & value operation 95 | //--------------------------------------------------------------------- 96 | const char *ctoken_get_string(const CTOKEN *token); // get string 97 | long ctoken_get_int(const CTOKEN *token); // get integer value 98 | int ctoken_get_char(const CTOKEN *token); // get operator char 99 | double ctoken_get_float(const CTOKEN *token); // get float value 100 | int ctoken_get_keyword(const CTOKEN *token); // get keyword 101 | 102 | int ctoken_is_endl(const CTOKEN *token); 103 | int ctoken_is_endf(const CTOKEN *token); 104 | int ctoken_is_ident(const CTOKEN *token); 105 | int ctoken_is_keyword(const CTOKEN *token); 106 | int ctoken_is_string(const CTOKEN *token); 107 | int ctoken_is_int(const CTOKEN *token); 108 | int ctoken_is_float(const CTOKEN *token); 109 | int ctoken_is_operator(const CTOKEN *token); 110 | int ctoken_is_error(const CTOKEN *token); 111 | 112 | 113 | 
//--------------------------------------------------------------------- 114 | // list operation 115 | //--------------------------------------------------------------------- 116 | void ctoken_list_add(CTOKEN *node, CTOKEN *head); 117 | void ctoken_list_add_tail(CTOKEN *node, CTOKEN *head); 118 | void ctoken_list_del_between(CTOKEN *p, CTOKEN *n); 119 | void ctoken_list_del(CTOKEN *p); 120 | int ctoken_list_is_empty(const CTOKEN *p); 121 | 122 | 123 | //--------------------------------------------------------------------- 124 | // misc 125 | //--------------------------------------------------------------------- 126 | void ctoken_print(FILE *fp, const CTOKEN *token); 127 | 128 | 129 | #ifdef __cplusplus 130 | } 131 | #endif 132 | 133 | #endif 134 | 135 | 136 | -------------------------------------------------------------------------------- /source/test1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cencoding.h" 8 | #include "ckeywords.h" 9 | #include "cinstruct.h" 10 | #include "cinstset.h" 11 | #include "ctoken.h" 12 | #include "cscanner.h" 13 | #include "csynthesis.h" 14 | #include "cparser.h" 15 | #include "casmpure.h" 16 | 17 | 18 | /* 19 | void CrossProduct(float *V0, float *V1, float *V2) 20 | { 21 | V2[0] = V0[1] * V1[2] - V0[2] * V1[1]; 22 | V2[1] = V0[2] * V1[0] - V0[0] * V1[2]; 23 | V2[2] = V0[0] * V1[1] - V0[1] * V1[0]; 24 | }*/ 25 | 26 | const char *CrossProductAsm = 27 | " mov ecx, [esp+8]\n" 28 | " mov eax, [esp+4]\n" 29 | " mov edx, [esp+12]\n" 30 | "\n" 31 | " fld DWORD [ecx+8]\n" 32 | " fmul DWORD [eax+4]\n" 33 | " fld DWORD [eax+8]\n" 34 | " fmul DWORD [ecx+4]\n" 35 | " fsubp st1, st0\n" 36 | " fstp DWORD [edx]\n" 37 | "\n" 38 | " fld DWORD [eax+8]\n" 39 | " fmul DWORD [ecx]\n" 40 | " fld DWORD [eax]\n" 41 | " fmul DWORD [ecx+8]\n" 42 | " fsubp st1, st0\n" 43 | " fstp DWORD [edx+4]\n" 44 | "\n" 45 | " fld DWORD [eax]\n" 46 | " 
fmul DWORD [ecx+4]\n" 47 | " fld DWORD [ecx]\n" 48 | " fmul DWORD [eax+4]\n" 49 | " fsubp st1, st0\n" 50 | " fstp DWORD [edx+8]\n" 51 | " \n" 52 | " ret\n"; 53 | 54 | 55 | void testCrossProduct(void) 56 | { 57 | CAssembler *casm; 58 | int size, c; 59 | 60 | void (*CrossProductPtr)(float*, float*, float*); 61 | 62 | // create assembler 63 | casm = casm_create(); 64 | 65 | // append assembly source 66 | casm_source(casm, CrossProductAsm); 67 | 68 | // calculate size 69 | size = casm_compile(casm, NULL, 0); 70 | 71 | if (size < 0) { 72 | printf("compile error: %s\n", casm->error); 73 | casm_release(casm); 74 | return; 75 | } 76 | 77 | CrossProductPtr = (void (*)(float*, float*, float*))malloc(size); 78 | 79 | // compile into memory 80 | casm_compile(casm, (unsigned char*)CrossProductPtr, size); 81 | 82 | printf("==================== Cross Product ====================\n"); 83 | 84 | casm_dumpinst(casm, stdout); 85 | 86 | printf("\nExecute code (y/n)?\n\n"); 87 | 88 | do 89 | { 90 | c = getch(); 91 | } 92 | while(c != 'y' && c != 'n'); 93 | 94 | if(c == 'y') 95 | { 96 | float V0[3] = {1, 0, 0}; 97 | float V1[3] = {0, 1, 0}; 98 | float V2[3]; 99 | 100 | // execute 101 | CrossProductPtr(V0, V1, V2); 102 | 103 | printf("output: (%.3f, %.3f, %.3f)\n\n", V2[0], V2[1], V2[2]); 104 | } 105 | 106 | free(CrossProductPtr); 107 | casm_release(casm); 108 | } 109 | 110 | 111 | const char *HelloWorldAsm = 112 | " mov eax, [esp+8]\n" 113 | " push eax\n" 114 | " call DWORD [esp+8]\n" 115 | " pop ecx\n" 116 | " ret\n"; 117 | 118 | void testHelloWorld(void) 119 | { 120 | CAssembler *casm; 121 | int size, c; 122 | 123 | void (*HelloWorldPtr)(void*, const char*); 124 | 125 | // create assembler 126 | casm = casm_create(); 127 | 128 | // append assembly source 129 | casm_source(casm, HelloWorldAsm); 130 | 131 | // calculate size 132 | size = casm_compile(casm, NULL, 0); 133 | 134 | if (size < 0) { 135 | printf("compile error: %s\n", casm->error); 136 | casm_release(casm); 137 | return; 138 
| } 139 | 140 | HelloWorldPtr = (void (*)(void*, const char*))malloc(size); 141 | 142 | // compile into memory 143 | casm_compile(casm, (unsigned char*)HelloWorldPtr, size); 144 | 145 | printf("==================== Hello World ====================\n"); 146 | 147 | casm_dumpinst(casm, stdout); 148 | 149 | printf("\nExecute code (y/n)?\n\n"); 150 | 151 | do 152 | { 153 | c = getch(); 154 | } 155 | while(c != 'y' && c != 'n'); 156 | 157 | if(c == 'y') 158 | { 159 | // execute 160 | HelloWorldPtr((void*)printf, "Hello, World !!\n"); 161 | } 162 | 163 | free(HelloWorldPtr); 164 | casm_release(casm); 165 | } 166 | 167 | 168 | const char *AlphaBlendAsm = 169 | "PROC C1:DWORD, C2:DWORD, A:DWORD\n" 170 | " movd mm0, A\n" 171 | " punpcklwd mm0, mm0\n" 172 | " punpckldq mm0, mm0\n" 173 | " pcmpeqb mm7, mm7\n" 174 | " psubw mm7, mm0\n" 175 | " \n" 176 | " punpcklbw mm1, C1\n" 177 | " psrlw mm1, 8\n" 178 | " punpcklbw mm2, C2\n" 179 | " psrlw mm2, 8\n" 180 | " \n" 181 | " pmullw mm1, mm7\n" 182 | " pmullw mm2, mm0\n" 183 | " paddw mm1, mm2\n" 184 | " \n" 185 | " psrlw mm1, 8\n" 186 | " packuswb mm1, mm1\n" 187 | " movd eax, mm1\n" 188 | " emms\n" 189 | " ret\n" 190 | "ENDP\n"; 191 | 192 | 193 | void testAlphaBlend(void) 194 | { 195 | CAssembler *casm; 196 | int c; 197 | 198 | int (*AlphaBlendPtr)(int, int, int); 199 | 200 | // create assembler 201 | casm = casm_create(); 202 | 203 | // append assembly source 204 | casm_source(casm, AlphaBlendAsm); 205 | 206 | // compile directly 207 | AlphaBlendPtr = (int (*)(int, int, int))casm_callable(casm, NULL); 208 | 209 | if (AlphaBlendPtr == NULL) { 210 | printf("error: %s\n", casm->error); 211 | casm_release(casm); 212 | return; 213 | } 214 | 215 | printf("==================== Alpha Blend ====================\n"); 216 | 217 | casm_dumpinst(casm, stdout); 218 | 219 | printf("\nExecute code (y/n)?\n\n"); 220 | 221 | do 222 | { 223 | c = getch(); 224 | } 225 | while(c != 'y' && c != 'n'); 226 | 227 | if(c == 'y') 228 | { 229 | // execute 
230 | int x = AlphaBlendPtr(0x00FF00FF, 0xFF00FF00, 128); 231 | printf("output: %.8X\n\n", x); 232 | } 233 | 234 | free(AlphaBlendPtr); 235 | casm_release(casm); 236 | } 237 | 238 | 239 | //! src: ctoken.c, cscanner.c, csynthesis.c, cparser.c, casmpure.c 240 | //! exe: cencoding.c, cinstruct.c, cinstset.c, ckeywords.c, cloader.c 241 | int main(void) 242 | { 243 | testCrossProduct(); 244 | testHelloWorld(); 245 | testAlphaBlend(); 246 | return 0; 247 | } 248 | 249 | 250 | -------------------------------------------------------------------------------- /source/testblit.asm: -------------------------------------------------------------------------------- 1 | PROC dst:DWORD, src:DWORD, dpitch:DWORD, spitch:DWORD, width:DWORD, height:DWORD, mask:DWORD 2 | local diff1:DWORD 3 | local diff2:DWORD 4 | 5 | mov edi, dst 6 | mov esi, src 7 | 8 | mov ebx, width 9 | shl ebx, 2 ; ebx = width * 4 10 | 11 | mov eax, dpitch 12 | sub eax, ebx 13 | mov diff1, eax ; diff1 = dpitch - width * 4 14 | 15 | mov eax, spitch 16 | sub eax, ebx 17 | mov diff2, eax ; diff2 = spitch - width * 4 18 | 19 | mov ebx, mask 20 | mov edx, height 21 | 22 | ALIGN 23 | loop_line: 24 | mov ecx, width 25 | ALIGN 26 | loop_pixel: 27 | mov eax, [esi] 28 | cmp eax, ebx ; same to color key ?? 
29 | jz @f 30 | mov [edi], eax 31 | @@: 32 | add esi, 4 33 | add edi, 4 34 | dec ecx 35 | jnz loop_pixel 36 | 37 | add edi, diff1 38 | add esi, diff2 39 | dec height 40 | jnz loop_line 41 | 42 | ret 43 | ENDP 44 | 45 | 46 | -------------------------------------------------------------------------------- /source/testblit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cencoding.h" 8 | #include "ckeywords.h" 9 | #include "cinstruct.h" 10 | #include "cinstset.h" 11 | #include "ctoken.h" 12 | #include "cscanner.h" 13 | #include "csynthesis.h" 14 | #include "cparser.h" 15 | #include "casmpure.h" 16 | 17 | //dst:DWORD, src:DWORD, dpitch:DWORD, spitch:DWORD, mask:DWORD, width:DWORD, height:DWORD 18 | typedef void (*MaskBlitProc)(void *dst, const void *src, long dpitch, long spitch, 19 | int width, int height, unsigned long mask); 20 | 21 | int BMP1[16] = { 22 | 1, 1, 1, 1, 23 | 1, 1, 1, 1, 24 | 1, 1, 1, 1, 25 | 1, 1, 1, 1, 26 | }; 27 | 28 | int BMP2[16] = { 29 | 0, 0, 0, 0, 30 | 0, 2, 2, 0, 31 | 0, 2, 2, 0, 32 | 0, 0, 0, 0, 33 | }; 34 | 35 | void testBlit() 36 | { 37 | CAssembler *casm; 38 | MaskBlitProc MaskBlit; 39 | int ret, i, j; 40 | 41 | // create assembler 42 | casm = casm_create(); 43 | 44 | // load assembly source 45 | ret = casm_loadfile(casm, "testblit.asm"); 46 | 47 | if (ret != 0) { 48 | printf("error: %s\n", casm->error); 49 | casm_release(casm); 50 | } 51 | 52 | MaskBlit = (MaskBlitProc)casm_callable(casm, NULL); 53 | 54 | if (ret != 0) { 55 | printf("error: %s\n", casm->error); 56 | casm_release(casm); 57 | } 58 | 59 | casm_dumpinst(casm, stdout); 60 | 61 | MaskBlit(BMP1, BMP2, 16, 16, 4, 4, 0); 62 | 63 | for (j = 0; j < 4; j++) { 64 | for (i = 0; i < 4; i++) 65 | printf("%x ", BMP1[j * 4 + i]); 66 | printf("\n"); 67 | } 68 | 69 | casm_release(casm); 70 | } 71 | 72 | 73 | //! 
//! src: ctoken.c, cscanner.c, csynthesis.c, cparser.c, casmpure.c
//! exe: cencoding.c, cinstruct.c, cinstset.c, ckeywords.c, cloader.c
// NOTE(review): the two "//!" lines above look like build-tool directives
// listing the sources this program is built from - confirm before editing.
//
// Program entry point: runs the mask-blit test once and returns 0.
int main(void)
{
    testBlit();
    return 0;
}



--------------------------------------------------------------------------------