├── LICENSE
├── README.md
└── source
    ├── casmpure.c
    ├── casmpure.h
    ├── cencoding.c
    ├── cencoding.h
    ├── cinstruct.c
    ├── cinstruct.h
    ├── cinstset.c
    ├── cinstset.h
    ├── ckeywords.c
    ├── ckeywords.h
    ├── cloader.c
    ├── cloader.h
    ├── cparser.c
    ├── cparser.h
    ├── cscanner.c
    ├── cscanner.h
    ├── csynthesis.c
    ├── csynthesis.h
    ├── ctoken.c
    ├── ctoken.h
    ├── test1.c
    ├── testblit.asm
    └── testblit.c


/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Introduction
  2 | ============
  3 | Asmpure is a reimplementation and enhancement of [SoftWire](http://gna.org/projects/softwire) in C for compiling assembly code. It can be used in projects to generate x86 machine code at run-time as an alternative to self-modifying code. Scripting languages might also benefit from using Asmpure as a JIT-compiler back-end. It also makes it possible to eliminate jumps on variables that are temporarily constant at run-time, for example to construct an optimised pipeline for efficient graphics processing. Because of its capability for 'instruction rewiring' through run-time conditional compilation, I named it "Asmpure". It is targeted only at developers with a good knowledge of C++ and x86 assembly.
  4 | 
  5 | Examples
  6 | ========
  7 | 
  8 | CrossProduct
  9 | ------------
 10 | ```cpp
 11 | /*
 12 | void CrossProduct(float *V0, float *V1, float *V2)
 13 | {
 14 |         V2[0] = V0[1] * V1[2] - V0[2] * V1[1];
 15 |         V2[1] = V0[2] * V1[0] - V0[0] * V1[2];
 16 |         V2[2] = V0[0] * V1[1] - V0[1] * V1[0];
 17 | }*/
 18 | 
 19 | const char *CrossProductAsm = 
 20 | "    mov        ecx, [esp+8]\n"
 21 | "    mov        eax, [esp+4]\n"
 22 | "    mov        edx, [esp+12]\n"
 23 | "\n"    
 24 | "    fld        DWORD [ecx+8]\n"
 25 | "    fmul       DWORD [eax+4]\n"
 26 | "    fld        DWORD [eax+8]\n"
 27 | "    fmul       DWORD [ecx+4]\n"
 28 | "    fsubp      st1, st0\n"
 29 | "    fstp       DWORD [edx]\n"
 30 | "\n"    
 31 | "    fld        DWORD [eax+8]\n"
 32 | "    fmul       DWORD [ecx]\n"
 33 | "    fld        DWORD [eax]\n"
 34 | "    fmul       DWORD [ecx+8]\n"
 35 | "    fsubp      st1, st0\n"
 36 | "    fstp       DWORD [edx+4]\n"
 37 | "\n"    
 38 | "    fld        DWORD [eax]\n"
 39 | "    fmul       DWORD [ecx+4]\n"
 40 | "    fld        DWORD [ecx]\n"
 41 | "    fmul       DWORD [eax+4]\n"
 42 | "    fsubp      st1, st0\n"
 43 | "    fstp       DWORD [edx+8]\n"
 44 | "    \n"
 45 | "    ret\n";
 46 | 
 47 | 
 48 | void testCrossProduct(void)
 49 | {
 50 |         CAssembler *casm;
 51 |         int size, c;
 52 | 
 53 |         void (*CrossProductPtr)(float*, float*, float*);
 54 | 
 55 |         // create assembler
 56 |         casm = casm_create();
 57 | 
 58 |         // append assembly source
 59 |         casm_source(casm, CrossProductAsm);
 60 | 
 61 |         // calculate size
 62 |         size = casm_compile(casm, NULL, 0);
 63 | 
 64 |         if (size < 0) {
 65 |                 printf("compile error: %s\n", casm->error);
 66 |                 casm_release(casm);
 67 |                 return;
 68 |         }
 69 | 
 70 |         CrossProductPtr = (void (*)(float*, float*, float*))malloc(size);
 71 | 
 72 |         casm_compile(casm, (unsigned char*)CrossProductPtr, size);
 73 | 
 74 |         printf("==================== Cross Product ====================\n");
 75 | 
 76 |         casm_dumpinst(casm, stdout);
 77 | 
 78 |         printf("\nExecute code (y/n)?\n\n");
 79 | 
 80 |         do
 81 |         {
 82 |                 c = getch();
 83 |         }
 84 |         while(c != 'y' && c != 'n');
 85 | 
 86 |         if(c == 'y')
 87 |         {
 88 |                 float V0[3] = {1, 0, 0};
 89 |                 float V1[3] = {0, 1, 0};
 90 |                 float V2[3];
 91 | 
 92 |                 CrossProductPtr(V0, V1, V2);
 93 | 
 94 |                 printf("output: (%.3f, %.3f, %.3f)\n\n", V2[0], V2[1], V2[2]);
 95 |         }
 96 | 
 97 |         free(CrossProductPtr);
 98 |         casm_release(casm);
 99 | }
100 | ```
101 | 
102 | *output: (0.000, 0.000, 1.000)*
103 | 
104 | Hello World
105 | -----------
106 | ```cpp
107 | const char *HelloWorldAsm = 
108 | "    mov     eax,  [esp+8]\n"
109 | "    push    eax\n"
110 | "    call    DWORD [esp+8]\n"
111 | "    pop     ecx\n"
112 | "    ret\n";
113 | 
114 | void testHelloWorld(void)
115 | {
116 |         CAssembler *casm;
117 |         int size, c;
118 | 
119 |         void (*HelloWorldPtr)(void*, const char*);
120 | 
121 |         // create assembler
122 |         casm = casm_create();
123 | 
124 |         // append assembly source
125 |         casm_source(casm, HelloWorldAsm);
126 | 
127 |         // calculate size
128 |         size = casm_compile(casm, NULL, 0);
129 | 
130 |         if (size < 0) {
131 |                 printf("compile error: %s\n", casm->error);
132 |                 casm_release(casm);
133 |                 return;
134 |         }
135 | 
136 |         HelloWorldPtr = (void (*)(void*, const char*))malloc(size);
137 | 
138 |         casm_compile(casm, (unsigned char*)HelloWorldPtr, size);
139 | 
140 |         printf("==================== Hello World ====================\n");
141 | 
142 |         casm_dumpinst(casm, stdout);
143 | 
144 |         printf("\nExecute code (y/n)?\n\n");
145 | 
146 |         do
147 |         {
148 |                 c = getch();
149 |         }
150 |         while(c != 'y' && c != 'n');
151 | 
152 |         if(c == 'y')
153 |         {
154 |                 HelloWorldPtr((void*)printf, "Hello, World !!\n");
155 |         }
156 | 
157 |         free(HelloWorldPtr);
158 |         casm_release(casm);
159 | }
160 | ```
161 | *output: Hello, World !!*
162 | 
163 | Alpha Blend
164 | -----------
165 | ```cpp
166 | const char *AlphaBlendAsm = 
167 | "PROC C1:DWORD, C2:DWORD, A:DWORD\n"
168 | "    movd mm0, A\n"
169 | "    punpcklwd mm0, mm0\n"
170 | "    punpckldq mm0, mm0\n"
171 | "    pcmpeqb mm7, mm7\n"
172 | "    psubw mm7, mm0\n"
173 | "    \n"
174 | "    punpcklbw mm1, C1\n"
175 | "    psrlw mm1, 8\n"
176 | "    punpcklbw mm2, C2\n"
177 | "    psrlw mm2, 8\n"
178 | "    \n"
179 | "    pmullw mm1, mm7\n"
180 | "    pmullw mm2, mm0\n"
181 | "    paddw mm1, mm2\n"
182 | "    \n"
183 | "    psrlw mm1, 8\n"
184 | "    packuswb mm1, mm1\n"
185 | "    movd eax, mm1\n"
186 | "    emms\n"
187 | "    ret\n"
188 | "ENDP\n";
189 | 
190 | 
191 | void testAlphaBlend(void)
192 | {
193 |         CAssembler *casm;
194 |         int c;
195 | 
196 |         int (*AlphaBlendPtr)(int, int, int);
197 | 
198 |         // create assembler
199 |         casm = casm_create();
200 | 
201 |         // append assembly source
202 |         casm_source(casm, AlphaBlendAsm);
203 | 
204 | 
205 |         AlphaBlendPtr = (int (*)(int, int, int))casm_callable(casm, NULL);
206 | 
207 |         if (AlphaBlendPtr == NULL) {
208 |                 printf("error: %s\n", casm->error);
209 |                 casm_release(casm);
210 |                 return;
211 |         }
212 | 
213 |         printf("==================== Alpha Blend ====================\n");
214 | 
215 |         casm_dumpinst(casm, stdout);
216 | 
217 |         printf("\nExecute code (y/n)?\n\n");
218 | 
219 |         do
220 |         {
221 |                 c = getch();
222 |         }
223 |         while(c != 'y' && c != 'n');
224 | 
225 |         if(c == 'y')
226 |         {
227 |                 int x = AlphaBlendPtr(0x00FF00FF, 0xFF00FF00, 128);
228 |                 printf("output: %.8X\n\n", x);
229 |         }
230 | 
231 |         free(AlphaBlendPtr);
232 |         casm_release(casm);
233 | }
234 | ```
235 | *output: 7F7F7F7F*
236 | 
237 | 


--------------------------------------------------------------------------------
/source/casmpure.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // casmpure.c - assembly pure compiler
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #include "casmpure.h"
 10 | 
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include <string.h>
 14 | #include <stdarg.h>
 15 | 
 16 | #define IMAX_LINESIZE		4096
 17 | 
 18 | //---------------------------------------------------------------------
 19 | // CORE INTERFACE
 20 | //---------------------------------------------------------------------
 21 | 
 22 | // create assembler
 23 | CAssembler *casm_create(void)
 24 | {
 25 | 	CAssembler *self;
 26 | 	self = (CAssembler*)malloc(sizeof(CAssembler));
 27 | 	assert(self);
 28 | 	self->parser = cparser_create();
 29 | 	assert(self->parser);
 30 | 	self->loader = cloader_create();
 31 | 	assert(self->loader);
 32 | 	self->source = (char*)malloc(1024 + 1);
 33 | 	assert(self->source);
 34 | 	self->srcblock = 1024;
 35 | 	self->srcsize = 0;
 36 | 	self->source[0] = 0;
 37 | 	self->line = (char*)malloc(IMAX_LINESIZE + 10);
 38 | 	assert(self->line);
 39 | 	self->error = (char*)malloc(2048);
 40 | 	assert(self->error);
 41 | 	self->error[0] = 0;
 42 | 	self->errcode = 0;
 43 | 	return self;
 44 | }
 45 | 
 46 | // reset assembler
 47 | void casm_reset(CAssembler *self)
 48 | {
 49 | 	assert(self);
 50 | 	if (self->source) free(self->source);
 51 | 	self->source = (char*)malloc(1024 + 1);
 52 | 	assert(self->source);
 53 | 	self->srcblock = 1024;
 54 | 	self->srcsize = 0;
 55 | 	self->source[0] = 0;
 56 | 	cloader_reset(self->loader);
 57 | 	cparser_reset(self->parser);
 58 | }
 59 | 
 60 | // release assembler
 61 | void casm_release(CAssembler *self)
 62 | {
 63 | 	assert(self);
 64 | 	if (self->parser) {
 65 | 		cparser_release(self->parser);
 66 | 		self->parser = NULL;
 67 | 	}
 68 | 	if (self->loader) {
 69 | 		cloader_release(self->loader);
 70 | 		self->loader = NULL;
 71 | 	}
 72 | 	if (self->source) {
 73 | 		free(self->source);
 74 | 		self->source = NULL;
 75 | 	}
 76 | 	if (self->line) {
 77 | 		free(self->line);
 78 | 		self->line = NULL;
 79 | 	}
 80 | 	if (self->error) {
 81 | 		free(self->error);
 82 | 		self->error = NULL;
 83 | 	}
 84 | 	self->srcblock = 0;
 85 | 	self->srcsize = 0;
 86 | 	free(self);
 87 | }
 88 | 
 89 | // add source to source buffer
 90 | int casm_source(CAssembler *self, const char *text)
 91 | {
 92 | 	int datasize = (int)strlen(text);
 93 | 	int newsize = datasize + self->srcsize;
 94 | 	int newblock = 1;
 95 | 	while (newblock < newsize) newblock <<= 1;
 96 | 	if (newblock != self->srcblock) {
 97 | 		char *buffer = (char*)malloc(newblock + 1);
 98 | 		assert(buffer);
 99 | 		memcpy(buffer, self->source, self->srcsize);
100 | 		buffer[self->srcsize] = 0;
101 | 		free(self->source);
102 | 		self->source = buffer;
103 | 		self->srcblock = newblock;
104 | 	}
105 | 	memcpy(self->source + self->srcsize, text, datasize);
106 | 	self->srcsize = newsize;
107 | 	self->source[newsize] = 0;
108 | 	return 0;
109 | }
110 | 
111 | // prompt error
112 | static void casm_error(CAssembler *self, const char *msg, int code)
113 | {
114 | 	sprintf(self->error, "line(%d): error(%d): %s", self->lineno, code, msg);
115 | 	self->errcode = code;
116 | }
117 | 
118 | // compile single line
119 | static int casm_compile_line(CAssembler *self, const char *line)
120 | {
121 | 	const CEncoding *encoding;
122 | 
123 | 	assert(self);
124 | 
125 | 	self->error[0] = 0;
126 | 	self->errcode = 0;
127 | 
128 | 	encoding = cparser_parse_line(self->parser, line);
129 | 
130 | 	if (encoding == NULL) {
131 | 		casm_error(self, self->parser->error, self->parser->errcode);
132 | 		return -1;
133 | 	}
134 | 
135 | 	cloader_new_encoding(self->loader, encoding);
136 | 
137 | 	return 0;
138 | }
139 | 
140 | // compile source buffer
141 | // if (code == NULL) returns compiled code size
142 | // if (code != NULL) and (maxsize >= codesize) compile and returns codesize
143 | // if (code != NULL) and (maxsize < codesize) returns error
144 | int casm_compile(CAssembler *self, unsigned char *code, long maxsize)
145 | {
146 | 	int lineno, p1, p2;
147 | 	const char *text;
148 | 	long codesize;
149 | 
150 | 	assert(self);
151 | 
152 | 	text = self->source;
153 | 
154 | 	cloader_reset(self->loader);
155 | 	cparser_reset(self->parser);
156 | 
157 | 	for (lineno = 1, p1 = 0; p1 < self->srcsize; ) {
158 | 		for (p2 = p1; text[p2] != 0 && text[p2] != '\n'; p2++);
159 | 		self->lineno = lineno++;
160 | 		if (p2 - p1 >= IMAX_LINESIZE) {
161 | 			casm_error(self, "line size too long", 1);
162 | 			return -1;
163 | 		}
164 | 
165 | 		memcpy(self->line, self->source + p1, p2 - p1);
166 | 		self->line[p2 - p1] = 0;
167 | 		p1 = p2 + 1;
168 | 
169 | 		if (casm_compile_line(self, self->line) != 0) {
170 | 			return -2;
171 | 		}
172 | 	}
173 | 
174 | 	codesize = cloader_get_codesize(self->loader) + 10;
175 | 
176 | 	if (code == NULL) 
177 | 		return codesize;
178 | 
179 | 	if (maxsize < codesize) {
180 | 		casm_error(self, "need a larger memory block to get code", 2);
181 | 		return -3;
182 | 	}
183 | 
184 | 	memset(code, 0xcc, codesize);
185 | 
186 | 	if (cloader_output(self->loader, code) != 0) {
187 | 		self->lineno = self->loader->errcode;
188 | 		casm_error(self, self->loader->error, 3);
189 | 		return -4;
190 | 	}
191 | 
192 | 	return codesize;
193 | }
194 | 
195 | 
196 | // get error
197 | const char *casm_geterror(const CAssembler *self, int *errcode)
198 | {
199 | 	if (errcode) *errcode = self->errcode;
200 | 	return self->error;
201 | }
202 | 
203 | 
204 | //---------------------------------------------------------------------
205 | // HIGH LEVEL
206 | //---------------------------------------------------------------------
207 | int casm_pushline(CAssembler *self, const char *fmt, ...)
208 | {
209 | 	char *buffer = self->error;
210 | 	va_list argptr;
211 | 
212 | 	va_start(argptr, fmt);
213 | 	vsprintf(buffer, fmt, argptr);
214 | 	va_end(argptr);
215 | 
216 | 	casm_source(self, buffer);
217 | 	casm_source(self, "\n");
218 | 
219 | 	self->error[0] = 0;
220 | 
221 | 	return 0;
222 | }
223 | 
224 | 
225 | void *casm_callable(CAssembler *self, long *codesize)
226 | {
227 | 	unsigned char *code;
228 | 	long size;
229 | 
230 | 	if (codesize) *codesize = 0;
231 | 
232 | 	size = casm_compile(self, NULL, 0);
233 | 
234 | 	if (size < 0) {
235 | 		return NULL;
236 | 	}
237 | 
238 | 	code = (unsigned char*)malloc(size + 1);
239 | 	assert(code);
240 | 
241 | 	if (casm_compile(self, code, size) < 0) {
242 | 		free(code);
243 | 		return NULL;
244 | 	}
245 | 
246 | 	if (codesize) *codesize = size;
247 | 
248 | 	return code;
249 | }
250 | 
251 | 
252 | // load assembly source file
253 | int casm_loadfile(CAssembler *self, const char *filename)
254 | {
255 | 	char line[80];
256 | 	FILE *fp;
257 | 	casm_reset(self);
258 | 	if ((fp = fopen(filename, "r")) == NULL) 
259 | 		return -1;
260 | 	while (!feof(fp)) {
261 | 		int size = (int)fread(line, 1, 60, fp);
262 | 		line[size] = 0;
263 | 		casm_source(self, line);
264 | 	}
265 | 	fclose(fp);
266 | 	return 0;
267 | }
268 | 
269 | int casm_savefile(CAssembler *self, const char *filename)
270 | {
271 | 	char *codedata, *p;
272 | 	long codesize;
273 | 	FILE *fp;
274 | 
275 | 	codedata = (char*)casm_callable(self, &codesize);
276 | 	if (codedata == NULL) return -1;
277 | 
278 | 	if ((fp = fopen(filename, "wb")) == NULL) {
279 | 		free(codedata);
280 | 		return -2;
281 | 	}
282 | 
283 | 	for (p = codedata; p < codedata + codesize; ) {
284 | 		int canwrite = codesize - (int)(p - codedata);
285 | 		int hr = (int)fwrite(p, 1, canwrite, fp);
286 | 		if (hr > 0) p += hr;
287 | 	}
288 | 
289 | 	fclose(fp);
290 | 
291 | 	free(codedata);
292 | 
293 | 	return 0;
294 | }
295 | 
// dump a listing: for every source line that produced an encoding, print
// the code offset, the encoded bytes in hex and the source line itself
// fp: output stream, stdout is used when NULL
// returns 0 on success, -1 if compilation fails or a line is too long
int casm_dumpinst(CAssembler *self, FILE *fp)
{
	CLoader *loader = self->loader;
	int lineno, p1, p2, maxsize, pos;
	const char *text;
	char *codedata;
	iqueue_head *node;

	text = self->source;

	// compile once so that the loader's list is populated; the code
	// block itself is not needed
	codedata = (char*)casm_callable(self, NULL);
	if (codedata == NULL) return -1;
	free(codedata);

	node = loader->head.next;

	// longest encoding, used to pad the hex column to a common width
	for (maxsize = 0; node != &loader->head; node = node->next) {
		CLink *link = iqueue_entry(node, CLink, head);
		int length = cencoding_length(&link->encoding);
		if (length > maxsize) maxsize = length;
	}

	node = loader->head.next;
	fp = (fp != NULL)? fp : stdout;

	// walk the source line by line, advancing through the loader's list
	// in step with the line numbers
	for (lineno = 1, p1 = 0, pos = 0; p1 < self->srcsize; lineno++) {
		for (p2 = p1; text[p2] != 0 && text[p2] != '\n'; p2++);
		if (p2 - p1 >= IMAX_LINESIZE) {
			casm_error(self, "line size too long", 1);
			return -1;
		}

		memcpy(self->line, self->source + p1, p2 - p1);
		self->line[p2 - p1] = 0;
		p1 = p2 + 1;

		// skip links that belong to earlier lines
		while (node != &loader->head) {
			CLink *link = iqueue_entry(node, CLink, head);
			if (link->lineno >= lineno) break;
			node = node->next;
		}

		if (node != &loader->head) {
			CLink *link = iqueue_entry(node, CLink, head);
			if (link->lineno == lineno) {
				// NOTE(review): fixed-size static buffer; nothing here
				// bounds the hex text or the padding written below
				static char output[4096];
				int length, size;
				length = cencoding_length(&link->encoding);
				if (link->encoding.align > 0) {
					// alignment directive: show the padding up to the
					// next multiple of align as "66 90" pairs plus a
					// single "90" for an odd remainder
					int align, i, k;
					align = link->encoding.align;
					length = align - (pos % align);
					for (i = length, k = 0; i > 0; ) {
						if (i >= 2) {
							output[k++] = '6';
							output[k++] = '6';
							output[k++] = ' ';
							i--;
						}
						output[k++] = '9';
						output[k++] = '0';
						output[k++] = ' ';
						i--;
					}
					output[k++] = 0;
				}	else {
					cencoding_to_string(&link->encoding, output);
				}
				// pad the hex column to three characters per byte of the
				// longest encoding
				for (size = (int)strlen(output); size < (maxsize) * 3; )
					output[size++] = ' ';
				output[size] = 0;
				// lines that emit no bytes get blanks instead of an offset
				if (length == 0) fprintf(fp, "         ");
				else fprintf(fp, "%08X:", pos);
				pos += length;
				fprintf(fp, "  %s\t%s\n", output, self->line);
			}
		}
	}

	return 0;
}
377 | 
378 | 
379 | 


--------------------------------------------------------------------------------
/source/casmpure.h:
--------------------------------------------------------------------------------
 1 | //=====================================================================
 2 | //
 3 | // casmpure.h - assembly pure compiler
 4 | //
 5 | // NOTE:
 6 | // for more information, please see the readme file.
 7 | //
 8 | //=====================================================================
 9 | #ifndef __CASMPURE_H__
10 | #define __CASMPURE_H__
11 | 
12 | #include "cloader.h"
13 | #include "cparser.h"
14 | 
15 | 
//---------------------------------------------------------------------
// CAssembler Definition
//---------------------------------------------------------------------
// State of one assembler instance: the parser/loader pair, the
// accumulated source text and the last error.
struct CAssembler
{
	CParser *parser;	// parses one source line into an encoding
	CLoader *loader;	// collects the encodings and emits the code
	int srcblock;		// allocated capacity of source (without the NUL byte)
	int srcsize;		// number of characters currently stored in source
	char *line;			// scratch buffer holding the line being compiled
	char *source;		// NUL-terminated source text
	char *error;		// last error message (also used as formatting scratch)
	int errcode;		// last error code, 0 when there is no error
	int lineno;			// line number used when reporting errors
};

typedef struct CAssembler CAssembler; 
33 | 
34 | 
35 | 
36 | #ifdef __cplusplus
37 | extern "C" {
38 | #endif
//---------------------------------------------------------------------
// Interface
//---------------------------------------------------------------------

// create assembler
CAssembler *casm_create(void);

// delete assembler (releases the parser, the loader and all buffers)
void casm_release(CAssembler *self);

// reset compiler state and clean source buffer
void casm_reset(CAssembler *self);

// add source to assembler source buffer (text is appended)
int casm_source(CAssembler *self, const char *text);

// compile source buffer
// if (code == NULL) returns compiled code size
// if (code != NULL) and (maxsize >= codesize) compile and returns codesize
// if (code != NULL) and (maxsize < codesize) returns error
// errors are reported as negative values, see casm_geterror for details
int casm_compile(CAssembler *self, unsigned char *code, long maxsize);

// get error
// returns the error message; the error code is stored in *errcode
// when errcode is not NULL
const char *casm_geterror(const CAssembler *self, int *errcode);


// HIGH LEVEL interface:

// add a single line to assembly (printf-style format, '\n' is appended)
int casm_pushline(CAssembler *self, const char *fmt, ...);

// compile and write execode into a memory block
// you can call free() when you need to dispose
// returns NULL on failure; *codesize (optional) receives the code size
void *casm_callable(CAssembler *self, long *codesize);


// load assembly source file (will reset source buffer)
// returns 0 on success, negative on failure
int casm_loadfile(CAssembler *self, const char *filename);

// save compiled code into file
// returns 0 on success, negative on failure
int casm_savefile(CAssembler *self, const char *filename);

// dump instructions and source line 
// fp may be NULL, in which case stdout is used
int casm_dumpinst(CAssembler *self, FILE *fp);
83 | 
84 | 
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 | 
89 | #endif
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/source/cencoding.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cencoding.c - x86 instruction encoding
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #include "cencoding.h"
 10 | 
 11 | #ifdef _MSC_VER
 12 | #pragma warning(disable: 4996)
 13 | #pragma warning(disable: 4311)
 14 | #endif
 15 | 
 16 | void cencoding_reset(CEncoding *self)
 17 | {
 18 | 	if (self->label) free(self->label);
 19 | 	self->label = NULL;
 20 | 	if (self->reference) free(self->reference);
 21 | 	self->reference = NULL;
 22 | 	if (self->data) free(self->data);
 23 | 	self->data = NULL;
 24 | 
 25 | 	self->format.P1 = 0;
 26 | 	self->format.P2 = 0;
 27 | 	self->format.P3 = 0;
 28 | 	self->format.P4 = 0;
 29 | 	self->format.REX = 0;
 30 | 	self->format.O3 = 0;
 31 | 	self->format.O2 = 0;
 32 | 	self->format.O1 = 0;
 33 | 	self->format.modRM = 0;
 34 | 	self->format.SIB = 0;
 35 | 	self->format.D1 = 0;
 36 | 	self->format.D2 = 0;
 37 | 	self->format.D3 = 0;
 38 | 	self->format.D4 = 0;
 39 | 	self->format.I1 = 0;
 40 | 	self->format.I2 = 0;
 41 | 	self->format.I3 = 0;
 42 | 	self->format.I4 = 0;
 43 | 
 44 | 	self->P1 = 0;
 45 | 	self->P2 = 0;
 46 | 	self->P3 = 0;
 47 | 	self->P4 = 0;
 48 | 	self->REX.b = 0;
 49 | 	self->O3 = 0;
 50 | 	self->O2 = 0;
 51 | 	self->O1 = 0;
 52 | 	self->modRM.b = 0;
 53 | 	self->SIB.b = 0;
 54 | 	self->D1 = 0;
 55 | 	self->D2 = 0;
 56 | 	self->D3 = 0;
 57 | 	self->D4 = 0;
 58 | 	self->I1 = 0;
 59 | 	self->I2 = 0;
 60 | 	self->I3 = 0;
 61 | 	self->I4 = 0;
 62 | 
 63 | 	self->immediate = 0;
 64 | 	self->displacement = 0;
 65 | 	self->message = (char*)"";
 66 | 	self->size = 0;
 67 | 	self->align = 0;
 68 | 	self->relative = 0;
 69 | }
 70 | 
 71 | void cencoding_init(CEncoding *self)
 72 | {
 73 | 	self->label = 0;
 74 | 	self->reference = 0;
 75 | 	self->data = 0;
 76 | 	self->size = 0;
 77 | 	cencoding_reset(self);
 78 | 	self->O1 = 0xCC;	// breakpoint
 79 | 	self->format.O1 = 1;	
 80 | }
 81 | 
 82 | void cencoding_destroy(CEncoding *self)
 83 | {
 84 | 	cencoding_reset(self);
 85 | }
 86 | 
 87 | const char *cencoding_get_label(const CEncoding *self)
 88 | {
 89 | 	return self->label;
 90 | }
 91 | 
 92 | const char *cencoding_get_reference(const CEncoding *self)
 93 | {
 94 | 	return self->reference;
 95 | }
 96 | 
 97 | int cencoding_length(const CEncoding *self)
 98 | {
 99 | 	int length = 0;
100 | 	if (self->data && self->size > 0) 
101 | 		return self->size;
102 | 	if (self->align > 0) 
103 | 		return self->align;
104 | 	if (self->format.P1)		length++;
105 | 	if (self->format.P2)		length++;
106 | 	if (self->format.P3)		length++;
107 | 	if (self->format.P4)		length++;
108 | 	if (self->format.REX)		length++;
109 | 	if (self->format.O3)		length++;
110 | 	if (self->format.O2)		length++;
111 | 	if (self->format.O1)		length++;
112 | 	if (self->format.modRM)		length++;
113 | 	if (self->format.SIB)		length++;
114 | 	if (self->format.D1)		length++;
115 | 	if (self->format.D2)		length++;
116 | 	if (self->format.D3)		length++;
117 | 	if (self->format.D4)		length++;
118 | 	if (self->format.I1)		length++;
119 | 	if (self->format.I2)		length++;
120 | 	if (self->format.I3)		length++;
121 | 	if (self->format.I4)		length++;
122 | 
123 | 	return length;
124 | }
125 | 
126 | int cencoding_new_copy(CEncoding *self, const CEncoding *src)
127 | {
128 | 	*self = *src;
129 | 	if (src->label) {
130 | 		long size = (long)strlen(src->label);
131 | 		self->label = (char*)malloc(size + 1);
132 | 		assert(self->label);
133 | 		memcpy(self->label, src->label, size + 1);
134 | 		self->label[size] = 0;
135 | 	}
136 | 	if (src->reference) {
137 | 		long size = (long)strlen(src->reference);
138 | 		self->reference = (char*)malloc(size + 1);
139 | 		assert(self->reference);
140 | 		memcpy(self->reference, src->reference, size + 1);
141 | 		self->reference[size] = 0;
142 | 	}
143 | 	if (src->data) {
144 | 		self->data = (char*)malloc(src->size + 1);
145 | 		assert(self->data);
146 | 		memcpy(self->data, src->data, src->size);
147 | 		self->size = src->size;
148 | 	}
149 | 	return 0;
150 | }
151 | 
152 | int cencoding_add_prefix(CEncoding *self, cbyte p)
153 | {
154 | 	if (!self->format.P1) {
155 | 		self->P1 = p;
156 | 		self->format.P1 = 1;
157 | 	}
158 | 	else if (!self->format.P2) {
159 | 		self->P2 = p;
160 | 		self->format.P2 = 1;
161 | 	}
162 | 	else if (!self->format.P3) {
163 | 		self->P3 = p;
164 | 		self->format.P3 = 1;
165 | 	}
166 | 	else if (!self->format.P4) {
167 | 		self->P4 = p;
168 | 		self->format.P4 = 1;
169 | 	}	else {
170 | 		return -1;
171 | 	}
172 | 	return 0;
173 | }
174 | 
175 | int cencoding_set_immediate(CEncoding *self, int immediate)
176 | {
177 | 	self->immediate = immediate;
178 | 	return 0;
179 | }
180 | 
181 | int cencoding_set_jump_offset(CEncoding *self, int offset)
182 | {
183 | 	if ((char)offset != offset && self->format.I2 == 0) {
184 | 		self->message = (char*)"Jump offset range too big";
185 | 		return -1;
186 | 	}
187 | 	self->immediate = offset;
188 | 	return 0;
189 | }
190 | 
191 | void cencoding_set_label(CEncoding *self, const char *label)
192 | {
193 | 	int size = (int)strlen(label);
194 | 	if (self->label) free(self->label);
195 | 	self->label = (char*)malloc(size + 1);
196 | 	assert(self->label);
197 | 	memcpy(self->label, label, size + 1);
198 | }
199 | 
200 | void cencoding_set_reference(CEncoding *self, const char *ref)
201 | {
202 | 	int size = (int)strlen(ref);
203 | 	if (self->reference) free(self->reference);
204 | 	self->reference = (char*)malloc(size + 1);
205 | 	assert(self->reference);
206 | 	memcpy(self->reference, ref, size + 1);
207 | }
208 | 
209 | void cencoding_set_data(CEncoding *self, const void *data, int size)
210 | {
211 | 	if (self->data) free(self->data);
212 | 	self->data = NULL;
213 | 	self->size = 0;
214 | 	if (data && size > 0) {
215 | 		self->data = (char*)malloc(size + 1);
216 | 		assert(self->data);
217 | 		self->size = size;
218 | 		memcpy(self->data, data, size);
219 | 	}
220 | }
221 | 
222 | int cencoding_check_format(const CEncoding *self)
223 | {
224 | 	// Bytes cannot be changed without updating format, 
225 | 	// except immediate and displacement
226 | 	if ((self->P1 && !self->format.P1) ||
227 | 	   (self->P2 && !self->format.P2) ||
228 | 	   (self->P3 && !self->format.P3) ||
229 | 	   (self->P4 && !self->format.P4) ||
230 | 	   (self->REX.b && !self->format.REX) ||
231 | 	   (self->O2 && !self->format.O2) ||
232 | 	   (self->O1 && !self->format.O1) ||
233 | 	   (self->modRM.b && !self->format.modRM) ||
234 | 	   (self->SIB.b && !self->format.SIB)) {
235 | 		return -1;   
236 | 	}
237 | 
238 | 	if ((self->format.P4 && !self->format.P3) ||
239 | 	   (self->format.P3 && !self->format.P2) ||
240 | 	   (self->format.P2 && !self->format.P1)) {
241 | 		return -2;
242 | 	}
243 | 
244 | 	if (self->format.O2 &&
245 | 	   (self->O2 != 0x0F &&
246 | 	    self->O2 != 0xD8 &&
247 | 		self->O2 != 0xD9 &&
248 | 		self->O2 != 0xDA &&
249 | 		self->O2 != 0xDB &&
250 | 		self->O2 != 0xDC &&
251 | 		self->O2 != 0xDD &&
252 | 		self->O2 != 0xDE &&
253 | 		self->O2 != 0xDF)) {
254 | 		return -3;
255 | 	}
256 | 
257 | 	if (self->format.SIB) {
258 | 		if(!self->format.modRM) {
259 | 			return -4;
260 | 		}
261 | 		if(self->modRM.r_m != E_ESP) {
262 | 			return -5;
263 | 		}
264 | 	}
265 | 
266 | 	// Byte, word or doubleword
267 | 	if ((self->format.D4 && !self->format.D3) ||
268 | 	   (self->format.D3 && !self->format.D4) ||
269 | 	   (self->format.D3 && !self->format.D2) ||
270 | 	   (self->format.D2 && !self->format.D1)) {
271 | 		return -6;
272 | 	}
273 | 
274 | 	// Byte, word or doubleword
275 | 	if ((self->format.I4 && !self->format.I3) ||
276 | 	   (self->format.I3 && !self->format.I4) ||
277 | 	   (self->format.I3 && !self->format.I2) ||
278 | 	   (self->format.I2 && !self->format.I1)) {
279 | 		return -7;
280 | 	}
281 | 
282 | 	return 0;
283 | }
284 | 
// write the bytes of this encoding to output and return how many bytes
// it occupies. When output is NULL nothing is stored and only the byte
// count is computed.
int cencoding_write_code(const CEncoding *self, unsigned char *output)
{
	unsigned char *start = output;

	// store one byte (unless measuring) and advance the write position
	#define cencoding_output(b) { if (start) *output = (b); output++; } 

	// raw data block: copied verbatim
	if (self->data && self->size > 0) {
		if (output) memcpy(output, self->data, self->size);
		return (int)self->size;
	}

	// alignment directive: pad with "66 90" pairs and single "90" bytes.
	// The padding is derived from the low 32 bits of the output address
	// itself; an already aligned address still receives a full align
	// bytes of padding.
	if (self->align > 0) {
		unsigned long linear = (((unsigned long)output) & 0xfffffffful);
		int size = self->align - (linear % self->align);
		for (; size >= 2; size -= 2) {
			cencoding_output(0x66);
			cencoding_output(0x90);
		}
		for (; size > 0; size--) {
			cencoding_output(0x90);
		}
		return (int)(output - start);
	}

	// regular instruction: emit every byte flagged as present, in order
	// prefixes, REX, opcode, modRM, SIB, displacement, immediate
	if (self->format.P1)		cencoding_output(self->P1);
	if (self->format.P2)		cencoding_output(self->P2);
	if (self->format.P3)		cencoding_output(self->P3);
	if (self->format.P4)		cencoding_output(self->P4);
	if (self->format.REX)		cencoding_output(self->REX.b);
	if (self->format.O3)		cencoding_output(self->O3);
	if (self->format.O2)		cencoding_output(self->O2);
	if (self->format.O1)		cencoding_output(self->O1);
	if (self->format.modRM)		cencoding_output(self->modRM.b);
	if (self->format.SIB)		cencoding_output(self->SIB.b);
	if (self->format.D1)		cencoding_output(self->D1);
	if (self->format.D2)		cencoding_output(self->D2);
	if (self->format.D3)		cencoding_output(self->D3);
	if (self->format.D4)		cencoding_output(self->D4);
	if (self->format.I1)		cencoding_output(self->I1);
	if (self->format.I2)		cencoding_output(self->I2);
	if (self->format.I3)		cencoding_output(self->I3);
	if (self->format.I4)		cencoding_output(self->I4);

	#undef cencoding_output

	return (int)(output - start);
}
332 | 
333 | 
334 | void cencoding_to_string(const CEncoding *self, char *output)
335 | {
336 | 	const char *fmt = "0123456789ABCDEF";
337 | 	int hr = cencoding_check_format(self);
338 | 
339 | 	assert(hr == 0);
340 | 
341 | 	#define cencoding_format(data) { \
342 | 			if (output) { \
343 | 				unsigned char ch = (unsigned char)(data & 0xff); \
344 | 				*output++ = fmt[ch / 16]; \
345 | 				*output++ = fmt[ch % 16]; \
346 | 				*output++ = ' '; \
347 | 			}	\
348 | 		}
349 | 
350 | 	if (self->data) {
351 | 		long i;
352 | 		for (i = 0; i < self->size; i++) {
353 | 			unsigned int bb = (unsigned char)self->data[i];
354 | 			cencoding_format(bb);
355 | 		}
356 | 		*output++ = '\0';
357 | 		return;
358 | 	}
359 | 
360 | 	if (self->align > 0) {
361 | 		*output++ = '\0';
362 | 		return;
363 | 	}
364 | 
365 | 	if (self->format.P1)		cencoding_format(self->P1);
366 | 	if (self->format.P2)		cencoding_format(self->P2);
367 | 	if (self->format.P3)		cencoding_format(self->P3);
368 | 	if (self->format.P4)		cencoding_format(self->P4);
369 | 	if (self->format.REX)		cencoding_format(self->REX.b);
370 | 	if (self->format.O3)		cencoding_format(self->O3);
371 | 	if (self->format.O2)		cencoding_format(self->O2);
372 | 	if (self->format.O1)		cencoding_format(self->O1);
373 | 	if (self->format.modRM)		cencoding_format(self->modRM.b);
374 | 	if (self->format.SIB)		cencoding_format(self->SIB.b);
375 | 	if (self->format.D1)		cencoding_format(self->D1);
376 | 	if (self->format.D2)		cencoding_format(self->D2);
377 | 	if (self->format.D3)		cencoding_format(self->D3);
378 | 	if (self->format.D4)		cencoding_format(self->D4);
379 | 	if (self->format.I1)		cencoding_format(self->I1);
380 | 	if (self->format.I2)		cencoding_format(self->I2);
381 | 	if (self->format.I3)		cencoding_format(self->I3);
382 | 	if (self->format.I4)		cencoding_format(self->I4);
383 | 
384 | 	#undef cencoding_format
385 | 
386 | 	*output++ = '\0';
387 | }
388 | 
389 | 
390 | void cencoding_to_stdout(const CEncoding *self)
391 | {
392 | 	static char text[8192];
393 | 	cencoding_to_string(self, text);
394 | 	printf("%s\n", text);
395 | }
396 | 
397 | 


--------------------------------------------------------------------------------
/source/cencoding.h:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cencoding.h - x86 instruction encoding
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | 
 10 | #ifndef __CENCODING_H__
 11 | #define __CENCODING_H__
 12 | 
 13 | #include <stdio.h>
 14 | #include <stdlib.h>
 15 | #include <string.h>
 16 | #include <ctype.h>
 17 | #include <assert.h>
 18 | 
//---------------------------------------------------------------------
// Platform Word Size Detect
//---------------------------------------------------------------------
// Selects the types behind cint32 / cuint32 for the current platform or
// compiler. The whole block is skipped when either guard macro is
// already defined.
#if (!defined(__CUINT32_DEFINED)) && (!defined(__CINT32_DEFINED))
#define __CUINT32_DEFINED
#define __CINT32_DEFINED
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) \
	 || defined(__i386__) || defined(__i386) || defined(_M_X86)
	typedef unsigned int cuint32;
	typedef int cint32;
#elif defined(__MACOS__)
	typedef UInt32 cuint32;
	typedef Int32 cint32;
#elif defined(__APPLE__) && defined(__MACH__)
	#include <sys/types.h>
	typedef u_int32_t cuint32;
	typedef int32_t cint32;
#elif defined(__BEOS__)
	#include <sys/inttypes.h>
	typedef u_int32_t cuint32;
	typedef int32_t cint32;
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || \
	defined(__amd64) || defined(_M_IA64) || defined(_M_AMD64)
	typedef unsigned int cuint32;
	typedef int cint32;
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	typedef unsigned __int32 cuint32;
	typedef __int32 cint32;
#elif defined(__GNUC__)
	#include <stdint.h>
	typedef uint32_t cuint32;
	typedef int32_t cint32;
#else 
	// fallback: long is at least 32 bits wide
	typedef unsigned long cuint32;     
	typedef long cint32;
#endif
#endif
 56 | 
// 8/16/64-bit integer aliases, each guarded so that an earlier
// definition wins

#ifndef __CINT8_DEFINED
#define __CINT8_DEFINED
// plain char: whether it is signed depends on the compiler
typedef char cint8;
#endif

#ifndef __CUINT8_DEFINED
#define __CUINT8_DEFINED
typedef unsigned char cuint8;
#endif

#ifndef __CUINT16_DEFINED
#define __CUINT16_DEFINED
typedef unsigned short cuint16;
#endif

#ifndef __CINT16_DEFINED
#define __CINT16_DEFINED
// NOTE(review): upper-case name, unlike the other aliases in this file
typedef short CINT16;
#endif

#ifndef __CINT64_DEFINED
#define __CINT64_DEFINED
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 cint64;
#else
typedef long long cint64;
#endif
#endif

#ifndef __CUINT64_DEFINED
#define __CUINT64_DEFINED
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 cuint64;
#else
typedef unsigned long long cuint64;
#endif
#endif
 94 | 
// INLINE: compiler-specific spelling of the inline keyword
#ifndef INLINE
#ifdef __GNUC__

// NOTE(review): __GNUC_MINOE__ looks like a misspelling of
// __GNUC_MINOR__; if it is not defined it evaluates to 0 here, so the
// always_inline branch is not taken - confirm the intent before fixing
#if __GNUC_MINOE__ >= 1  && __GNUC_MINOE__ < 4
#define INLINE         __inline__ __attribute__((always_inline))
#else
#define INLINE         __inline__
#endif

#elif (defined(_MSC_VER) || defined(__BORLANDC__) || defined(__WATCOMC__))
#define INLINE __inline
#else
#define INLINE 
#endif
#endif

// map "inline" to INLINE unless a macro of that name already exists
#ifndef inline
#define inline INLINE
#endif

// a single byte of machine code
typedef cuint8 cbyte;
116 | 
117 | 
//---------------------------------------------------------------------
// CReg
//---------------------------------------------------------------------
// Register numbers. Names listed on one row share the same value;
// E_R0..E_R15 are the plain numeric ids and REG_UNKNOWN (-1) marks an
// unrecognized register.
enum CRegID
{
	REG_UNKNOWN = -1,
	E_AL = 0, E_AX = 0, E_EAX = 0, E_ST0 = 0, E_MM0 = 0, E_XMM0 = 0,
	E_CL = 1, E_CX = 1, E_ECX = 1, E_ST1 = 1, E_MM1 = 1, E_XMM1 = 1,
	E_DL = 2, E_DX = 2, E_EDX = 2, E_ST2 = 2, E_MM2 = 2, E_XMM2 = 2,
	E_BL = 3, E_BX = 3, E_EBX = 3, E_ST3 = 3, E_MM3 = 3, E_XMM3 = 3,
	E_AH = 4, E_SP = 4, E_ESP = 4, E_ST4 = 4, E_MM4 = 4, E_XMM4 = 4,
	E_CH = 5, E_BP = 5, E_EBP = 5, E_ST5 = 5, E_MM5 = 5, E_XMM5 = 5,
	E_DH = 6, E_SI = 6, E_ESI = 6, E_ST6 = 6, E_MM6 = 6, E_XMM6 = 6,
	E_BH = 7, E_DI = 7, E_EDI = 7, E_ST7 = 7, E_MM7 = 7, E_XMM7 = 7,
	E_R0 = 0, E_R1 = 1, E_R2 = 2, E_R3 = 3, E_R4 = 4, E_R5 = 5,
	E_R6 = 6, E_R7 = 7, E_R8 = 8, E_R9 = 9, E_R10 = 10, E_R11 = 11,
	E_R12 = 12, E_R13 = 13, E_R14 = 14, E_R15 = 15
};
136 | 
// Addressing-mode selector constants (values 0..3)
// presumably the values of the two-bit modRM.mod field - confirm at the
// places where it is assigned
enum CSMod
{
	MOD_NO_DISP = 0,
	MOD_BYTE_DISP = 1,
	MOD_DWORD_DISP = 2,
	MOD_REG = 3
};
144 | 
// Index scale factor constants: SCALE_n has the value log2(n).
// SCALE_UNKNOWN shares the value 0 with SCALE_1.
// presumably the values of the two-bit SIB.scale field - confirm at the
// places where it is assigned
enum CScale
{
	SCALE_UNKNOWN = 0,
	SCALE_1 = 0,
	SCALE_2 = 1,
	SCALE_4 = 2,
	SCALE_8 = 3
};
153 | 
154 | 
//---------------------------------------------------------------------
// CEncoding 
//---------------------------------------------------------------------
// One assembled item: an instruction (described byte by byte), a raw
// data block, or an alignment directive.
struct CEncoding
{
	char *label;		// heap-allocated label name, NULL if none
	char *reference;	// heap-allocated name of a referenced label, NULL if none
	char *message;		// diagnostic text; points at string literals, never freed
	char *data;			// heap-allocated raw data block, NULL if none
	int size;			// number of bytes in data
	int align;			// > 0: alignment directive with this alignment
	int relative;		// NOTE(review): meaning is not visible in this file - confirm

	// one flag per byte below: set when that byte is part of the encoding
	struct {
		unsigned char P1 : 1;
		unsigned char P2 : 1;
		unsigned char P3 : 1;
		unsigned char P4 : 1;
		unsigned char REX : 1;
		unsigned char O3 : 1;
		unsigned char O2 : 1;
		unsigned char O1 : 1;
		unsigned char modRM : 1;
		unsigned char SIB : 1;
		unsigned char D1 : 1;
		unsigned char D2 : 1;
		unsigned char D3 : 1;
		unsigned char D4 : 1;
		unsigned char I1 : 1;
		unsigned char I2 : 1;
		unsigned char I3 : 1;
		unsigned char I4 : 1;		
	}	format;

	unsigned char P1;   // Prefixes
	unsigned char P2;
	unsigned char P3;
	unsigned char P4;

	// REX byte, accessible as a whole (b) or as bit fields
	// (the mapping of the fields onto b depends on the compiler's
	// bit-field order)
	struct {
		union {
			struct 	{
				unsigned char B : 1;
				unsigned char X : 1;
				unsigned char R : 1;
				unsigned char W : 1;
				unsigned char prefix : 4;
			};
			unsigned char b;
		};
	}	REX;

	unsigned char O1;   // Opcode
	unsigned char O2;
	unsigned char O3;

	// modRM byte, accessible as a whole (b) or as bit fields
	struct {
		union {
			struct {
				unsigned char r_m : 3;
				unsigned char reg : 3;
				unsigned char mod : 2;
			};
			unsigned char b;
		};
	}	modRM;

	// SIB byte, accessible as a whole (b) or as bit fields
	struct {
		union {
			struct {
				unsigned char base : 3;
				unsigned char index : 3;
				unsigned char scale : 2;
			};
			unsigned char b;
		};
	}	SIB;

	// displacement: one 32-bit value sharing storage with the bytes D1..D4
	union {
		cint32 displacement;
		struct {
			unsigned char D1;
			unsigned char D2;
			unsigned char D3;
			unsigned char D4;
		};
	};

	// immediate: one 32-bit value sharing storage with the bytes I1..I4
	union {
		cint32 immediate;
		struct {
			unsigned char I1;
			unsigned char I2;
			unsigned char I3;
			unsigned char I4;
		};
	};
};

typedef struct CEncoding CEncoding;
255 | 
256 | #ifdef __cplusplus
257 | extern "C" {
258 | #endif
259 | 
260 | 
//---------------------------------------------------------------------
// CEncoding 
//---------------------------------------------------------------------

// lifecycle: init prepares a raw struct, reset frees owned buffers and
// clears all fields, destroy is a reset
void cencoding_init(CEncoding *self);
void cencoding_reset(CEncoding *self);
void cencoding_destroy(CEncoding *self);

// accessors: return the stored strings (NULL if not set)
const char *cencoding_get_label(const CEncoding *self);
const char *cencoding_get_reference(const CEncoding *self);

// length in bytes / deep copy of src into self
int cencoding_length(const CEncoding *self);
int cencoding_new_copy(CEncoding *self, const CEncoding *src);

// setters: the string and data setters store private copies
int cencoding_add_prefix(CEncoding *self, unsigned char prefix);
int cencoding_set_immediate(CEncoding *self, int immediate);
int cencoding_set_jump_offset(CEncoding *self, int offset);
void cencoding_set_label(CEncoding *self, const char *label);
void cencoding_set_reference(CEncoding *self, const char *ref);

void cencoding_set_data(CEncoding *self, const void *data, int size);

// check_format returns 0 for a consistent encoding, negative otherwise;
// write_code emits the bytes and returns their count
int cencoding_check_format(const CEncoding *self);
int cencoding_write_code(const CEncoding *self, unsigned char *output);

// hex dump of the encoding into a caller buffer / to stdout
void cencoding_to_string(const CEncoding *self, char *output);
void cencoding_to_stdout(const CEncoding *self);
287 | 
288 | 
289 | #ifdef __cplusplus
290 | }
291 | #endif
292 | 
293 | #endif
294 | 
295 | 
296 | 


--------------------------------------------------------------------------------
/source/cinstruct.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cinstruct.c - 
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #include "cinstruct.h"
 10 | 
 11 | #ifdef _MSC_VER
 12 | #pragma warning(disable: 4996)
 13 | #endif
 14 | 
 15 | static void cinst_extract_operands(CInstruction *self, const char *syntax);
 16 | 
 17 | CInstruction *cinst_create(const CInstSyntax *syntax)
 18 | {
 19 | 	CInstruction *self;
 20 | 	self = (CInstruction*)malloc(sizeof(CInstruction));
 21 | 	assert(self);
 22 | 	self->syntax = syntax;
 23 | 	cinst_extract_operands(self, syntax->operands);
 24 | 	self->syntaxMnemonic = 0;
 25 | 	self->syntaxSpecifier = 0;
 26 | 	self->syntaxFirstOperand = 0;
 27 | 	self->syntaxSecondOperand = 0;
 28 | 	self->syntaxThirdOperand = 0;
 29 | 	self->flags = syntax->flags;
 30 | 	self->next = NULL;
 31 | 	return self;
 32 | }
 33 | 
 34 | void cinst_release(CInstruction *self)
 35 | {
 36 | 	if (self->next) cinst_release(self->next);
 37 | 	self->next = NULL;
 38 | 	memset(self, 0, sizeof(CInstruction));
 39 | 	free(self);
 40 | }
 41 | 
 42 | static void cinst_extract_operands(CInstruction *self, const char *syntax)
 43 | {
 44 | 	char *token;
 45 | 	char *string;
 46 | 	char *sep;
 47 | 
 48 | 	assert(syntax && self);
 49 | 
 50 | 	self->specifier = CS_UNKNOWN;
 51 | 	self->firstOperand = O_VOID;
 52 | 	self->secondOperand = O_VOID;
 53 | 	self->thirdOperand = O_VOID;
 54 | 
 55 | 	string = strdup(syntax);
 56 | 	cstring_strip(string);
 57 | 
 58 | 	sep = string;
 59 | 	token = cstring_strsep(&sep, " ,");
 60 | 
 61 | 	if (token == NULL) return;
 62 | 
 63 | 	cstring_strip(token);
 64 | 	self->specifier = cspecifier_scan(token);
 65 | 
 66 | 	if (self->specifier != CS_UNKNOWN) {
 67 | 		token = cstring_strsep(&sep, " ,");
 68 | 		if (token == 0) {
 69 | 			free(string);
 70 | 			return;
 71 | 		}
 72 | 	}
 73 | 
 74 | 	cstring_strip(token);
 75 | 	self->firstOperand = coperand_scan_syntax(token);
 76 | 
 77 | 	if (self->firstOperand != O_UNKNOWN) {
 78 | 		token = cstring_strsep(&sep, " ,");
 79 | 		if (token == 0) {
 80 | 			free(string);
 81 | 			return;
 82 | 		}
 83 | 	}
 84 | 
 85 | 	cstring_strip(token);
 86 | 	self->secondOperand = coperand_scan_syntax(token);
 87 | 
 88 | 	if (self->secondOperand != O_UNKNOWN) {
 89 | 		token = cstring_strsep(&sep, " ,");
 90 | 		if (token == 0) {
 91 | 			free(string);
 92 | 			return;
 93 | 		}
 94 | 	}
 95 | 
 96 | 	cstring_strip(token);
 97 | 	self->thirdOperand = coperand_scan_syntax(token);
 98 | 
 99 | 	if (self->thirdOperand != O_UNKNOWN) {
100 | 		token = cstring_strsep(&sep, " ,");
101 | 		if (token == 0) {
102 | 			free(string);
103 | 			return;
104 | 		}
105 | 	}
106 | 
107 | 	if (token != 0) {
108 | 		fprintf(stderr, "casm: Invalid operand encoding '%s'\n", syntax);
109 | 		fflush(stderr);
110 | 		assert(0);
111 | 		return;
112 | 	}
113 | 
114 | 	free(string);
115 | }
116 | 
117 | CInstruction *cinst_get_next(CInstruction *self)
118 | {
119 | 	assert(self);
120 | 	return self->next;
121 | }
122 | 
123 | void cinst_attach_new(CInstruction *self, const CInstSyntax *instruction)
124 | {
125 | 	if (!self->next) {
126 | 		self->next = cinst_create(instruction);
127 | 	}	else {
128 | 		cinst_attach_new(self->next, instruction);
129 | 	}
130 | }
131 | 		
132 | void cinst_reset_match(CInstruction *self)
133 | {
134 | 	self->syntaxMnemonic = 0;
135 | 	self->syntaxSpecifier = 0;
136 | 	self->syntaxFirstOperand = 0;
137 | 	self->syntaxSecondOperand = 0;
138 | 	self->syntaxThirdOperand = 0;
139 | 
140 | 	if (self->next) {
141 | 		cinst_reset_match(self->next);
142 | 	}
143 | }
144 | 
145 | int cinst_match_syntax(CInstruction *self)
146 | {
147 | 	return  self->syntaxMnemonic != 0 &&
148 | 			self->syntaxSpecifier != 0 &&
149 | 			self->syntaxFirstOperand != 0 &&
150 | 			self->syntaxSecondOperand != 0 &&
151 | 			self->syntaxThirdOperand != 0;	
152 | }
153 | 
154 | void cinst_match_mnemonic(CInstruction *self, const char *mnemonic)
155 | {
156 | 	if (stricmp(self->syntax->mnemonic, mnemonic) == 0) {
157 | 		self->syntaxMnemonic = 1;
158 | 	}
159 | 	if (self->next) {
160 | 		cinst_match_mnemonic(self->next, mnemonic);
161 | 	}
162 | }
163 | 
164 | void cinst_match_specifier(CInstruction *self, enum CSpecifierType specifier)
165 | {
166 | 	if (self->specifier == CS_UNKNOWN) 
167 | 	{
168 | 		if (self->specifier != CS_UNKNOWN) {
169 | 			if (self->firstOperand == O_R_M8 || 
170 | 				self->secondOperand == O_R_M8) {
171 | 				self->syntaxSpecifier = self->specifier == CS_BYTE;
172 | 			}
173 | 			else if (self->firstOperand == O_R_M16 || 
174 | 					self->secondOperand == O_R_M16) {
175 | 				self->syntaxSpecifier = self->specifier == CS_WORD;
176 | 			}
177 | 			else if (self->firstOperand == O_R_M32 || 
178 | 					self->secondOperand == O_R_M32) {
179 | 				self->syntaxSpecifier = self->specifier == CS_DWORD;
180 | 			}
181 | 			else if (self->firstOperand == O_R_M64 || 
182 | 					self->secondOperand == O_R_M64) {
183 | 				self->syntaxSpecifier = 
184 | 					(self->specifier == CS_QWORD || 
185 | 					self->specifier == CS_MMWORD);
186 | 			}
187 | 			else if (self->firstOperand == O_R_M128 || 
188 | 					self->secondOperand == O_R_M128) {
189 | 				self->syntaxSpecifier = self->specifier == CS_XMMWORD;
190 | 			}	
191 | 			else {
192 | 				self->syntaxSpecifier = 1;
193 | 			}
194 | 		}	else {
195 | 			self->syntaxSpecifier = 1;
196 | 		}
197 | 	}
198 | 	else if (self->specifier != CS_UNKNOWN)   // Explicit specifier
199 | 	{
200 | 		if (self->specifier == specifier) {
201 | 			self->syntaxSpecifier = 1;
202 | 		}
203 | 		else if (specifier == CS_UNKNOWN) {  
204 | 			self->syntaxSpecifier = 1;	// Specifiers are optional
205 | 		}
206 | 		else {
207 | 			self->syntaxSpecifier = 0;
208 | 		}
209 | 	}
210 | 
211 | 	if (self->next) {
212 | 		cinst_match_specifier(self->next, specifier);
213 | 	}
214 | }
215 | 
216 | void cinst_match_first_operand(CInstruction *self, const COperand *operand)
217 | {
218 | 	if (coperand_is_subtype_of(operand, self->firstOperand)) {
219 | 		self->syntaxFirstOperand = 1;
220 | 	}
221 | 	else if (operand->type == O_MEM && self->firstOperand & O_MEM) {
222 | 		if(self->syntaxSpecifier) {  // Explicit size specfier
223 | 			self->syntaxFirstOperand = 1;
224 | 		}
225 | 		else if(self->secondOperand != O_UNKNOWN) { //Implicit size specifier
226 | 			self->syntaxFirstOperand = 1;
227 | 		}
228 | 	}
229 | 
230 | 	if (self->next) {
231 | 		cinst_match_first_operand(self->next, operand);
232 | 	}
233 | }
234 | 
235 | void cinst_match_second_operand(CInstruction *self, const COperand *operand)
236 | {
237 | 	if (coperand_is_subtype_of(operand, self->secondOperand)) {
238 | 		self->syntaxSecondOperand = 1;
239 | 	}
240 | 	else if (operand->type == O_MEM && self->secondOperand & O_MEM) {
241 | 		if (self->syntaxSpecifier) {  // Explicit size specfier
242 | 			self->syntaxSecondOperand = 1;
243 | 		}
244 | 		else if (self->firstOperand != O_UNKNOWN) {
245 | 			self->syntaxSecondOperand = 1;
246 | 		}
247 | 	}
248 | 	if (self->next) {
249 | 		cinst_match_second_operand(self->next, operand);
250 | 	}
251 | }
252 | 
253 | void cinst_match_third_operand(CInstruction *self, const COperand *operand)
254 | {
255 | 	if (coperand_is_subtype_of(operand, self->thirdOperand)) {
256 | 		self->syntaxThirdOperand = 1;
257 | 	}
258 | 	if (self->next) {
259 | 		cinst_match_third_operand(self->next, operand);
260 | 	}
261 | }
262 | 
263 | enum COperandType cinst_getFirstOperand(CInstruction *self)
264 | {
265 | 	return self->firstOperand;
266 | }
267 | 
268 | enum COperandType cinst_getSecondOperand(CInstruction *self)
269 | {
270 | 	return self->secondOperand;
271 | }
272 | 
273 | enum COperandType cinst_getThirdOperand(CInstruction *self)
274 | {
275 | 	return self->thirdOperand;
276 | }
277 | 
278 | const char *cinst_getMnemonic(CInstruction *self)
279 | {
280 | 	return self->syntax->mnemonic;
281 | }
282 | 
283 | const char *cinst_getOperandSyntax(CInstruction *self)
284 | {
285 | 	return self->syntax->operands;
286 | }
287 | 
288 | const char *cinst_getEncoding(CInstruction *self)
289 | {
290 | 	return self->syntax->encoding;
291 | }
292 | 		
293 | int cinst_is_32bit(CInstruction *self)
294 | {
295 | 	return (self->flags & CT_CPU_386) == CT_CPU_386;
296 | }
297 | 
298 | 
299 | 
300 | 


--------------------------------------------------------------------------------
/source/cinstruct.h:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cinstruct.h - 
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #ifndef __CINSTRUCT_H__
 10 | #define __CINSTRUCT_H__
 11 | 
 12 | #include "ckeywords.h"
 13 | 
 14 | //---------------------------------------------------------------------
 15 | // CInstructionType
 16 | //---------------------------------------------------------------------
// Bit flags naming the CPU generation or instruction-set extension that an
// instruction syntax is tagged with. The values are stored in
// CInstSyntax::flags / CInstruction::flags and tested with bitwise AND.
// Some values are composites: CT_CPU_P7 includes the CT_CPU_SSE bit and
// CT_CPU_SSE2 includes everything in CT_CPU_WILLAMETTE.
enum CInstructionType
{
	CT_CPU_UNKNOWN	= 0x00000000,

	CT_CPU_8086		= 0x00000001,
	CT_CPU_186		= 0x00000002,
	CT_CPU_286		= 0x00000004,
	CT_CPU_386		= 0x00000008,
	CT_CPU_486		= 0x00000010,
	CT_CPU_PENT		= 0x00000020,   // Pentium
	CT_CPU_P6		= 0x00000040,   // Pentium Pro

	CT_CPU_FPU		= 0x00000080,
	CT_CPU_MMX		= 0x00000100,
	CT_CPU_KATMAI	= 0x00000200,
	CT_CPU_SSE		= 0x00000400,

//	CT_CPU_AMD		= 0x00000800,   // AMD specific system calls
	CT_CPU_CYRIX	= 0x00001000,
	CT_CPU_3DNOW	= 0x00002000,
	CT_CPU_ATHLON	= 0x00004000,
//	CT_CPU_SMM		= 0x00008000,   // System Management Mode, standby mode

	CT_CPU_P7		= 0x00010000 | CT_CPU_SSE,
	CT_CPU_WILLAMETTE = CT_CPU_P7,
	CT_CPU_SSE2		= 0x00020000 | CT_CPU_WILLAMETTE,
	CT_CPU_PNI		= 0x00040000,
	CT_CPU_SSE3		= 0x00080000,

// Undocumented, also not supported by inline assembler
//	CT_CPU_UNDOC	= 0x00010000,  
// Privileged, run-time compiled OS kernel anyone?
//	CT_CPU_PRIV	= 0x00020000    
};
 51 | 
 52 | 
 53 | //---------------------------------------------------------------------
 54 | // CInstructionSyntax
 55 | //---------------------------------------------------------------------
// One row of the instruction table: a mnemonic, the operand pattern it
// takes and the encoding recipe that produces its machine code,
// e.g. {"MOV", "r/m8,reg8", "88 /r", CT_CPU_8086}.
struct CInstSyntax
{
	const char *mnemonic;	// instruction name, e.g. "MOV"
	const char *operands;	// operand pattern, e.g. "r/m8,reg8" or "SHORT imm"
	const char *encoding;	// encoding recipe, e.g. "po 89 /r"
	int flags;				// CInstructionType bits
};

typedef struct CInstSyntax CInstSyntax;
 65 | 
 66 | 
 67 | //---------------------------------------------------------------------
 68 | // CInstruction
 69 | //---------------------------------------------------------------------
 70 | struct CInstruction
 71 | {
 72 | 	int syntaxMnemonic : 1;
 73 | 	int syntaxSpecifier : 1;
 74 | 	int syntaxFirstOperand : 1;
 75 | 	int syntaxSecondOperand : 1;
 76 | 	int syntaxThirdOperand : 1;
 77 | 
 78 | 	const struct CInstSyntax *syntax;
 79 | 	enum CSpecifierType specifier;
 80 | 	enum COperandType firstOperand;
 81 | 	enum COperandType secondOperand;
 82 | 	enum COperandType thirdOperand;
 83 | 	int flags;
 84 | 
 85 | 	struct CInstruction *next;
 86 | };
 87 | 
 88 | typedef struct CInstruction CInstruction;
 89 | 
 90 | 
 91 | #ifdef __cplusplus
 92 | extern "C" {
 93 | #endif
 94 | 
 95 | 
// Allocate an unlinked instruction for a table row / free an instruction
// together with every node linked after it.
CInstruction *cinst_create(const CInstSyntax *syntax);
void cinst_release(CInstruction *self);

// Next node of the list, or NULL at the end.
CInstruction *cinst_get_next(CInstruction *self);

// Create an instruction for `instruction` and append it to the end of the
// list starting at `self`.
void cinst_attach_new(CInstruction *self, const CInstSyntax *instruction);
		
// Matching. cinst_reset_match() clears the match flags of the whole list;
// each cinst_match_* call walks the whole list and updates one flag per
// node; cinst_match_syntax() reports whether all five flags of `self`
// (only that node) are set.
void cinst_reset_match(CInstruction *self);
int cinst_match_syntax(CInstruction *self);
void cinst_match_mnemonic(CInstruction *self, const char *mnemonic);
void cinst_match_specifier(CInstruction *self, enum CSpecifierType sizeSpec);
void cinst_match_first_operand(CInstruction *self, const COperand *operand);
void cinst_match_second_operand(CInstruction *self, const COperand *operand);
void cinst_match_third_operand(CInstruction *self, const COperand *operand);

// Operand types parsed from the syntax's operand pattern.
enum COperandType cinst_getFirstOperand(CInstruction *self);
enum COperandType cinst_getSecondOperand(CInstruction *self);
enum COperandType cinst_getThirdOperand(CInstruction *self);

// Strings of the underlying table row; owned by that row.
const char *cinst_getMnemonic(CInstruction *self);
const char *cinst_getOperandSyntax(CInstruction *self);
const char *cinst_getEncoding(CInstruction *self);
		
// Non-zero when the instruction's flags contain CT_CPU_386.
int cinst_is_32bit(CInstruction *self);
120 | 
121 | 
122 | 
123 | #ifdef __cplusplus
124 | }
125 | #endif
126 | 
127 | #endif
128 | 
129 | 
130 | 


--------------------------------------------------------------------------------
/source/cinstset.h:
--------------------------------------------------------------------------------
 1 | //=====================================================================
 2 | //
 3 | // cinstset.h - 
 4 | //
 5 | // NOTE:
 6 | // for more information, please see the readme file.
 7 | //
 8 | //=====================================================================
 9 | #ifndef __CINSTSET_H__
10 | #define __CINSTSET_H__
11 | 
12 | #include "cinstruct.h"
13 | 
14 | 
// Associates a mnemonic with a CInstruction.
// NOTE(review): presumably `instruction` is the head of the linked list of
// all syntaxes sharing that mnemonic - confirm against cinstset.c.
struct CInstructionEntry
{
	const char *mnemonic;
	CInstruction *instruction;
};

typedef struct CInstructionEntry CInstructionEntry;
22 | 
// The instruction set: holds a pointer to CInstructionEntry records.
// NOTE(review): presumably an array with one entry per mnemonic, built by
// cinstset_create() - confirm against cinstset.c.
struct CInstructionSet
{
	CInstructionEntry *instructionMap;
};

typedef struct CInstructionSet CInstructionSet;
29 | 
30 | 
31 | #ifdef __cplusplus
32 | extern "C" {
33 | #endif
34 | 
// Table of instruction syntax rows (defined in another translation unit).
extern CInstSyntax cinstruction_set[];

// NOTE(review): presumably the number of rows in cinstruction_set and the
// number of distinct mnemonics among them - confirm against cinstset.c.
int cinstset_num_instructions(void);
int cinstset_num_mnemonics(void);

// Create / destroy an instruction set object.
CInstructionSet *cinstset_create(void);
void cinstset_release(CInstructionSet *self);

// Look up an instruction by name.
// NOTE(review): result for an unknown name is not visible here (likely NULL).
CInstruction *cinstset_query(const CInstructionSet *self, const char *name);
44 | 
45 | 
46 | #ifdef __cplusplus
47 | }
48 | #endif
49 | 
50 | #endif
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/source/ckeywords.c:
--------------------------------------------------------------------------------
  1 | #include "ckeywords.h"
  2 | 
  3 | 
// Lookup table from specifier keyword to CSpecifierType, searched linearly
// and case-insensitively by cspecifier_scan().
// The empty notation maps to CS_UNKNOWN. Because the enum defines
// CS_SHORT == CS_NEAR and CS_MMWORD == CS_QWORD, those keyword pairs yield
// the same value.
const CSpecifier cspecifier_set[] =
{
	{CS_UNKNOWN,	""},
	{CS_NEAR,		"NEAR"},
	{CS_SHORT,		"SHORT"},
//	{FAR,			"FAR"},
	{CS_BYTE,		"BYTE"},
	{CS_WORD,		"WORD"},
	{CS_DWORD,		"DWORD"},
	{CS_QWORD,		"QWORD"},
	{CS_MMWORD,		"MMWORD"},
	{CS_XMMWORD,	"XMMWORD"},
};
 17 | 
 18 | enum CSpecifierType cspecifier_scan(const char *string)
 19 | {
 20 | 	if (string) {
 21 | 		int i;
 22 | 		for(i = 0; i < sizeof(cspecifier_set) / sizeof(CSpecifier); i++) {
 23 | 			if(cstring_strcmp(string, cspecifier_set[i].notation, 1) == 0) {
 24 | 				return cspecifier_set[i].type;
 25 | 			}		
 26 | 		}
 27 | 	}
 28 | 	return CS_UNKNOWN;
 29 | }
 30 | 
 31 | 
 32 | int coperand_is_subtype_of(const COperand *self, enum COperandType baseType)
 33 | {
 34 | 	return (self->type & baseType) == self->type;
 35 | }
 36 | 
 37 | int coperand_type_is_void(enum COperandType type)
 38 | {
 39 | 	return type == O_VOID;
 40 | }
 41 | 
 42 | int coperand_type_is_imm(enum COperandType type)
 43 | {
 44 | 	return (type & O_IMM) == type;
 45 | }
 46 | 
 47 | int coperand_type_is_reg(enum COperandType type)
 48 | {
 49 | 	return (type & O_REG) == type;
 50 | }
 51 | 
 52 | int coperand_type_is_mem(enum COperandType type)
 53 | {
 54 | 	return (type & O_MEM) == type;
 55 | }
 56 | 
 57 | int coperand_type_is_R_M(enum COperandType type)
 58 | {
 59 | 	return (type & O_R_M) == type;
 60 | }
 61 | 
 62 | int coperand_is_void(const COperand *operand)
 63 | {
 64 | 	return coperand_type_is_void(operand->type);
 65 | }
 66 | 
 67 | int coperand_is_imm(const COperand *operand)
 68 | {
 69 | 	return coperand_type_is_imm(operand->type);
 70 | }
 71 | 
 72 | int coperand_is_reg(const COperand *operand)
 73 | {
 74 | 	return coperand_type_is_reg(operand->type);
 75 | }
 76 | 
 77 | int coperand_is_mem(const COperand *operand)
 78 | {
 79 | 	return coperand_type_is_mem(operand->type);
 80 | }
 81 | 
 82 | int coperand_is_R_M(const COperand *operand)
 83 | {
 84 | 	return coperand_type_is_R_M(operand->type);
 85 | }
 86 | 
// Table of register names recognised in source text, searched linearly and
// case-insensitively by coperand_scan_reg().
// Each row gives the operand type, the register name and the value stored
// in the operand's union (0..7 within each register group). Registers that
// have a dedicated type bit (AL, CL, AX, CX, DX, EAX, ECX, ST0) carry that
// specific type; the others carry the generic type of their group.
// The empty name maps to O_VOID.
const COperand cregister_set[] = 
{
	{O_VOID,		""},

	{O_AL,		"AL", { 0 } },
	{O_CL,		"CL", { 1 } },
	{O_REG8,	"DL", { 2 } },
	{O_REG8,	"BL", { 3 } },
	{O_REG8,	"AH", { 4 } },
	{O_REG8,	"CH", { 5 } },
	{O_REG8,	"DH", { 6 } },
	{O_REG8,	"BH", { 7 } },

	{O_AX,		"AX", { 0 } },
	{O_CX,		"CX", { 1 } },
	{O_DX,		"DX", { 2 } },
	{O_REG16,	"BX", { 3 } },
	{O_REG16,	"SP", { 4 } },
	{O_REG16,	"BP", { 5 } },
	{O_REG16,	"SI", { 6 } },
	{O_REG16,	"DI", { 7 } },

	{O_EAX,		"EAX", { 0 } },
	{O_ECX,		"ECX", { 1 } },
	{O_REG32,	"EDX", { 2 } },
	{O_REG32,	"EBX", { 3 } },
	{O_REG32,	"ESP", { 4 } },
	{O_REG32,	"EBP", { 5 } },
	{O_REG32,	"ESI", { 6 } },
	{O_REG32,	"EDI", { 7 } },

	{O_ES,		"ES", { 0 } },
	{O_CS,		"CS", { 1 } },
	{O_SS,		"SS", { 2 } },
	{O_DS,		"DS", { 3 } },
	{O_FS,		"FS", { 4 } },
	{O_GS,		"GS", { 5 } },

	{O_ST0,		"ST0", { 0 } },
	{O_FPUREG,	"ST1", { 1 } },
	{O_FPUREG,	"ST2", { 2 } },
	{O_FPUREG,	"ST3", { 3 } },
	{O_FPUREG,	"ST4", { 4 } },
	{O_FPUREG,	"ST5", { 5 } },
	{O_FPUREG,	"ST6", { 6 } },
	{O_FPUREG,	"ST7", { 7 } },

	{O_MMREG,	"MM0", { 0 } },
	{O_MMREG,	"MM1", { 1 } },
	{O_MMREG,	"MM2", { 2 } },
	{O_MMREG,	"MM3", { 3 } },
	{O_MMREG,	"MM4", { 4 } },
	{O_MMREG,	"MM5", { 5 } },
	{O_MMREG,	"MM6", { 6 } },
	{O_MMREG,	"MM7", { 7 } },

	{O_XMMREG,	"XMM0", { 0 } },
	{O_XMMREG,	"XMM1", { 1 } },
	{O_XMMREG,	"XMM2", { 2 } },
	{O_XMMREG,	"XMM3", { 3 } },
	{O_XMMREG,	"XMM4", { 4 } },
	{O_XMMREG,	"XMM5", { 5 } },
	{O_XMMREG,	"XMM6", { 6 } },
	{O_XMMREG,	"XMM7", { 7 } }
};
152 | 
153 | const COperand csyntax_set[] = 
154 | {
155 | 	{O_VOID,	""},
156 | 
157 | 	{O_ONE,		"1"},
158 | 	{O_IMM,		"imm"},
159 | 	{O_IMM8,	"imm8"},
160 | 	{O_IMM16,	"imm16"},
161 | 	{O_IMM32,	"imm32"},
162 | 
163 | 	{O_AL,		"AL"},
164 | 	{O_AX,		"AX"},
165 | 	{O_EAX,		"EAX"},
166 | 	{O_DX,		"DX"},
167 | 	{O_CL,		"CL"},
168 | 	{O_CX,		"CX"},
169 | 	{O_ECX,		"ECX"},
170 | 	{O_CS,		"CS"},
171 | 	{O_DS,		"DS"},
172 | 	{O_ES,		"ES"},
173 | 	{O_SS,		"SS"},
174 | 	{O_FS,		"FS"},
175 | 	{O_GS,		"GS"},
176 | 	{O_ST0,		"ST0"},
177 | 
178 | 	{O_REG8,	"reg8"},
179 | 	{O_REG16,	"reg16"},
180 | 	{O_REG32,	"reg32"},
181 | 	{O_SEGREG,	"segreg"},
182 | 	{O_FPUREG,	"fpureg"},
183 | 	{O_CR,		"CR0/2/3/4"},
184 | 	{O_DR,		"DR0/1/2/3/6/7"},
185 | 	{O_TR,		"TR3/4/5/6/7"},
186 | 	{O_MMREG,	"mmreg"},
187 | 	{O_XMMREG,	"xmmreg"},
188 | 
189 | 	{O_MEM,		"mem"},
190 | 	{O_MEM8,	"mem8"},
191 | 	{O_MEM16,	"mem16"},
192 | 	{O_MEM32,	"mem32"},
193 | 	{O_MEM64,	"mem64"},
194 | 	{O_MEM80,	"mem80"},
195 | 	{O_MEM128,	"mem128"},
196 | 
197 | 	{O_R_M8,	"r/m8"},
198 | 	{O_R_M16,	"r/m16"},
199 | 	{O_R_M32,	"r/m32"},
200 | 	{O_R_M64,	"r/m64"},
201 | 	{O_R_M128,	"r/m128"},
202 | 
203 | 	{O_XMM32,	"xmmreg/mem32"},
204 | 	{O_XMM32,	"xmmreg/mem64"},
205 | 	{O_M512B,	"m512byte"},
206 | 	{O_MOFF8,	"memoffs8"},
207 | 	{O_MOFF16,	"memoffs16"},
208 | 	{O_MOFF32,	"memoffs32"}
209 | };
210 | 
// Ready-made operand values: CINIT is an operand of type O_VOID;
// CNOT_FOUND has type O_UNKNOWN and is what coperand_scan_reg() returns
// when a name is not a register.
const COperand CINIT = { O_VOID };
const COperand CNOT_FOUND = { O_UNKNOWN };
213 | 
214 | COperand coperand_scan_reg(const char *string)
215 | {
216 | 	if (string) {
217 | 		size_t i;
218 | 		for (i = 0; i < sizeof(cregister_set) / sizeof(COperand); i++) {
219 | 			if (cstring_strcmp(string, cregister_set[i].notation, 1) == 0) {
220 | 				return cregister_set[i];
221 | 			}
222 | 		}
223 | 	}
224 | 	return CNOT_FOUND;
225 | }
226 | 
227 | enum COperandType coperand_scan_syntax(const char *string)
228 | {
229 | 	if (string) {
230 | 		size_t i;
231 | 		for (i = 0; i < sizeof(csyntax_set) / sizeof(COperand); i++) {
232 | 			if (cstring_strcmp(string, csyntax_set[i].notation, 1) == 0) {
233 | 				return csyntax_set[i].type;
234 | 			}
235 | 		}
236 | 	}
237 | 	return O_UNKNOWN;
238 | }
239 | 
240 | 
241 | //---------------------------------------------------------------------
242 | // string operation
243 | //---------------------------------------------------------------------
// Removes leading and trailing whitespace from `str` in place and returns
// `str`. A NULL argument is returned unchanged.
// Characters are passed to isspace() as unsigned char: handing it a negative
// plain-char value (any byte >= 0x80 where char is signed) is undefined.
char *cstring_strip(char *str)
{
	size_t begin = 0;
	size_t end;

	if (str == NULL) return NULL;

	// Drop trailing whitespace.
	end = strlen(str);
	while (end > 0 && isspace((unsigned char)str[end - 1])) end--;

	// Skip leading whitespace.
	while (begin < end && isspace((unsigned char)str[begin])) begin++;

	// Slide the kept part to the front; the ranges may overlap.
	if (begin > 0) memmove(str, str + begin, end - begin);
	str[end - begin] = '\0';
	return str;
}
263 | 
// Compares s1 and s2 after ignoring leading and trailing whitespace on both.
// When `caseoff` is non-zero, ASCII letters a-z are folded to upper case
// before comparing. Returns -1, 0 or 1; when one trimmed string is a prefix
// of the other, the shorter one orders first.
// Characters are passed to isspace() as unsigned char, since a negative
// plain-char value is undefined there.
int cstring_strcmp(const char *s1, const char *s2, int caseoff)
{
	const char *p1, *p2;
	size_t k1, k2, i;

	// Skip leading whitespace.
	for (p1 = s1; isspace((unsigned char)*p1); p1++);
	for (p2 = s2; isspace((unsigned char)*p2); p2++);

	// Trimmed lengths: back up over trailing whitespace.
	for (k1 = strlen(p1); k1 > 0; k1--) {
		if (!isspace((unsigned char)p1[k1 - 1])) break;
	}
	for (k2 = strlen(p2); k2 > 0; k2--) {
		if (!isspace((unsigned char)p2[k2 - 1])) break;
	}

	for (i = 0; i < k1 && i < k2; i++) {
		char c1 = p1[i];
		char c2 = p2[i];
		if (caseoff && c1 >= 'a' && c1 <= 'z') c1 -= 'a' - 'A';
		if (caseoff && c2 >= 'a' && c2 <= 'z') c2 -= 'a' - 'A';
		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
	}

	// Common prefix is equal: the shorter string orders first.
	if (k1 < k2) return -1;
	if (k1 > k2) return 1;
	return 0;
}
286 | 
// Splits the next token off *stringp, like BSD strsep().
// The token ends at the first character that occurs in `delim`; that
// character is overwritten with '\0' and *stringp is advanced past it.
// When no delimiter is left, the rest of the string is the token and
// *stringp becomes NULL. Returns NULL once *stringp is NULL.
// Adjacent delimiters produce empty tokens.
char *cstring_strsep(char **stringp, const char *delim)
{
	char *token = *stringp;
	char *stop;

	if (token == NULL) {
		return NULL;
	}

	// First delimiter character, or the terminating '\0' when there is none.
	stop = token + strcspn(token, delim);
	if (*stop == '\0') {
		*stringp = NULL;
	}	else {
		*stop = '\0';
		*stringp = stop + 1;
	}
	return token;
}
309 | 
310 | 
311 | 
312 | 


--------------------------------------------------------------------------------
/source/ckeywords.h:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // ckeywords.h - 
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #ifndef __CKEYWORDS_H__
 10 | #define __CKEYWORDS_H__
 11 | 
 12 | #include "cencoding.h"
 13 | 
 14 | //---------------------------------------------------------------------
 15 | // CSpecifierType
 16 | //---------------------------------------------------------------------
// Size / distance specifier that may precede an operand.
// CS_UNKNOWN (0) means "no specifier". Two pairs are deliberate aliases
// with equal values: CS_SHORT == CS_NEAR and CS_MMWORD == CS_QWORD.
enum CSpecifierType
{
	CS_UNKNOWN = 0,
	CS_NEAR,
	CS_SHORT = CS_NEAR,
	//	FAR,
	CS_BYTE,
	CS_WORD,
	CS_DWORD,
	CS_QWORD,
	CS_MMWORD = CS_QWORD,
	CS_XMMWORD
};
 30 | 
 31 | //---------------------------------------------------------------------
 32 | // CSpecifier
 33 | //---------------------------------------------------------------------
// One row of the specifier keyword table: a specifier value and the keyword
// text that denotes it.
struct CSpecifier
{
	enum CSpecifierType type;	// specifier value
	const char *notation;		// keyword text, e.g. "DWORD"
};

typedef struct CSpecifier CSpecifier;
 41 | 
 42 | 
 43 | //---------------------------------------------------------------------
 44 | // COperandType
 45 | //---------------------------------------------------------------------
// Operand kinds, encoded as bit sets so that "is a subtype of" becomes a
// subset test (see coperand_is_subtype_of): a general kind is the OR of the
// more specific kinds it includes, e.g. O_REG8 = O_CL | O_AL and
// O_R_M32 = O_MEM32 | O_REG32.
// Kinds this assembler does not support are defined as O_UNKNOWN (0), so
// they contribute no bits to the composites they appear in.
enum COperandType
{
	O_UNKNOWN	= 0,

	O_VOID	= 0x00000001,

	O_ONE	= 0x00000002,
	O_IMM8	= 0x00000004 | O_ONE,
	O_IMM16	= 0x00000008 | O_IMM8 | O_ONE,
	O_IMM32	= 0x00000010 | O_IMM16 | O_IMM8 | O_ONE,
	O_IMM	= O_IMM32 | O_IMM16 | O_IMM8 | O_ONE,

	O_AL	= 0x00000020,
	O_CL	= 0x00000040,
	O_REG8	= O_CL | O_AL,

	O_AX	= 0x00000080,
	O_DX	= 0x00000100,
	O_CX	= 0x00000200,
	O_REG16	= O_CX | O_DX | O_AX,

	O_EAX		= 0x00000400,
	O_ECX		= 0x00000800,
	O_REG32	= O_ECX | O_EAX,

	// No need to touch these in 32-bit protected mode
	O_CS		= O_UNKNOWN,   
	O_DS		= O_UNKNOWN,
	O_ES		= O_UNKNOWN,
	O_SS		= O_UNKNOWN,
	O_FS		= O_UNKNOWN,
	O_GS		= O_UNKNOWN,
	O_SEGREG	= O_GS | O_FS | O_SS | O_ES | O_DS | O_CS,

	O_ST0		= 0x00001000,
	O_FPUREG	= 0x00002000 | O_ST0,

	// You won't need these in a JIT assembler
	O_CR		= O_UNKNOWN,   
	O_DR		= O_UNKNOWN,
	O_TR		= O_UNKNOWN,

	O_MMREG		= 0x00004000,
	O_XMMREG	= 0x00008000,

	O_REG		=	O_XMMREG | O_MMREG | O_TR | O_DR | O_CR | O_FPUREG | 
					O_SEGREG | O_REG32 | O_REG16 | O_REG8,
	O_MEM8		=	0x00010000,
	O_MEM16		=	0x00020000,
	O_MEM32		=	0x00040000,
	O_MEM64		=	0x00080000,
	O_MEM80		=	O_UNKNOWN,   // Extended double not supported by NT
	O_MEM128	=	0x00100000,
	O_M512B		=	O_UNKNOWN,   // Only for state save/restore instructions
	O_MEM		=	O_M512B | O_MEM128 | O_MEM80 | O_MEM64 | O_MEM32 | 
					O_MEM16 | O_MEM8,
		
	O_XMM32		=	O_MEM32 | O_XMMREG,
	O_XMM64		=	O_MEM64 | O_XMMREG,

	O_R_M8		=	O_MEM8 | O_REG8,
	O_R_M16		=	O_MEM16 | O_REG16,
	O_R_M32		=	O_MEM32 | O_REG32,
	O_R_M64		=	O_MEM64 | O_MMREG,
	O_R_M128	=	O_MEM128 | O_XMMREG,
	O_R_M		=	O_MEM | O_REG,

	O_MOFF8		=	O_UNKNOWN,   // Not supported
	O_MOFF16	=	O_UNKNOWN,   // Not supported
	O_MOFF32	=	O_UNKNOWN   // Not supported
};
117 | 
118 | 
119 | //---------------------------------------------------------------------
120 | // COperand
121 | //---------------------------------------------------------------------
// An operand: its kind, its textual notation and a payload.
// The same struct is used for the rows of cregister_set and csyntax_set.
// NOTE(review): cint32 and enum CRegID are declared outside this header
// (it includes cencoding.h) - confirm there.
struct COperand
{
	enum COperandType type;		// operand kind (bit set)
	const char *notation;		// text form, e.g. "EAX" or "r/m32"
	union
	{
		cint32 value;			// For immediates
		enum CRegID reg;		// For registers
	};
};

typedef struct COperand COperand;
134 | 
135 | 
136 | #ifdef __cplusplus
137 | extern "C" {
138 | #endif
139 | 
140 | //---------------------------------------------------------------------
141 | // interface
142 | //---------------------------------------------------------------------
// Lookup tables (keyword -> value) and two ready-made operand values:
// CINIT has type O_VOID, CNOT_FOUND has type O_UNKNOWN.
extern const CSpecifier cspecifier_set[];
extern const COperand cregister_set[];
extern const COperand csyntax_set[];
extern const COperand CINIT;
extern const COperand CNOT_FOUND;

// Keyword text -> specifier; CS_UNKNOWN for NULL or unrecognised text.
enum CSpecifierType cspecifier_scan(const char *string);

// Non-zero when every bit of the operand's type is contained in baseType.
int coperand_is_subtype_of(const COperand *self, enum COperandType baseType);

// Classification of an operand type: is_void tests for exactly O_VOID, the
// others test that all bits of `type` lie within O_IMM / O_REG / O_MEM /
// O_R_M respectively.
int coperand_type_is_void(enum COperandType type);
int coperand_type_is_imm(enum COperandType type);
int coperand_type_is_reg(enum COperandType type);
int coperand_type_is_mem(enum COperandType type);
int coperand_type_is_R_M(enum COperandType type);

// Same classification, applied to operand->type.
int coperand_is_void(const COperand *operand);
int coperand_is_imm(const COperand *operand);
int coperand_is_reg(const COperand *operand);
int coperand_is_mem(const COperand *operand);
int coperand_is_R_M(const COperand *operand);

// Register name -> table row (CNOT_FOUND when unknown);
// operand notation -> operand type (O_UNKNOWN when unknown).
// Both ignore case and surrounding whitespace.
COperand coperand_scan_reg(const char *string);
enum COperandType coperand_scan_syntax(const char *string);


// String helpers: strip whitespace in place; compare ignoring surrounding
// whitespace (and ASCII case when caseoff is non-zero); split off the next
// token at any delimiter character, strsep()-style.
char *cstring_strip(char *str);
int cstring_strcmp(const char *s1, const char *s2, int caseoff);
char *cstring_strsep(char **stringp, const char *delim);
172 | 
173 | 
174 | #ifdef __cplusplus
175 | }
176 | #endif
177 | 
178 | 
179 | #endif
180 | 
181 | 
182 | /*
183 | 		Encoding syntax:
184 | 		----------------
185 | 		+r Add register value to opcode
186 | 		/# Value for Mod R/M register field encoding
187 | 		/r Effective address encoding
188 | 		ib Byte immediate
189 | 		iw Word immediate
190 | 		id Dword immediate
191 | 		-b Byte relative address
192 | 		-i Word or dword relative address
193 | 		p0 LOCK instruction prefix (F0h)
194 | 		p2 REPNE/REPNZ instruction prefix (F2h)
195 | 		p3 REP/REPE/REPZ instruction prefix (F3h) (also SSE prefix)
		po Operand-size override prefix (66h)
		pa Address-size override prefix (67h)
198 | 
199 | 	{"JMP",			"imm",						"E9 -i",			CT_CPU_8086},
200 | 	{"JMP",			"SHORT imm",				"EB -b",			CT_CPU_8086},
201 | //	{"JMP",			"imm:imm16",				"po EA iw iw",		CT_CPU_8086},
202 | //	{"JMP",			"imm:imm32",				"po EA id iw",		CT_CPU_386},
203 | 	{"JMP",			"mem",						"po FF /5",			CT_CPU_8086},
204 | //	{"JMP",			"FAR mem",					"po FF /5",			CT_CPU_386},
205 | 	{"JMP",			"WORD r/m16",				"po FF /4",			CT_CPU_8086},
206 | 	{"JMP",			"DWORD r/m32",				"po FF /4",			CT_CPU_386},
207 | 	{"MOV",			"r/m8,reg8",				"88 /r",			CT_CPU_8086},
208 | 	{"MOV",			"r/m16,reg16",				"po 89 /r",			CT_CPU_8086},
209 | 	{"MOV",			"r/m32,reg32",				"po 89 /r",			CT_CPU_386},
210 | 	{"MOV",			"reg8,r/m8",				"8A /r",			CT_CPU_8086},
211 | 	{"MOV",			"reg16,r/m16",				"po 8B /r",			CT_CPU_8086},
212 | 	{"MOV",			"reg32,r/m32",				"po 8B /r",			CT_CPU_386},
213 | 	{"MOV",			"reg8,imm8",				"B0 +r ib",			CT_CPU_8086},
214 | 	{"MOV",			"reg16,imm16",				"po B8 +r iw",		CT_CPU_8086},
215 | 	{"MOV",			"reg32,imm32",				"po B8 +r id",		CT_CPU_386},
216 | 	{"MOV",			"r/m8,imm8",				"C6 /0 ib",			CT_CPU_8086},
217 | 	{"MOV",			"r/m16,imm16",				"po C7 /0 iw",		CT_CPU_8086},
218 | 	{"MOV",			"r/m32,imm32",				"po C7 /0 id",		CT_CPU_386},
219 | */
220 | 
221 | 


--------------------------------------------------------------------------------
/source/cloader.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cloader.c - source loader
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #include "cloader.h"
 10 | 
 11 | //---------------------------------------------------------------------
 12 | // CLink interface
 13 | //---------------------------------------------------------------------
 14 | static CLink *clink_create(const CEncoding *encoding)
 15 | {
 16 | 	CLink *link;
 17 | 
 18 | 	link = (CLink*)malloc(sizeof(CLink));
 19 | 	assert(link);
 20 | 
 21 | 	iqueue_init(&link->head);
 22 | 	cencoding_new_copy(&link->encoding, encoding);
 23 | 	link->offset = 0;
 24 | 	link->size = 0;
 25 | 
 26 | 	return link;
 27 | }
 28 | 
 29 | static void clink_release(CLink *link)
 30 | {
 31 | 	assert(link);
 32 | 	cencoding_destroy(&link->encoding);
 33 | }
 34 | 
 35 | 
 36 | //---------------------------------------------------------------------
 37 | // CLoader interface
 38 | //---------------------------------------------------------------------
 39 | CLoader *cloader_create(void)
 40 | {
 41 | 	CLoader *loader;
 42 | 	loader = (CLoader*)malloc(sizeof(CLoader));
 43 | 	assert(loader);
 44 | 	iqueue_init(&loader->head);
 45 | 	loader->error = (char*)malloc(1024);
 46 | 	assert(loader->error);
 47 | 	loader->error[0] = 0;
 48 | 	loader->errcode = 0;
 49 | 	loader->linear = 0;
 50 | 	loader->output = NULL;
 51 | 	loader->lineno = 0;
 52 | 	return loader;
 53 | }
 54 | 
 55 | void cloader_reset(CLoader *loader)
 56 | {
 57 | 	assert(loader);
 58 | 	while (!iqueue_is_empty(&loader->head)) {
 59 | 		CLink *link = iqueue_entry(loader->head.next, CLink, head);
 60 | 		iqueue_del(&link->head);
 61 | 		clink_release(link);
 62 | 	}
 63 | 	loader->error[0] = 0;
 64 | 	loader->errcode = 0;
 65 | 	loader->linear = 0;
 66 | 	loader->output = NULL;
 67 | 	loader->lineno = 0;
 68 | }
 69 | 
 70 | void cloader_release(CLoader *loader)
 71 | {
 72 | 	assert(loader);
 73 | 	cloader_reset(loader);
 74 | 	if (loader->error) {
 75 | 		free(loader->error);
 76 | 		loader->error = NULL;
 77 | 	}
 78 | }
 79 | 
 80 | int cloader_new_encoding(CLoader *loader, const CEncoding *encoding)
 81 | {
 82 | 	CLink *link;
 83 | 	link = clink_create(encoding);
 84 | 	assert(link);
 85 | 	link->lineno = ++loader->lineno;
 86 | 	iqueue_add_tail(&link->head, &loader->head);
 87 | 	return 0;
 88 | }
 89 | 
 90 | int cloader_get_codesize(CLoader *loader)
 91 | {
 92 | 	struct IQUEUEHEAD *p;
 93 | 	int size = 0;
 94 | 	assert(loader);
 95 | 	for (p = loader->head.next; p != &loader->head; p = p->next) {
 96 | 		CLink *link = iqueue_entry(p, CLink, head);
 97 | 		size += cencoding_length(&link->encoding);
 98 | 	}
 99 | 	return size;
100 | }
101 | 
102 | unsigned long cloader_resolve_label(CLoader *loader, const char *label)
103 | {
104 | 	struct IQUEUEHEAD *p;
105 | 	for (p = loader->head.next; p != &loader->head; p = p->next) {
106 | 		CLink *link = iqueue_entry(p, CLink, head);
107 | 		CEncoding *encoding = &link->encoding;
108 | 		if (cencoding_get_label(encoding)) {
109 | 			if (strcmp(encoding->label, label) == 0) {
110 | 				return (long)link->offset;
111 | 			}
112 | 		}
113 | 	}
114 | 	return 0;
115 | }
116 | 
117 | int cloader_output(CLoader *loader, unsigned char *output)
118 | {
119 | 	struct IQUEUEHEAD *p;
120 | 	assert(loader);
121 | 
122 | 	loader->output = output;
123 | 	loader->linear = (cuint32)output;
124 | 
125 | 	// encoding instructions
126 | 	for (p = loader->head.next; p != &loader->head; p = p->next) {
127 | 		CLink *link = iqueue_entry(p, CLink, head);
128 | 		CEncoding *encoding = &link->encoding;
129 | 		int size;
130 | 		size = cencoding_write_code(encoding, loader->output);
131 | 		link->offset = loader->linear;
132 | 		link->size = size;
133 | 		loader->linear += size;
134 | 		loader->output += size;
135 | 	}
136 | 
137 | 	// resolve labels
138 | 	for (p = loader->head.next; p != &loader->head; p = p->next) {
139 | 		CLink *link = iqueue_entry(p, CLink, head);
140 | 		CEncoding *encoding = &link->encoding;
141 | 		unsigned char *offset = (unsigned char*)link->offset;
142 | 		if (cencoding_get_reference(encoding)) {
143 | 			const char *label = cencoding_get_reference(encoding);
144 | 			long linear = cloader_resolve_label(loader, label);
145 | 			if (linear == 0) {
146 | 				strncpy(loader->error, "not find label: ", 40);
147 | 				strncat(loader->error, label, 100);
148 | 				loader->errcode = link->lineno;
149 | 				return -1;
150 | 			}
151 | 			if (encoding->relative == 0) {
152 | 				cencoding_set_immediate(encoding, linear);
153 | 			}
154 | 			else {
155 | 				long diff = linear - (link->offset + link->size);
156 | 				cencoding_set_jump_offset(encoding, diff);
157 | 			}
158 | 			cencoding_write_code(encoding, offset);
159 | 		}
160 | 	}
161 | 	
162 | 	return 0;
163 | }
164 | 
165 | 
166 | void cloader_print(const CLoader *loader)
167 | {
168 | 	struct IQUEUEHEAD *p;
169 | 	static char line[400];
170 | 	for (p = loader->head.next; p != &loader->head; p = p->next) {
171 | 		CLink *link = iqueue_entry(p, CLink, head);
172 | 		CEncoding *encoding = &link->encoding;
173 | 		cencoding_to_string(encoding, line);
174 | 		printf("%s\n", line);
175 | 	}
176 | }
177 | 
178 | 
179 | 


--------------------------------------------------------------------------------
/source/cloader.h:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cloader.h - source loader
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #ifndef __CLOADER_H__
 10 | #define __CLOADER_H__
 11 | 
 12 | #include "cencoding.h"
 13 | 
 14 | 
 15 | /*====================================================================*/
 16 | /* QUEUE DEFINITION                                                   */
 17 | /*====================================================================*/
 18 | #ifndef __IQUEUE_DEF__
 19 | #define __IQUEUE_DEF__
 20 | 
 21 | struct IQUEUEHEAD {
 22 | 	struct IQUEUEHEAD *next, *prev;
 23 | };
 24 | 
 25 | typedef struct IQUEUEHEAD iqueue_head;
 26 | 
 27 | 
 28 | /*--------------------------------------------------------------------*/
 29 | /* queue init                                                         */
 30 | /*--------------------------------------------------------------------*/
 31 | #define IQUEUE_HEAD_INIT(name) { &(name), &(name) }
 32 | #define IQUEUE_HEAD(name) \
 33 | 	struct IQUEUEHEAD name = IQUEUE_HEAD_INIT(name)
 34 | 
 35 | #define IQUEUE_INIT(ptr) ( \
 36 | 	(ptr)->next = (ptr), (ptr)->prev = (ptr))
 37 | 
 38 | #define IOFFSETOF(TYPE, MEMBER) ((unsigned long) &((TYPE *)0)->MEMBER)
 39 | 
 40 | #define ICONTAINEROF(ptr, type, member) ( \
 41 | 		(type*)( ((char*)((type*)ptr)) - IOFFSETOF(type, member)) )
 42 | 
 43 | #define IQUEUE_ENTRY(ptr, type, member) ICONTAINEROF(ptr, type, member)
 44 | 
 45 | 
 46 | /*--------------------------------------------------------------------*/
 47 | /* queue operation                                                    */
 48 | /*--------------------------------------------------------------------*/
/* Insert `node` immediately after `head` (front of the queue).          */
/* Comma expression; both arguments are evaluated several times.         */
#define IQUEUE_ADD(node, head) ( \
	(node)->prev = (head), (node)->next = (head)->next, \
	(head)->next->prev = (node), (head)->next = (node))

/* Insert `node` immediately before `head` (back of the queue). */
#define IQUEUE_ADD_TAIL(node, head) ( \
	(node)->prev = (head)->prev, (node)->next = (head), \
	(head)->prev->next = (node), (head)->prev = (node))

/* Link `p` and `n` directly to each other, dropping whatever was between. */
#define IQUEUE_DEL_BETWEEN(p, n) ((n)->prev = (p), (p)->next = (n))

/* Unlink `entry` from its queue and zero its own links. */
#define IQUEUE_DEL(entry) (\
	(entry)->next->prev = (entry)->prev, \
	(entry)->prev->next = (entry)->next, \
	(entry)->next = 0, (entry)->prev = 0)

/* Unlink `entry` and re-initialize it as an empty queue (statement form). */
#define IQUEUE_DEL_INIT(entry) do { \
	IQUEUE_DEL(entry); IQUEUE_INIT(entry); } while (0)

/* Non-zero when the queue headed by `entry` has no elements. */
#define IQUEUE_IS_EMPTY(entry) ((entry) == (entry)->next)

/* Lower-case aliases for the macros above. */
#define iqueue_init		IQUEUE_INIT
#define iqueue_entry	IQUEUE_ENTRY
#define iqueue_add		IQUEUE_ADD
#define iqueue_add_tail	IQUEUE_ADD_TAIL
#define iqueue_del		IQUEUE_DEL
#define iqueue_del_init	IQUEUE_DEL_INIT
#define iqueue_is_empty IQUEUE_IS_EMPTY
 76 | 
/* Loop header that walks the queue at `head`, setting `iterator` (a       */
/* TYPE pointer) to each containing object in turn; MEMBER is the name of  */
/* the embedded queue node inside TYPE. The step reads the iterator's      */
/* next link after the body has run.                                       */
#define IQUEUE_FOREACH(iterator, head, TYPE, MEMBER) \
	for ((iterator) = iqueue_entry((head)->next, TYPE, MEMBER); \
		&((iterator)->MEMBER) != (head); \
		(iterator) = iqueue_entry((iterator)->MEMBER.next, TYPE, MEMBER))

/* Lower-case alias of IQUEUE_FOREACH. */
#define iqueue_foreach(iterator, head, TYPE, MEMBER) \
	IQUEUE_FOREACH(iterator, head, TYPE, MEMBER)

/* Loop header that walks the raw queue nodes: `pos` visits every node */
/* linked after `head` until the walk returns to `head`.               */
#define iqueue_foreach_entry(pos, head) \
	for( (pos) = (head)->next; (pos) != (head) ; (pos) = (pos)->next )
 87 | 	
 88 | 
/* Insert all nodes of `list` right after `head`. `list` must not be      */
/* empty and is left untouched (its own links still point at the moved    */
/* nodes). Declares block-local variables named first, last and at, so    */
/* arguments must not be expressions that use those names.                */
#define __iqueue_splice(list, head) do {	\
		iqueue_head *first = (list)->next, *last = (list)->prev; \
		iqueue_head *at = (head)->next; \
		(first)->prev = (head), (head)->next = (first);		\
		(last)->next = (at), (at)->prev = (last); }	while (0)

/* Same as __iqueue_splice, but a no-op when `list` is empty. */
#define iqueue_splice(list, head) do { \
	if (!iqueue_is_empty(list)) __iqueue_splice(list, head); } while (0)

/* Splice `list` after `head`, then reset `list` to an empty queue. */
#define iqueue_splice_init(list, head) do {	\
	iqueue_splice(list, head);	iqueue_init(list); } while (0)
100 | 
101 | 
102 | #ifdef _MSC_VER
103 | #pragma warning(disable:4311)
104 | #pragma warning(disable:4312)
105 | #pragma warning(disable:4996)
106 | #endif
107 | 
108 | #endif
109 | 
110 | 
111 | //---------------------------------------------------------------------
112 | // CLink
113 | //---------------------------------------------------------------------
114 | struct CLink
115 | {
116 | 	struct IQUEUEHEAD head;
117 | 	CEncoding encoding;
118 | 	unsigned long offset;
119 | 	int size;
120 | 	int lineno;
121 | };
122 | 
123 | typedef struct CLink CLink;
124 | 
125 | 
126 | //---------------------------------------------------------------------
127 | // CLoader Structure
128 | //---------------------------------------------------------------------
129 | struct CLoader
130 | {
131 | 	struct IQUEUEHEAD head;		// link head;
132 | 	char *error;
133 | 	int errcode;
134 | 	int lineno;
135 | 	unsigned long linear;
136 | 	unsigned char *output;
137 | };
138 | 
139 | typedef struct CLoader CLoader;
140 | 
141 | #ifdef __cplusplus
142 | extern "C" {
143 | #endif
144 | //---------------------------------------------------------------------
145 | // CLoader interface
146 | //---------------------------------------------------------------------
// Create an empty loader with a cleared error state.
CLoader *cloader_create(void);

// Reset the loader and release its error buffer.
void cloader_release(CLoader *loader);

// Discard all queued encodings and clear error/output bookkeeping.
void cloader_reset(CLoader *loader);

// Queue a copy of `encoding` at the end of the loader; returns 0.
int cloader_new_encoding(CLoader *loader, const CEncoding *encoding);

// Total length of all queued encodings, as reported by cencoding_length().
int cloader_get_codesize(CLoader *loader);

// Write all queued encodings to `output` and patch label references.
// Returns 0 on success, -1 when a referenced label is undefined (see
// loader->error / loader->errcode).
int cloader_output(CLoader *loader, unsigned char *output);

// Print a textual form of every queued encoding to stdout.
void cloader_print(const CLoader *loader);
160 | 
161 | 
162 | #ifdef __cplusplus
163 | }
164 | #endif
165 | 
166 | #endif
167 | 
168 | 
169 | 


--------------------------------------------------------------------------------
/source/cparser.c:
--------------------------------------------------------------------------------
   1 | //=====================================================================
   2 | //
   3 | // cparser.c - source parser
   4 | //
   5 | // NOTE:
   6 | // for more information, please see the readme file.
   7 | //
   8 | //=====================================================================
   9 | #include "cparser.h"
  10 | 
  11 | #ifdef _MSC_VER
  12 | #pragma warning(disable: 4996)
  13 | #endif
  14 | 
  15 | #define IMAX_DATA 65536
  16 | 
  17 | CParser *cparser_create(void)
  18 | {
  19 | 	CParser *parser;
  20 | 	parser = (CParser*)malloc(sizeof(CParser));
  21 | 	assert(parser);
  22 | 	parser->token = cscanner_create();
  23 | 	assert(parser->token);
  24 | 	parser->instruction = NULL;
  25 | 	parser->instructionset = cinstset_create();
  26 | 	csynth_init(&parser->synthesizer);
  27 | 	parser->data = (char*)malloc(IMAX_DATA);
  28 | 	assert(parser->data);
  29 | 	parser->error = (char*)malloc(1024);
  30 | 	assert(parser->error);
  31 | 	parser->error[0] = 0;
  32 | 	parser->errcode = 0;
  33 | 	parser->vars = NULL;
  34 | 	parser->inproc = 0;
  35 | 	parser->stack = 0;
  36 | 	return parser;
  37 | }
  38 | 
  39 | void cparser_release(CParser *parser)
  40 | {
  41 | 	assert(parser);
  42 | 	if (parser->token) {
  43 | 		cscanner_release(parser->token);
  44 | 		parser->token = NULL;
  45 | 	}
  46 | 	if (parser->instructionset) {
  47 | 		cinstset_release(parser->instructionset);
  48 | 		parser->instructionset = NULL;
  49 | 	}
  50 | 	if (parser->error) {
  51 | 		free(parser->error);
  52 | 		parser->error = NULL;
  53 | 	}
  54 | 	if (parser->data) {
  55 | 		free(parser->data);
  56 | 		parser->data = NULL;
  57 | 	}
  58 | 	while (parser->vars) {
  59 | 		CVariable *var = parser->vars;
  60 | 		parser->vars = parser->vars->next;
  61 | 		free(var->name);
  62 | 		free(var);
  63 | 	}
  64 | 	csynth_destroy(&parser->synthesizer);
  65 | 	free(parser);
  66 | }
  67 | 
  68 | void cparser_reset(CParser *parser)
  69 | {
  70 | 	cscanner_macro_reset(parser->token);
  71 | 	while (parser->vars) {
  72 | 		CVariable *var = parser->vars;
  73 | 		parser->vars = parser->vars->next;
  74 | 		free(var->name);
  75 | 		free(var);
  76 | 	}
  77 | 	parser->inproc = 0;
  78 | 	parser->stack = 0;
  79 | }
  80 | 
// Forward declarations of the file-local parsing stages used by
// cparser_parse_line(). The int-returning stages report failure with a
// non-zero result; the COperand-returning ones signal failure through
// parser->errcode.
static int cparser_parse_label(CParser *parser);
static int cparser_parse_mnemonic(CParser *parser);
static int cparser_parse_specifier(CParser *parser);
static int cparser_parse_first_operand(CParser *parser);
static int cparser_parse_second_operand(CParser *parser);
static int cparser_parse_third_operand(CParser *parser);
static COperand cparser_parse_immediate(CParser *parser);
static COperand cparser_parse_register(CParser *parser);
static COperand cparser_parse_memory_reference(CParser *parser);

// Directive-style stages tried before the mnemonic.
static int cparser_parse_data(CParser *parser);
static int cparser_parse_align(CParser *parser);
static int cparser_parse_prefix(CParser *parser);

static int cparser_parse_proc(CParser *parser);
  96 | 
  97 | static void cparser_error(CParser *parser, const char *error, int code)
  98 | {
  99 | 	strncpy(parser->error, error, 100);
 100 | 	parser->errcode = code;
 101 | }
 102 | 
 103 | const CEncoding *cparser_parse_line(CParser *parser, const char *source)
 104 | {
 105 | 	int retval;
 106 | 
 107 | 	if (source == NULL) {
 108 | 		cparser_error(parser, "empty source line", 1);
 109 | 		return NULL;
 110 | 	}
 111 | 
 112 | 	retval = cscanner_set_source(parser->token, source);
 113 | 
 114 | 	if (retval != 0) {
 115 | 		cparser_error(parser, parser->token->error, 2);
 116 | 		return NULL;
 117 | 	}
 118 | 
 119 | 	parser->instruction = NULL;
 120 | 	csynth_reset(&parser->synthesizer);
 121 | 
 122 | 	parser->error[0] = 0;
 123 | 	parser->errcode = 0;
 124 | 
 125 | 	// parse label
 126 | 	if (!cscanner_is_endl(parser->token)) {
 127 | 		if (cparser_parse_label(parser)) {
 128 | 			cparser_error(parser, "label error", 3);
 129 | 			return NULL;
 130 | 		}
 131 | 	}
 132 | 
 133 | 	// parse inline data
 134 | 	if (!cscanner_is_endl(parser->token)) {
 135 | 		if (cparser_parse_data(parser)) {
 136 | 			return NULL;
 137 | 		}
 138 | 		if (parser->synthesizer.encoding.data != NULL) {
 139 | 			return &parser->synthesizer.encoding;
 140 | 		}
 141 | 	}
 142 | 
 143 | 	// parse align
 144 | 	if (!cscanner_is_endl(parser->token)) {
 145 | 		if (cparser_parse_align(parser)) {
 146 | 			return NULL;
 147 | 		}
 148 | 		if (parser->synthesizer.encoding.align != 0) {
 149 | 			return &parser->synthesizer.encoding;
 150 | 		}
 151 | 	}
 152 | 
 153 | 	// parse proc
 154 | 	if (!cscanner_is_endl(parser->token)) {
 155 | 		if (cparser_parse_proc(parser)) {
 156 | 			return NULL;
 157 | 		}
 158 | 	}
 159 | 
 160 | 	// parse repnz, repz
 161 | 	if (!cscanner_is_endl(parser->token)) {
 162 | 		if (cparser_parse_prefix(parser)) {
 163 | 			return NULL;
 164 | 		}
 165 | 	}
 166 | 
 167 | 	// parse mnemonic
 168 | 	if (!cscanner_is_endl(parser->token)) {
 169 | 		if (cparser_parse_mnemonic(parser)) {
 170 | 			cparser_error(parser, "mnemonic syntax error", 4);
 171 | 			return NULL;
 172 | 		}
 173 | 
 174 | 		if (!parser->instruction) {
 175 | 			cparser_error(parser, "mnemonic error", 5);
 176 | 			return NULL;
 177 | 		}
 178 | 
 179 | 		if (cparser_parse_first_operand(parser)) {
 180 | 			if (parser->errcode == 0) 
 181 | 				cparser_error(parser, "first operand error", 6);
 182 | 			return NULL;
 183 | 		}
 184 | 
 185 | 		if (cparser_parse_second_operand(parser)) {
 186 | 			if (parser->errcode == 0)
 187 | 				cparser_error(parser, "second operand error", 7);
 188 | 			return NULL;
 189 | 		}
 190 | 
 191 | 		if (cparser_parse_third_operand(parser)) {
 192 | 			if (parser->errcode == 0)
 193 | 				cparser_error(parser, "third operand error", 8);
 194 | 			return NULL;
 195 | 		}
 196 | 	}
 197 | 
 198 | 	if (parser->instruction) {
 199 | 		do {
 200 | 			if (cinst_match_syntax(parser->instruction)) {
 201 | 				break;
 202 | 			}
 203 | 			parser->instruction = parser->instruction->next;
 204 | 		}	while (parser->instruction);
 205 | 
 206 | 		if (parser->instruction == NULL) {
 207 | 			cparser_error(parser, "operands mismatch", 9);
 208 | 			return NULL;
 209 | 		}
 210 | #if 0
 211 | 		printf("%s (%s) (%s) specifier=%d\n",
 212 | 			parser->instruction->syntax->mnemonic,
 213 | 			parser->instruction->syntax->operands,
 214 | 			parser->instruction->syntax->encoding,
 215 | 			parser->instruction->specifier);
 216 | #endif
 217 | 	}
 218 | 
 219 | 	return csynth_encode_instruction(&parser->synthesizer, 
 220 | 		parser->instruction);
 221 | }
 222 | 
 223 | 
 224 | static int cparser_parse_label(CParser *parser)
 225 | {
 226 | 	const CTOKEN *current = cscanner_token_current(parser->token);
 227 | 	const CTOKEN *next = cscanner_token_lookahead(parser->token);
 228 | 	if (ctoken_is_ident(current) && ctoken_get_char(next) == ':') {
 229 | 		csynth_define_label(&parser->synthesizer, current->str);
 230 | 		cscanner_token_advance(parser->token, 2);
 231 | 	}
 232 | 	else if (ctoken_get_char(current) == '.' && ctoken_is_ident(next)) {
 233 | 		csynth_define_label(&parser->synthesizer, next->str);
 234 | 		cscanner_token_advance(parser->token, 2);
 235 | 	}
 236 | 	return 0;
 237 | }
 238 | 
 239 | static int cparser_parse_mnemonic(CParser *parser)
 240 | {
 241 | 	const char *name = cscanner_get_string(parser->token);
 242 | 	parser->instruction = cinstset_query(parser->instructionset, name);
 243 | 	if (parser->instruction) {
 244 | 		cinst_match_mnemonic(parser->instruction, name);
 245 | 		cscanner_token_advance(parser->token, 1);
 246 | 	}	else {
 247 | 		cparser_error(parser, "Mnemonic not recognised", 10);
 248 | 		return -1;
 249 | 	}
 250 | 	return 0;
 251 | }
 252 | 
 253 | static int cparser_parse_specifier(CParser *parser)
 254 | {
 255 | 	enum CSpecifierType type = CS_UNKNOWN;
 256 | 
 257 | 	if (cscanner_is_ident(parser->token)) {
 258 | 		type = cspecifier_scan(cscanner_get_string(parser->token));
 259 | 	}
 260 | 
 261 | 	cinst_match_specifier(parser->instruction, type);
 262 | 
 263 | 	if (type != CS_UNKNOWN) {
 264 | 		cscanner_token_advance(parser->token, 1);
 265 | 	}
 266 | 
 267 | 	return 0;
 268 | }
 269 | 
 270 | static int cparser_parse_first_operand(CParser *parser)
 271 | {
 272 | 	COperand firstOperand = CINIT;
 273 | 
 274 | 	assert(parser->instruction);
 275 | 
 276 | 	cparser_parse_specifier(parser);
 277 | 
 278 | 	if (cscanner_is_endl(parser->token)) {
 279 | 	}
 280 | 	else if (cscanner_is_operator(parser->token)) {
 281 | 		switch (cscanner_get_char(parser->token)) {
 282 | 		case '[':
 283 | 			firstOperand = cparser_parse_memory_reference(parser);
 284 | 			if (parser->errcode) return -5;
 285 | 			break;
 286 | 		case '+':
 287 | 		case '-':
 288 | 		case '~':
 289 | 			firstOperand = cparser_parse_immediate(parser);
 290 | 			break;
 291 | 		default:
 292 | 			cparser_error(parser, "Unexpected punctuator after mnemonic", 1);
 293 | 			return -1;
 294 | 			break;
 295 | 		}
 296 | 	}
 297 | 	else if (cscanner_is_int(parser->token)) {
 298 | 		firstOperand = cparser_parse_immediate(parser);
 299 | 		if (parser->errcode) return -5;
 300 | 	}
 301 | 	else if (cscanner_is_ident(parser->token)) {
 302 | 		firstOperand = cparser_parse_register(parser);
 303 | 		if (parser->errcode) return -5;
 304 | 	}
 305 | 	else {
 306 | 		cparser_error(parser, "Invalid destination operand", 11);
 307 | 		return -2;
 308 | 	}
 309 | 
 310 | 	cinst_match_first_operand(parser->instruction, &firstOperand);
 311 | 	csynth_encode_first_operand(&parser->synthesizer, &firstOperand);
 312 | 
 313 | 	return 0;
 314 | }
 315 | 
 316 | static int cparser_parse_second_operand(CParser *parser)
 317 | {
 318 | 	COperand secondOperand = CINIT;
 319 | 	assert(parser->instruction);
 320 | 
 321 | 	if (cscanner_get_char(parser->token) == ',') {
 322 | 		cscanner_token_advance(parser->token, 1);
 323 | 	}	
 324 | 	else if (!cscanner_is_endl(parser->token)) {
 325 | 		cparser_error(parser, "Operands must be separated by comma", 12);
 326 | 		return -3;
 327 | 	}
 328 | 	else {
 329 | 		cinst_match_second_operand(parser->instruction, &secondOperand);
 330 | 		return 0;
 331 | 	}
 332 | 
 333 | 	cparser_parse_specifier(parser);
 334 | 
 335 | 	if (cscanner_is_endl(parser->token)) {
 336 | 	}
 337 | 	else if (cscanner_is_operator(parser->token)) {
 338 | 		switch (cscanner_get_char(parser->token)) {
 339 | 		case '[':
 340 | 			secondOperand = cparser_parse_memory_reference(parser);
 341 | 			if (parser->errcode) return -5;
 342 | 			break;
 343 | 		case '+':
 344 | 		case '-':
 345 | 		case '~':
 346 | 			secondOperand = cparser_parse_immediate(parser);
 347 | 			if (parser->errcode) return -5;
 348 | 			break;
 349 | 		default:
 350 | 			cparser_error(parser, "Unexpected punctuator after mnemonic", 1);
 351 | 			return -1;
 352 | 			break;
 353 | 		}
 354 | 	}
 355 | 	else if (cscanner_is_int(parser->token)) {
 356 | 		secondOperand = cparser_parse_immediate(parser);
 357 | 		if (parser->errcode) return -5;
 358 | 	}
 359 | 	else if (cscanner_is_ident(parser->token)) {
 360 | 		secondOperand = cparser_parse_register(parser);
 361 | 		if (parser->errcode) return -5;
 362 | 	}
 363 | 	else {
 364 | 		cparser_error(parser, "Invalid source operand", 13);
 365 | 		return -2;
 366 | 	}
 367 | 
 368 | 	cinst_match_second_operand(parser->instruction, &secondOperand);
 369 | 	csynth_encode_second_operand(&parser->synthesizer, &secondOperand);
 370 | 
 371 | 	return 0;
 372 | }
 373 | 
 374 | static int cparser_parse_third_operand(CParser *parser)
 375 | {
 376 | 	COperand thirdOperand = CINIT;
 377 | 
 378 | 	assert(parser->instruction);
 379 | 
 380 | 	if (cscanner_get_char(parser->token) == ',') {
 381 | 		cscanner_token_advance(parser->token, 1);
 382 | 	}
 383 | 	else if (!cscanner_is_endl(parser->token)) {
 384 | 		cparser_error(parser, "Operands must be separated by comma", 14);
 385 | 		return -3;
 386 | 	}
 387 | 	else {
 388 | 		cinst_match_third_operand(parser->instruction, &thirdOperand);
 389 | 		return 0;
 390 | 	}
 391 | 
 392 | 	if (cscanner_is_endl(parser->token)) {
 393 | 	}
 394 | 	else if (cscanner_is_operator(parser->token)) {
 395 | 		switch (cscanner_get_char(parser->token)) {
 396 | 		case '+':
 397 | 		case '-':
 398 | 		case '~':
 399 | 			thirdOperand = cparser_parse_immediate(parser);
 400 | 			if (parser->errcode) return -5;
 401 | 			break;
 402 | 		default:
 403 | 			cparser_error(parser, "Unexpected punctuator after mnemonic", 1);
 404 | 			return -1;
 405 | 			break;
 406 | 		}
 407 | 	}
 408 | 	else if (cscanner_is_int(parser->token)) {
 409 | 		thirdOperand = cparser_parse_immediate(parser);
 410 | 		if (parser->errcode) return -5;
 411 | 	}
 412 | 	else {
 413 | 		cparser_error(parser, "Too many operands", 15);
 414 | 		return -2;
 415 | 	}
 416 | 
 417 | 	cinst_match_third_operand(parser->instruction, &thirdOperand);
 418 | 	csynth_encode_third_operand(&parser->synthesizer, &thirdOperand);
 419 | 
 420 | 	return 0;
 421 | }
 422 | 
 423 | static COperand cparser_parse_immediate(CParser *parser)
 424 | {
 425 | 	COperand imm = CINIT;
 426 | 	if (cscanner_is_operator(parser->token)) {
 427 | 		int ch = cscanner_get_char(parser->token);
 428 | 		if (ch == '+') {
 429 | 			cscanner_token_advance(parser->token, 1);
 430 | 			imm.value = +cscanner_get_value(parser->token);
 431 | 		}
 432 | 		else if (ch == '-') {
 433 | 			cscanner_token_advance(parser->token, 1);
 434 | 			imm.value = -cscanner_get_value(parser->token);
 435 | 		}
 436 | 		else if (ch == '~') {
 437 | 			cscanner_token_advance(parser->token, 1);
 438 | 			imm.value = ~cscanner_get_value(parser->token);
 439 | 		}
 440 | 		else {
 441 | 			cparser_error(parser, "error operator", 16);
 442 | 			return imm;
 443 | 		}
 444 | 	}
 445 | 	else if (cscanner_is_int(parser->token)) {
 446 | 		imm.value = cscanner_get_value(parser->token);
 447 | 	}
 448 | 	else {
 449 | 		cparser_error(parser, "immediate error", 17);
 450 | 		return imm;
 451 | 	}
 452 | 
 453 | 	if ((unsigned char)imm.value == imm.value) {
 454 | 		imm.type = O_IMM8;
 455 | 	}
 456 | 	else if ((unsigned short)imm.value == imm.value) {
 457 | 		imm.type = O_IMM16;
 458 | 	}
 459 | 	else {
 460 | 		imm.type = O_IMM32;
 461 | 	}
 462 | 
 463 | 	cscanner_token_advance(parser->token, 1);
 464 | 
 465 | 	return imm;
 466 | }
 467 | 
 468 | static COperand cparser_parse_register(CParser *parser)
 469 | {
 470 | 	COperand reg = CINIT;
 471 | 	const char *name;
 472 | 
 473 | 	name = cscanner_get_string(parser->token);
 474 | 	reg = coperand_scan_reg(name);
 475 | 
 476 | 	// It's not a register, so it must be a reference
 477 | 	if (reg.type == O_UNKNOWN) {
 478 | 		csynth_reference_label(&parser->synthesizer, name);
 479 | 		// first operand should be immediate
 480 | 		reg.type = O_IMM8;	// also matchs IMM32
 481 | 		cinst_match_first_operand(parser->instruction, &reg);
 482 | 	}
 483 | 
 484 | 	cscanner_token_advance(parser->token, 1);
 485 | 
 486 | 	return reg;
 487 | }
 488 | 
// Parse a bracketed memory reference such as [base + index*scale + disp].
//
// Callers invoke this when the current token is '['. Each iteration steps
// to the next token and interprets it using its neighbours: `prev` is the
// token just left behind, `token` the one now current, `next` the
// lookahead. Registers become base or index, integers become scale or
// displacement, and operators are only validated.
//
// On ']' the closing bracket is consumed and an operand of type O_MEM is
// returned. On any error the parser's error/errcode are set and an operand
// whose type is not O_MEM is returned; callers detect failure through
// parser->errcode.
static COperand cparser_parse_memory_reference(CParser *parser)
{
	COperand mem = CINIT;

	for (; ; ) {
		const CTOKEN *next;
		const CTOKEN *prev;
		const CTOKEN *token;
		int type;

		// stop if the line ends before the closing ']' was seen
		type = cscanner_token_lookahead(parser->token)->type;
		if (type == CTokenENDL || type == CTokenENDF) break;

		// step forward by one token, remembering the one we leave
		prev = cscanner_token_current(parser->token);
		cscanner_token_advance(parser->token, 1);
		next = cscanner_token_lookahead(parser->token);
		token = cscanner_token_current(parser->token);

		if (token->type == CTokenIDENT) {
			// identifiers inside the brackets must be registers
			COperand reg = coperand_scan_reg(token->str);
			if (reg.type == O_UNKNOWN) {
				cparser_error(parser, "unknow reg reference", 18);
				return reg;
			}
			// a register next to '*' is the scaled index, otherwise the base
			if (ctoken_get_char(prev) == '*' || ctoken_get_char(next) == '*')
			{
				csynth_encode_index(&parser->synthesizer, &reg);
			}
			else 
			{
				csynth_encode_base(&parser->synthesizer, &reg);
			}
		}
		else if (token->type == CTokenOPERATOR) {
			switch (ctoken_get_char(token)) {
			case ']':
				// end of the reference: consume ']' and succeed
				mem.type = O_MEM;
				cscanner_token_advance(parser->token, 1);
				return mem;
				break;
			case '+':
				// '+' needs an integer or identifier on both sides
				if ((prev->type != CTokenINT && prev->type != CTokenIDENT) ||
					(next->type != CTokenINT && next->type != CTokenIDENT)) {
					cparser_error(parser, 
						"Syntax error '+' in memory reference", 19);
					return mem;
				}
				break;
			case '-':
				// '-' may follow an integer, an identifier or '[' and must
				// be followed by an integer
				if ((prev->type != CTokenINT && prev->type != CTokenIDENT &&
					ctoken_get_char(prev) != '[') ||
					next->type != CTokenINT) {
					cparser_error(parser, 
						"Syntax error '-' in memory reference", 20);
					return mem;
				}
				break;
			case '*':
				// '*' must join exactly one integer and one identifier
				if ((prev->type != CTokenINT || next->type != CTokenIDENT) &&
					(next->type != CTokenINT || prev->type != CTokenIDENT)) {
					cparser_error(parser,
						"Syntax error '*' in memory reference", 21);
					return mem;
				}
				break;
			default:
				cparser_error(parser, 
					"Unexpected punctuator in memory reference", 22);
				return mem;
				break;
			}
		}
		else if (token->type == CTokenINT) {
			int prevch = ctoken_get_char(prev);
			int nextch = ctoken_get_char(next);
			int value = ctoken_get_int(token);
			if (prevch == '*' || nextch == '*') {
				// an integer next to '*' is the scale factor
				if (value == 1 || value == 2 || value == 4 || value == 8) {
					csynth_encode_scale(&parser->synthesizer, value);
				}	else {
					cparser_error(parser, 
						"Invalid scale in memory reference", 23);
					return mem;
				}
			}
			else if (prevch == '-') {
				// negative displacement
				csynth_encode_displacement(&parser->synthesizer, -value);
			}
			else if (prevch == '+' || nextch == '+') {
				// positive displacement
				csynth_encode_displacement(&parser->synthesizer, value);
			}
			else if (prevch == '[' && nextch == ']') {
				// a bare number alone in the brackets is rejected
				cparser_error(parser,
					"Invalid number in memory reference", 30);
				return mem;
			}
			else {
				cparser_error(parser, 
					"Invalid number in memory reference", 24);
				return mem;
			}
		}
		else {
			cparser_error(parser, 
				"Unexpected token in memory reference", 25);
			return mem;
		}
	}

	// the loop only exits here when the line ended without ']'
	cparser_error(parser, "Unexpected end of line in memory reference", 26);

	return mem;
}
 602 | 
 603 | 
 604 | static int cparser_parse_data(CParser *parser)
 605 | {
 606 | 	const char *name;
 607 | 	long pos = 0;
 608 | 	int size = -1;
 609 | 
 610 | 	if (cscanner_is_ident(parser->token) == 0) {
 611 | 		return 0;
 612 | 	}
 613 | 
 614 | 	name = cscanner_get_string(parser->token);
 615 | 
 616 | 	if (stricmp(name, "DB") == 0) size = 1;
 617 | 	else if (stricmp(name, "DW") == 0) size = 2;
 618 | 	else if (stricmp(name, "DD") == 0) size = 4;
 619 | 
 620 | 	if (size < 0) return 0;
 621 | 
 622 | 	cscanner_token_advance(parser->token, 1);
 623 | 
 624 | 	for (pos = 0; ; ) {
 625 | 		unsigned char *ptr = (unsigned char*)parser->data;
 626 | 		const CTOKEN *token;
 627 | 
 628 | 		if (cscanner_is_endl(parser->token)) break;
 629 | 
 630 | 		token = cscanner_token_current(parser->token);
 631 | 
 632 | 		if (token->type == CTokenINT) {
 633 | 			cuint32 value = (cuint32)token->intval;
 634 | 			if (pos + size >= IMAX_DATA) {
 635 | 				cparser_error(parser, "data too long", 41);
 636 | 				return -1;
 637 | 			}
 638 | 			if (size == 1) {
 639 | 				ptr[pos++] = (unsigned char)((value >>  0) & 0xff);
 640 | 			}
 641 | 			else if (size == 2) {
 642 | 				ptr[pos++] = (unsigned char)((value >>  0) & 0xff);
 643 | 				ptr[pos++] = (unsigned char)((value >>  8) & 0xff);
 644 | 			}
 645 | 			else if (size == 4) {
 646 | 				ptr[pos++] = (unsigned char)((value >>  0) & 0xff);
 647 | 				ptr[pos++] = (unsigned char)((value >>  8) & 0xff);
 648 | 				ptr[pos++] = (unsigned char)((value >> 16) & 0xff);
 649 | 				ptr[pos++] = (unsigned char)((value >> 24) & 0xff);
 650 | 			}
 651 | 		}
 652 | 		else if (token->type == CTokenSTR) {
 653 | 			const char *text = token->str;
 654 | 			long size, i, c;
 655 | 			char hex[3];
 656 | 			size = (long)strlen(text);
 657 | 			for (i = 0; i < size; ) {
 658 | 				if (i + 1 >= IMAX_DATA) {
 659 | 					cparser_error(parser, "data too long", 41);
 660 | 					return -2;
 661 | 				}
 662 | 				if (text[i] == '\\') {
 663 | 					switch (text[i + 1])
 664 | 					{
 665 | 					case '\\': ptr[pos++] = '\\'; i += 2; break;
 666 | 					case 'n' : ptr[pos++] = '\n'; i += 2; break;
 667 | 					case 'r' : ptr[pos++] = '\r'; i += 2; break;
 668 | 					case 't' : ptr[pos++] = '\t'; i += 2; break;
 669 | 					case '0' : ptr[pos++] = '\0'; i += 2; break;
 670 | 					case '?' : ptr[pos++] = '?'; i += 2; break;
 671 | 					case '\'': ptr[pos++] = '\''; i += 2; break;
 672 | 					case '\"': ptr[pos++] = '\"'; i += 2; break;
 673 | 					case 'a' : ptr[pos++] = '\a'; i += 2; break;
 674 | 					case 'b' : ptr[pos++] = '\b'; i += 2; break;
 675 | 					case 'f' : ptr[pos++] = '\f'; i += 2; break;
 676 | 					case 'v' : ptr[pos++] = '\v'; i += 2; break;
 677 | 					case 'x' :
 678 | 						i += 2;
 679 | 						hex[0] = text[i++];
 680 | 						hex[1] = text[i++];
 681 | 						hex[2] = 0;
 682 | 						c = strtol(hex, NULL, 16);
 683 | 						ptr[pos++] = (unsigned char)(c & 255);
 684 | 						break;
 685 | 					default:
 686 | 						cparser_error(parser, "string format error", 42);
 687 | 						return -3;
 688 | 						break;
 689 | 					}
 690 | 				}
 691 | 				else if (text[i] == '\'') {
 692 | 					if (text[i + 1] == '\'') {
 693 | 						ptr[pos++] = '\'';
 694 | 						i += 2;
 695 | 					}	else {
 696 | 						ptr[pos++] = '\'';
 697 | 						i += 1;
 698 | 					}
 699 | 				}
 700 | 				else if (text[i] == '\"') {
 701 | 					if (text[i + 1] == '\"') {
 702 | 						ptr[pos++] = '\"';
 703 | 						i += 2;
 704 | 					}	else {
 705 | 						ptr[pos++] = '\"';
 706 | 						i += 1;
 707 | 					}
 708 | 				}
 709 | 				else {
 710 | 					ptr[pos++] = (unsigned char)text[i++];
 711 | 				}
 712 | 			}
 713 | 		}
 714 | 		else {
 715 | 			cparser_error(parser, "unrecongnize data", 43);
 716 | 			return -4;
 717 | 		}
 718 | 
 719 | 		cscanner_token_advance(parser->token, 1);
 720 | 
 721 | 		if (!cscanner_is_endl(parser->token)) {
 722 | 			if (cscanner_get_char(parser->token) != ',') {
 723 | 				cparser_error(parser, "expected comma", 40);
 724 | 				return -3;
 725 | 			}
 726 | 			cscanner_token_advance(parser->token, 1);
 727 | 		}
 728 | 	}
 729 | 
 730 | 	if (pos > 0) {
 731 | 		cencoding_set_data(&parser->synthesizer.encoding, parser->data, pos);
 732 | 	}
 733 | 
 734 | 	return 0;
 735 | }
 736 | 
 737 | 
 738 | static int cparser_parse_prefix(CParser *parser)
 739 | {
 740 | 	const char *name;
 741 | 
 742 | 	if (cscanner_is_ident(parser->token) == 0) {
 743 | 		return 0;
 744 | 	}
 745 | 
 746 | 	name = cscanner_get_string(parser->token);
 747 | 
 748 | 	if (stricmp(name, "REP") == 0 ||
 749 | 		stricmp(name, "REPE") == 0 ||
 750 | 		stricmp(name, "REPZ") == 0) {
 751 | 		if (csynth_encode_prefix(&parser->synthesizer, 0xf3)) {
 752 | 			cparser_error(parser, parser->synthesizer.error, 70);
 753 | 			return -1;
 754 | 		}
 755 | 		cscanner_token_advance(parser->token, 1);
 756 | 	}
 757 | 	else if (stricmp(name, "REPNE") == 0 || stricmp(name, "REPNZ") == 0) {
 758 | 		parser->synthesizer.prefix = 0xf2;
 759 | 		if (csynth_encode_prefix(&parser->synthesizer, 0xf2)) {
 760 | 			cparser_error(parser, parser->synthesizer.error, 71);
 761 | 			return -2;
 762 | 		}
 763 | 		cscanner_token_advance(parser->token, 1);
 764 | 	}
 765 | 	else if (stricmp(name, "LOCK") == 0) {
 766 | 		if (csynth_encode_prefix(&parser->synthesizer, 0xf0)) {
 767 | 			cparser_error(parser, parser->synthesizer.error, 72);
 768 | 			return -3;
 769 | 		}
 770 | 		cscanner_token_advance(parser->token, 1);
 771 | 	}
 772 | 
 773 | 	return 0;
 774 | }
 775 | 
 776 | static int cparser_parse_align(CParser *parser)
 777 | {
 778 | 	const char *name;
 779 | 
 780 | 	if (cscanner_is_ident(parser->token) == 0) {
 781 | 		return 0;
 782 | 	}
 783 | 
 784 | 	name = cscanner_get_string(parser->token);
 785 | 	
 786 | 	if (stricmp(name, "ALIGN") == 0) {
 787 | 		int align = 4;
 788 | 		cscanner_token_advance(parser->token, 1);
 789 | 		if (cscanner_is_int(parser->token)) {
 790 | 			align = cscanner_get_value(parser->token);
 791 | 		}
 792 | 		while (!cscanner_is_endf(parser->token)) {
 793 | 			cscanner_token_advance(parser->token, 1);
 794 | 		}
 795 | 		if (align < 1) {
 796 | 			cparser_error(parser, "error align size", 80);
 797 | 			return -1;
 798 | 		}
 799 | 		parser->synthesizer.encoding.align = align;
 800 | 	}
 801 | 
 802 | 	return 0;
 803 | }
 804 | 
 805 | static int cparser_parse_size(CParser *parser)
 806 | {
 807 | 	const CTOKEN *token = cscanner_token_current(parser->token);
 808 | 	cscanner_token_advance(parser->token, 1);
 809 | 	if (token->type == CTokenIDENT) {
 810 | 		if (stricmp(token->str, "BYTE") == 0) return 1;
 811 | 		if (stricmp(token->str, "CHAR") == 0) return 1;
 812 | 		if (stricmp(token->str, "INT8") == 0) return 1;
 813 | 		if (stricmp(token->str, "UINT8") == 0) return 1;
 814 | 		if (stricmp(token->str, "WORD") == 0) return 2;
 815 | 		if (stricmp(token->str, "SHORT") == 0) return 2;
 816 | 		if (stricmp(token->str, "USHORT") == 0) return 2;
 817 | 		if (stricmp(token->str, "INT16") == 0) return 2;
 818 | 		if (stricmp(token->str, "UINT16") == 0) return 2;
 819 | 		if (stricmp(token->str, "DWORD") == 0) return 4;
 820 | 		if (stricmp(token->str, "INT") == 0) return 4;
 821 | 		if (stricmp(token->str, "UINT") == 0) return 4;
 822 | 		if (stricmp(token->str, "LONG") == 0) return 4;
 823 | 		if (stricmp(token->str, "ULONG") == 0) return 4;
 824 | 		if (stricmp(token->str, "INT32") == 0) return 4;
 825 | 		if (stricmp(token->str, "UINT32") == 0) return 4;
 826 | 	}
 827 | 	return 0;
 828 | }
 829 | 
 830 | static int cparser_parse_newvar(CParser *parser, const char *name, int stack)
 831 | {
 832 | 	char *macro = (char*)parser->data;
 833 | 	CVariable *var;
 834 | 
 835 | 	if (stricmp(name, "RET") == 0) {
 836 | 		sprintf(macro, "'%s' conflicted with keyword", name);
 837 | 		cparser_error(parser, macro, 96);
 838 | 		return -1;
 839 | 	}
 840 | 
 841 | 	for (var = parser->vars; var; var = var->next) {
 842 | 		if (strcmp(var->name, name) == 0) {
 843 | 			sprintf(macro, "'%s' redefined", name);
 844 | 			cparser_error(parser, macro, 97);
 845 | 			return -2;
 846 | 		}
 847 | 	}
 848 | 
 849 | 	if (stack >= 0) sprintf(macro, "[EBP + %d]", stack);
 850 | 	else sprintf(macro, "[EBP - %d]", -stack);
 851 | 
 852 | 	if (cscanner_macro_set(parser->token, name, macro)) {
 853 | 		sprintf(macro, "name '%s' redefined", name);
 854 | 		cparser_error(parser, macro, 95);
 855 | 		return -3;
 856 | 	}
 857 | 
 858 | 	var = (CVariable*)malloc(sizeof(CVariable));
 859 | 	assert(var);
 860 | 	var->name = strdup(name);
 861 | 	assert(var->name);
 862 | 	var->pos = stack;
 863 | 
 864 | 	var->next = parser->vars;
 865 | 	parser->vars = var;
 866 | 
 867 | 	return 0;
 868 | }
 869 | 
 870 | static int cparser_parse_proc(CParser *parser)
 871 | {
 872 | 	unsigned char instruction[20];
 873 | 	const char *name;
 874 | 
 875 | 	if (cscanner_is_ident(parser->token) == 0) {
 876 | 		return 0;
 877 | 	}
 878 | 
 879 | 	name = cscanner_get_string(parser->token);
 880 | 
 881 | 	if (stricmp(name, "PROC") == 0) {
 882 | 		const char *replace = "DB 0x8B, 0xE5, 0x5D, 0xC3\n";
 883 | 		int stack = 8;
 884 | 
 885 | 		if (parser->inproc) {
 886 | 			cparser_error(parser, "cannot define proc in a proc block", 90);
 887 | 			return -1;
 888 | 		}
 889 | 		parser->inproc = 1;
 890 | 		parser->stack = 0;
 891 | 
 892 | 		// replace ret to "mov esp, ebp; pop ebp; ret"
 893 | 		cscanner_macro_set(parser->token, "ret", replace);
 894 | 		cscanner_macro_set(parser->token, "RET", replace);
 895 | 		cscanner_macro_set(parser->token, "Ret", replace);
 896 | 		cscanner_macro_set(parser->token, "rEt", replace);
 897 | 		cscanner_macro_set(parser->token, "reT", replace);
 898 | 		cscanner_macro_set(parser->token, "rET", replace);
 899 | 		cscanner_macro_set(parser->token, "ReT", replace);
 900 | 		cscanner_macro_set(parser->token, "REt", replace);
 901 | 
 902 | 		cscanner_token_advance(parser->token, 1);
 903 | 
 904 | 		for (stack = 8; !cscanner_is_endl(parser->token); ) {
 905 | 			const CTOKEN *token = cscanner_token_current(parser->token);
 906 | 			const CTOKEN *next = cscanner_token_lookahead(parser->token);
 907 | 			char *macro = (char*)parser->data;
 908 | 			if (ctoken_get_char(token) == ',') {
 909 | 				cscanner_token_advance(parser->token, 1);
 910 | 			}
 911 | 			else if (token->type == CTokenIDENT && next->ch == ':') {
 912 | 				int size;
 913 | 				cscanner_token_advance(parser->token, 2);
 914 | 				size = cparser_parse_size(parser);
 915 | 				if (size == 0) {
 916 | 					cparser_error(parser, "variable type unknown", 93);
 917 | 					return -1;
 918 | 				}
 919 | 				if (cparser_parse_newvar(parser, token->str, stack)) {
 920 | 					return -4;
 921 | 				}
 922 | 				stack += size;
 923 | 			}
 924 | 			else {
 925 | 				if (token->type == CTokenIDENT) {
 926 | 					sprintf(macro, "parameter '%s' error", token->str);
 927 | 				}	else {
 928 | 					sprintf(macro, "parameter error");
 929 | 				}
 930 | 				cparser_error(parser, macro, 93);
 931 | 				return -3;
 932 | 			}
 933 | 		}
 934 | 
 935 | 		instruction[0] = 0x55;		// push ebp
 936 | 		instruction[1] = 0x8B;		// mov ebp, esp
 937 | 		instruction[2] = 0xEC;
 938 | 
 939 | 		cencoding_set_data(&parser->synthesizer.encoding, instruction, 3);
 940 | 	}
 941 | 	else if (stricmp(name, "LOCAL") == 0) {
 942 | 		int localsize = 0;
 943 | 		int IS;
 944 | 
 945 | 		cscanner_token_advance(parser->token, 1);
 946 | 
 947 | 		if (parser->inproc == 0) {
 948 | 			cparser_error(parser, "local is forbbiden outside a proc", 90);
 949 | 			return -5;
 950 | 		}
 951 | 
 952 | 		for (; !cscanner_is_endl(parser->token); ) {
 953 | 			const CTOKEN *token = cscanner_token_current(parser->token);
 954 | 			const CTOKEN *next = cscanner_token_lookahead(parser->token);
 955 | 			char *macro = (char*)parser->data;
 956 | 			if (ctoken_get_char(token) == ',') {
 957 | 				cscanner_token_advance(parser->token, 1);
 958 | 			}
 959 | 			else if (token->type == CTokenIDENT && next->ch == ':') {
 960 | 				int pos, size;
 961 | 				cscanner_token_advance(parser->token, 2);
 962 | 				size = cparser_parse_size(parser);
 963 | 				if (size == 0) {
 964 | 					cparser_error(parser, "variable type unknown", 93);
 965 | 					return -1;
 966 | 				}
 967 | 				pos = -(parser->stack + size);
 968 | 				if (cparser_parse_newvar(parser, token->str, pos)) {
 969 | 					return -6;
 970 | 				}
 971 | 				parser->stack += size;
 972 | 				localsize += size;
 973 | 				//printf("LOCAL %s=[EBP+(%d)]\n", token->str, pos);
 974 | 			}
 975 | 			else {
 976 | 				if (token->type == CTokenIDENT) {
 977 | 					sprintf(macro, "parameter '%s' error", token->str);
 978 | 				}	else {
 979 | 					sprintf(macro, "parameter error");
 980 | 				}
 981 | 				cparser_error(parser, macro, 92);
 982 | 				return -3;
 983 | 			}
 984 | 		}
 985 | 
 986 | 		if (localsize <= 127) {
 987 | 			instruction[0] = 0x83;		// sub esp, imm8
 988 | 			instruction[1] = 0xEC;
 989 | 			instruction[2] = (unsigned char)(localsize & 0xff);
 990 | 			IS = 3;
 991 | 		}	else {
 992 | 			instruction[0] = 0x81;		// sub esp, imm32
 993 | 			instruction[1] = 0xEC;
 994 | 			instruction[2] = (unsigned char)((localsize >>  0) & 0xff);
 995 | 			instruction[3] = (unsigned char)((localsize >>  8) & 0xff);
 996 | 			instruction[4] = (unsigned char)((localsize >> 16) & 0xff);
 997 | 			instruction[5] = (unsigned char)((localsize >> 24) & 0xff);
 998 | 			IS = 6;
 999 | 		}
1000 | 
1001 | 		cencoding_set_data(&parser->synthesizer.encoding, instruction, IS);
1002 | 	}
1003 | 	else if (stricmp(name, "ENDP") == 0) {
1004 | 		if (parser->inproc == 0) {
1005 | 			cparser_error(parser, "not find proc definition", 91);
1006 | 			return -2;
1007 | 		}
1008 | 		parser->inproc = 0;
1009 | 		parser->stack = 0;
1010 | 		while (parser->vars) {
1011 | 			CVariable *var = parser->vars;
1012 | 			parser->vars = parser->vars->next;
1013 | 			cscanner_macro_del(parser->token, var->name);
1014 | 			free(var->name);
1015 | 			free(var);
1016 | 		}
1017 | 		cscanner_macro_del(parser->token, "ret");
1018 | 		cscanner_macro_del(parser->token, "RET");
1019 | 		cscanner_macro_del(parser->token, "Ret");
1020 | 		cscanner_macro_del(parser->token, "rEt");
1021 | 		cscanner_macro_del(parser->token, "reT");
1022 | 		cscanner_macro_del(parser->token, "rET");
1023 | 		cscanner_macro_del(parser->token, "ReT");
1024 | 		cscanner_macro_del(parser->token, "REt");
1025 | 	}
1026 | 	else {
1027 | 		return 0;
1028 | 	}
1029 | 
1030 | 	while (!cscanner_is_endf(parser->token)) {
1031 | 		cscanner_token_advance(parser->token, 1);
1032 | 	}
1033 | 	
1034 | 	return 0;
1035 | }
1036 | 
1037 | 


--------------------------------------------------------------------------------
/source/cparser.h:
--------------------------------------------------------------------------------
 1 | //=====================================================================
 2 | //
 3 | // cparser.h - source parser
 4 | //
 5 | // NOTE:
 6 | // for more information, please see the readme file.
 7 | //
 8 | //=====================================================================
 9 | #ifndef __CPARSER_H__
10 | #define __CPARSER_H__
11 | 
12 | #include "csynthesis.h"
13 | #include "cinstset.h"
14 | #include "cscanner.h"
15 | 
16 | 
17 | //---------------------------------------------------------------------
18 | // CVariable
19 | //---------------------------------------------------------------------
// One procedure variable (PROC parameter or LOCAL); the nodes form a singly
// linked list headed by CParser::vars.
struct CVariable
{
	char *name;					// heap copy of the identifier, freed at ENDP
	int pos;					// offset from EBP: parameters >= 0, locals < 0
	struct CVariable *next;		// next variable in the list, NULL at the end
};

typedef struct CVariable CVariable;
28 | 
29 | 
30 | //---------------------------------------------------------------------
31 | // CParser
32 | //---------------------------------------------------------------------
// State of the line parser.
struct CParser
{
	char *data;						// scratch buffer: DB/DW/DD bytes, macro text, messages
	char *error;					// presumably the last error message - TODO confirm in cparser.c
	int errcode;					// presumably the last error code - TODO confirm in cparser.c
	int inproc;						// non-zero between PROC and ENDP
	int stack;						// bytes of locals declared by LOCAL in the current proc
	CScanner *token;				// token scanner, also holds the macro table
	CVariable *vars;				// variables of the current proc (newest first)
	CInstruction *instruction;		// NOTE(review): use not visible here - see cparser.c
	CInstructionSet *instructionset;	// NOTE(review): use not visible here - see cparser.c
	CSynthesizer synthesizer;		// encoder state (encoding, prefix, error text)
};

typedef struct CParser CParser;
48 | 
49 | 
50 | #ifdef __cplusplus
51 | extern "C" {
52 | #endif
53 | //---------------------------------------------------------------------
54 | // interfaces
55 | //---------------------------------------------------------------------
// Lifetime: presumably cparser_create() allocates a parser that must be
// handed back to cparser_release() - confirm in cparser.c.
CParser *cparser_create(void);
void cparser_release(CParser *parser);

// NOTE(review): the exact state cleared by a reset is defined in cparser.c.
void cparser_reset(CParser *parser);

// Parses one line of assembly source; the returned encoding is read-only
// for the caller. NOTE(review): the result on a parse error is defined in
// cparser.c - confirm before relying on it.
const CEncoding *cparser_parse_line(CParser *parser, const char *source);
62 | 
63 | 
64 | #ifdef __cplusplus
65 | }
66 | #endif
67 | 
68 | 
69 | #endif
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/source/cscanner.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cscanner.c - source scanner
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #include "cscanner.h"
 10 | 
 11 | #ifdef _MSC_VER
 12 | #pragma warning(disable: 4996)
 13 | #endif
 14 | 
 15 | //---------------------------------------------------------------------
 16 | // compatible
 17 | //---------------------------------------------------------------------
 18 | int cstricmp(const char *dst, const char *src) {
 19 | 	int ch1, ch2;
 20 | 	do {
 21 | 		if ( ((ch1 = (unsigned char)(*(dst++))) >= 'A') && (ch1 <= 'Z') )
 22 | 			ch1 += 0x20;
 23 | 		if ( ((ch2 = (unsigned char)(*(src++))) >= 'A') && (ch2 <= 'Z') )
 24 | 			ch2 += 0x20;
 25 | 	}	while ( ch1 && (ch1 == ch2) );
 26 | 	return(ch1 - ch2);
 27 | }
 28 | 
 29 | 
 30 | //---------------------------------------------------------------------
 31 | // Token Reader
 32 | //---------------------------------------------------------------------
 33 | CTokenReader *ctoken_reader_create(int (*readch)(void*), void *fp)
 34 | {
 35 | 	CTokenReader *reader;
 36 | 	reader = (CTokenReader*)malloc(sizeof(CTokenReader));
 37 | 	assert(reader);
 38 | 	reader->readch = readch;
 39 | 	reader->fp = fp;
 40 | 	reader->ch = ' ';
 41 | 	reader->unch = -1;
 42 | 	reader->saved = -1;
 43 | 	reader->lineno = 1;
 44 | 	reader->colno = 0;
 45 | 	reader->state = 0;
 46 | 	reader->buffer = (char*)malloc(CMAX_IDENT * 2);
 47 | 	assert(reader->buffer);
 48 | 	reader->error = (char*)malloc(8192);
 49 | 	assert(reader->error);
 50 | 	reader->pos = 0;
 51 | 	reader->keywords = NULL;
 52 | 	reader->eof = 0;
 53 | 	reader->error[0] = 0;
 54 | 	reader->errcode = 0;
 55 | 	return reader;
 56 | }
 57 | 
 58 | void ctoken_reader_release(CTokenReader *reader)
 59 | {
 60 | 	assert(reader);
 61 | 	if (reader->buffer) {
 62 | 		free(reader->buffer);
 63 | 		reader->buffer = NULL;
 64 | 	}
 65 | 	if (reader->error) {
 66 | 		free(reader->error);
 67 | 		reader->error = NULL;
 68 | 	}
 69 | 	free(reader);
 70 | }
 71 | 
 72 | int ctoken_reader_getch(CTokenReader *reader)
 73 | {
 74 | 	assert(reader);
 75 | 	if (reader->unch >= 0) {
 76 | 		reader->ch = reader->unch;
 77 | 		reader->unch = -1;
 78 | 	}	else {
 79 | 		reader->saved = reader->ch;
 80 | 		reader->ch = reader->readch(reader->fp);
 81 | 		if (reader->ch == '\n') reader->lineno++, reader->colno = 1;
 82 | 		else if (reader->ch >= 0) reader->colno++;
 83 | 	}
 84 | 	return reader->ch;
 85 | }
 86 | 
 87 | static int ctoken_reader_ungetch(CTokenReader *reader, int ch)
 88 | {
 89 | 	assert(reader->unch < 0);
 90 | 	reader->unch = ch;
 91 | 	return 0;
 92 | }
 93 | 
 94 | static int ctoken_reader_skip_space(CTokenReader *reader)
 95 | {
 96 | 	while (1) {
 97 | 		int skip = 0;
 98 | 		for (; isspace(reader->ch) && reader->ch != '\n'; skip++)
 99 | 			ctoken_reader_getch(reader);
100 | 		if (reader->ch == ';' || reader->ch == '#') {
101 | 			skip++;
102 | 			while (reader->ch != '\n' && reader->ch >= 0) {
103 | 				ctoken_reader_getch(reader);
104 | 				skip++;
105 | 			}
106 | 		}	
107 | 		else if (reader->ch == '/') {
108 | 			ctoken_reader_getch(reader);
109 | 			if (reader->ch == '/') {
110 | 				skip++;
111 | 				while (reader->ch != '\n' && reader->ch >= 0) {
112 | 					ctoken_reader_getch(reader);
113 | 					skip++;
114 | 				}
115 | 			}	else {
116 | 				ctoken_reader_ungetch(reader, reader->ch);
117 | 				reader->ch = '/';
118 | 			}
119 | 		}
120 | 		if (skip == 0) break;
121 | 	}
122 | 	return 0;
123 | }
124 | 
125 | static CTOKEN *ctoken_reader_read_string(CTokenReader *reader, int *state)
126 | {
127 | 	CTOKEN *token = NULL;
128 | 
129 | 	if (reader->ch == '\'' || reader->ch == '\"') {
130 | 		int mode = (reader->ch == '\"')? 0 : 1;
131 | 		reader->pos = 0;
132 | 		while (1) {
133 | 			int ch = ctoken_reader_getch(reader);
134 | 			if (ch == '\\') {
135 | 				ctoken_reader_getch(reader);
136 | 				reader->buffer[reader->pos++] = '\\';
137 | 				reader->buffer[reader->pos++] = (char)reader->ch;
138 | 			}	
139 | 			else if (mode == 0 && ch == '\'') {
140 | 				reader->buffer[reader->pos++] = '\'';
141 | 			}
142 | 			else if (mode == 1 && ch == '\"') {
143 | 				reader->buffer[reader->pos++] = '\"';
144 | 			}
145 | 			else if (mode == 0 && ch == '\"') {
146 | 				ch = ctoken_reader_getch(reader);
147 | 				if (ch == '\"') {
148 | 					reader->buffer[reader->pos++] = '\"';
149 | 					reader->buffer[reader->pos++] = '\"';
150 | 				}	else {
151 | 					*state = 1;
152 | 					reader->buffer[reader->pos] = 0;
153 | 					token = ctoken_new_string(reader->buffer);
154 | 					break;
155 | 				}
156 | 			}
157 | 			else if (mode == 1 && ch == '\'') {
158 | 				ch = ctoken_reader_getch(reader);
159 | 				if (ch == '\'') {
160 | 					reader->buffer[reader->pos++] = '\'';
161 | 					reader->buffer[reader->pos++] = '\'';
162 | 				}	else {
163 | 					*state = 1;
164 | 					reader->buffer[reader->pos] = 0;
165 | 					token = ctoken_new_string(reader->buffer);
166 | 					break;
167 | 				}
168 | 			}
169 | 			else if (ch == '\n') {
170 | 				*state = -1;
171 | 				break;
172 | 			}
173 | 			else if (ch >= 0) {
174 | 				reader->buffer[reader->pos++] = (char)ch;
175 | 			}
176 | 			else {			// ch < 0
177 | 				*state = -2;
178 | 				break;
179 | 			}
180 | 
181 | 			if (reader->pos >= CMAX_IDENT) {
182 | 				*state = -3;
183 | 				strncpy(reader->error, "string too long", 64);
184 | 				break;
185 | 			}
186 | 		}
187 | 	}
188 | 
189 | 	return token;
190 | }
191 | 
192 | static CTOKEN *ctoken_reader_read_number(CTokenReader *reader, int *state)
193 | {
194 | 	int lineno = reader->lineno;
195 | 	char *text = reader->buffer;
196 | 	CTOKEN *token;
197 | 	int ec1, ec2, pos;
198 | 	long value;
199 | 
200 | 	if (reader->ch < '0' || reader->ch > '9') {
201 | 		*state = 0;
202 | 		return NULL;
203 | 	}
204 | 
205 | 	for (reader->pos = 0; isalnum(reader->ch) || reader->ch == '.'; ) {
206 | 		reader->buffer[reader->pos++] = (char)reader->ch;
207 | 		ctoken_reader_getch(reader);
208 | 		if (reader->pos >= CMAX_IDENT) {
209 | 			strncpy(reader->error, "number too long", 64);
210 | 			*state = -1;
211 | 			reader->errcode = 1;
212 | 			return NULL;
213 | 		}
214 | 	}
215 | 
216 | 	reader->buffer[reader->pos] = 0;
217 | 	for (pos = reader->pos; pos > 0; pos--) {
218 | 		if (isdigit(text[pos - 1]) || text[pos - 1] == '.') {
219 | 			break;
220 | 		}
221 | 		else if (text[pos - 1] >= 'a' && text[pos - 1] <= 'f') {
222 | 			break;
223 | 		}
224 | 		else if (text[pos - 1] >= 'A' && text[pos - 1] <= 'F') {
225 | 			break;
226 | 		}
227 | 	}
228 | 
229 | 	if (reader->pos - pos > 2) {
230 | 		strncpy(reader->error, "number format error", 64);
231 | 		*state = -2;
232 | 		reader->errcode = 2;
233 | 		return NULL;
234 | 	}
235 | 
236 | 	if (reader->pos - pos == 2) ec1 = text[pos], ec2 = text[pos + 1];
237 | 	else if (reader->pos - pos == 1) ec1 = text[pos], ec2 = 0;
238 | 	else ec1 = ec2 = 0;
239 | 	text[pos] = 0;
240 | 	
241 | 	// hex
242 | 	if (text[0] == '0' && (text[1] == 'x' || text[1] == 'X')) {
243 | 		value = (long)strtoul(text + 2, NULL, 16);
244 | 		token = ctoken_new_int(value);
245 | 	}	// hex
246 | 	else if (ec1 == 'h' && ec2 == 0) {
247 | 		value = (long)strtoul(text, NULL, 16);
248 | 		token = ctoken_new_int(value);
249 | 	}	// binary
250 | 	else if (ec1 == 'b' && ec2 == 0) {
251 | 		value = (long)strtoul(text, NULL, 2);
252 | 		token = ctoken_new_int(value);
253 | 	}	// octal
254 | 	else if (ec1 == 'q' && ec2 == 0) {
255 | 		value = (long)strtol(text, NULL, 8);
256 | 		token = ctoken_new_int(value);
257 | 	}	// decimal or float
258 | 	else {
259 | 		int decimal = 1;
260 | 		int i;
261 | 		for (i = 0; text[i]; i++) 
262 | 			if (text[i] == '.') decimal = 0;
263 | 		if (decimal) {
264 | 			value = (long)strtoul(text, NULL, 10);
265 | 			token = ctoken_new_int(value);
266 | 		}	else {
267 | 			float ff;
268 | 			sscanf(text, "%f", &ff);
269 | 			token = ctoken_new_float(ff);
270 | 		}
271 | 	}
272 | 	token->lineno = lineno;
273 | 	*state = 0;
274 | 	return token;
275 | }
276 | 
277 | CTOKEN *ctoken_reader_read(CTokenReader *reader)
278 | {
279 | 	CTOKEN *token = NULL;
280 | 
281 | 	assert(reader);
282 | 
283 | 	// skip memo and space
284 | 	ctoken_reader_skip_space(reader);
285 | 
286 | 	// this is a endl
287 | 	if (reader->ch == '\n') {
288 | 		int lineno = reader->lineno - 1;
289 | 		token = ctoken_new_endl();
290 | 		token->lineno = lineno;
291 | 		ctoken_reader_getch(reader);
292 | 		return token;
293 | 	}
294 | 
295 | 	// this is a endf
296 | 	if (reader->ch < 0) {
297 | 		if (reader->eof++) return NULL;
298 | 		token = ctoken_new_endf();
299 | 		token->lineno = reader->lineno;
300 | 		return token;
301 | 	}
302 | 
303 | 	// this is a string 
304 | 	if (reader->ch == '\'' || reader->ch == '\"') {
305 | 		int lineno = reader->lineno;
306 | 		int state;
307 | 		token = ctoken_reader_read_string(reader, &state);
308 | 		if (state < 0) {
309 | 			strncpy(reader->error, "expected closing quotation mark", 100);
310 | 			reader->errcode = 3;
311 | 			return NULL;
312 | 		}
313 | 		token->lineno = lineno;
314 | 		return token;
315 | 	}
316 | 
317 | 	#define issym2f(c) ((c) == '_' || isalpha(c) || (c) == '$' || (c) == '@')
318 | 	#define issym2x(c) ((c) == '_' || isalnum(c) || (c) == '$' || (c) == '@')
319 | 
320 | 	// this is a identity or a keyword
321 | 	if (issym2f(reader->ch)) {
322 | 		int lineno = reader->lineno;
323 | 		for (reader->pos = 0; issym2x(reader->ch); ) {
324 | 			reader->buffer[reader->pos++] = (char)reader->ch;
325 | 			ctoken_reader_getch(reader);
326 | 			if (reader->pos >= CMAX_IDENT) {
327 | 				strncpy(reader->error, "ident too long", 100);
328 | 				reader->errcode = 4;
329 | 				return NULL;
330 | 			}
331 | 		}
332 | 		reader->buffer[reader->pos] = 0;
333 | 		if (reader->keywords) {
334 | 			int i;
335 | 			for (i = 0; reader->keywords[i]; i++) {
336 | 				if (stricmp(reader->buffer, reader->keywords[i]) == 0) {
337 | 					token = ctoken_new_keyword(i);
338 | 					token->lineno = lineno;
339 | 					return token;
340 | 				}
341 | 			}
342 | 		}
343 | 		token = ctoken_new_ident(reader->buffer);
344 | 		token->lineno = lineno;
345 | 		//printf("{%s:%d:%d}\n", token->str, token->lineno, reader->lineno);
346 | 		return token;
347 | 	}
348 | 
349 | 	#undef issym2f
350 | 	#undef issym2x
351 | 
352 | 	// this is a number
353 | 	if (reader->ch >= '0' && reader->ch <= '9') {
354 | 		int lineno = reader->lineno;
355 | 		int state;
356 | 		//printf("number\n");
357 | 		token = ctoken_reader_read_number(reader, &state);
358 | 		if (state < 0) {
359 | 			strncpy(reader->error, "number format error", 100);
360 | 			reader->errcode = 5;
361 | 			return NULL;
362 | 		}
363 | 		token->lineno = lineno;
364 | 		return token;
365 | 	}
366 | 
367 | 	// operators
368 | 	token = ctoken_new_operator(reader->ch);
369 | 	assert(token);
370 | 	token->lineno = reader->lineno;
371 | 	ctoken_reader_getch(reader);
372 | 
373 | 	return token;
374 | }
375 | 
376 | 
377 | //---------------------------------------------------------------------
378 | // token stream
379 | //---------------------------------------------------------------------
// Character source over a NUL-terminated string.
// fp points to a 'char *' cursor that is advanced past every character
// delivered. Returns the character as a value in 0..255, or -1 once the
// terminator is reached (the cursor then stays on the terminator).
static int ctoken_stream_text_getch(void *fp) {
	char **ptr = (char**)fp;
	if (**ptr == 0) return -1;
	// Convert through unsigned char: where plain char is signed, a byte
	// >= 0x80 would otherwise come back negative and be taken for end of
	// input by the reader.
	return (unsigned char)*((*ptr)++);
}
385 | 
386 | static void ctoken_stream_free(CTOKEN *root) {
387 | 	assert(root);
388 | 	while (!ctoken_list_is_empty(root)) {
389 | 		CTOKEN *token = root->next;
390 | 		ctoken_list_del(token);
391 | 		ctoken_delete(token);
392 | 	}
393 | 	ctoken_delete(root);
394 | }
395 | 
396 | static CTOKEN *ctoken_stream_load(const char *text, char *error)
397 | {
398 | 	CTokenReader *reader;
399 | 	CTOKEN *root;
400 | 	char *string;
401 | 	int retval;
402 | 
403 | 	string = (char*)text;
404 | 	reader = ctoken_reader_create(ctoken_stream_text_getch, &string);
405 | 	assert(reader);
406 | 
407 | 	root = ctoken_new_endf();
408 | 	assert(root);
409 | 
410 | 	for (retval = 0, *error = 0; ; ) {
411 | 		CTOKEN *token;
412 | 		token = ctoken_reader_read(reader);
413 | 		if (token == NULL) {
414 | 			if (error) strncpy(error, reader->error, 100);
415 | 			retval = -1;
416 | 			break;
417 | 		}
418 | 		if (token->type == CTokenENDF) {
419 | 			break;
420 | 		}
421 | 		ctoken_list_add_tail(token, root);
422 | 	}
423 | 
424 | 	ctoken_reader_release(reader);
425 | 
426 | 	if (retval != 0) {
427 | 		ctoken_stream_free(root);
428 | 		return NULL;
429 | 	}
430 | 
431 | 	return root;
432 | }
433 | 
434 | 
435 | //---------------------------------------------------------------------
436 | // Scanner
437 | //---------------------------------------------------------------------
438 | CScanner *cscanner_create(void)
439 | {
440 | 	CScanner *scan;
441 | 	scan = (CScanner*)malloc(sizeof(CScanner));
442 | 	assert(scan);
443 | 	scan->root = NULL;
444 | 	scan->link = NULL;
445 | 	scan->source = NULL;
446 | 	scan->length = 0;
447 | 	scan->position = 0;
448 | 	scan->reader = NULL;
449 | 	scan->error = (char*)malloc(1024);
450 | 	assert(scan->error);
451 | 	scan->errcode = 0;
452 | 	scan->macros = NULL;
453 | 	scan->endf.type = CTokenENDF;
454 | 	scan->endf.lineno = 0;
455 | 	scan->endf.fileno = 0;
456 | 	scan->jmplabel = 0;
457 | 	return scan;
458 | }
459 | 
460 | static void cscanner_token_reset(CScanner *scan)
461 | {
462 | 	assert(scan);
463 | 	if (scan->root) {
464 | 		while (!ctoken_list_is_empty(scan->root)) {
465 | 			CTOKEN *token = scan->root->next;
466 | 			ctoken_list_del(token);
467 | 			ctoken_delete(token);
468 | 		}
469 | 		ctoken_delete(scan->root);
470 | 		scan->root = NULL;
471 | 		scan->link = NULL;
472 | 	}
473 | 	if (scan->reader) {
474 | 		ctoken_reader_release(scan->reader);
475 | 		scan->reader = NULL;
476 | 	}
477 | 	scan->source = NULL;
478 | 	scan->length = 0;
479 | 	scan->position = 0;
480 | 	scan->errcode = 0;
481 | }
482 | 
483 | void cscanner_macro_reset(CScanner *scan)
484 | {
485 | 	while (scan->macros) {
486 | 		CMacro *macro = scan->macros;
487 | 		scan->macros = scan->macros->next;
488 | 		free(macro->ident);
489 | 		free(macro->value);
490 | 		free(macro);
491 | 	}
492 | 	scan->jmplabel = 0;
493 | }
494 | 
495 | void cscanner_release(CScanner *scan)
496 | {
497 | 	cscanner_token_reset(scan);
498 | 	cscanner_macro_reset(scan);
499 | 	if (scan->error) {
500 | 		free(scan->error);
501 | 		scan->error = NULL;
502 | 	}
503 | 	free(scan);
504 | }
505 | 
506 | int cscanner_macro_set(CScanner *scan, const char *name, const char *value)
507 | {
508 | 	CMacro *macro;
509 | 
510 | 	for (macro = scan->macros; macro; macro = macro->next) {
511 | 		if (strcmp(macro->ident, name) == 0) {
512 | 			return -1;
513 | 		}
514 | 	}
515 | 
516 | 	macro = (CMacro*)malloc(sizeof(CMacro));
517 | 	assert(macro);
518 | 
519 | 	macro->ident = strdup(name);
520 | 	macro->value = strdup(value);
521 | 
522 | 	assert(macro->ident);
523 | 	assert(macro->value);
524 | 
525 | 	macro->next = scan->macros;
526 | 	scan->macros = macro;
527 | 
528 | 	return 0;
529 | }
530 | 
531 | int cscanner_macro_del(CScanner *scan, const char *name)
532 | {
533 | 	CMacro *macro;
534 | 	CMacro *prev;
535 | 
536 | 	for (macro = scan->macros, prev = NULL; macro; ) {
537 | 		if (strcmp(macro->ident, name) == 0) {
538 | 			break;
539 | 		}
540 | 		prev = macro;
541 | 		macro = macro->next;
542 | 	}
543 | 
544 | 	if (macro == NULL) {		// not find macro
545 | 		return -1;
546 | 	}
547 | 
548 | 	if (prev) prev->next = macro->next;
549 | 	else scan->macros = macro->next;
550 | 
551 | 	free(macro->ident);
552 | 	free(macro->value);
553 | 	free(macro);
554 | 
555 | 	return 0;
556 | }
557 | 
558 | const char *cscanner_macro_search(CScanner *scan, const char *name)
559 | {
560 | 	CMacro *macro;
561 | 	for (macro = scan->macros; macro; macro = macro->next) {
562 | 		if (strcmp(macro->ident, name) == 0) {
563 | 			return macro->value;
564 | 		}
565 | 	}
566 | 
567 | 	return NULL;
568 | }
569 | 
570 | static int cscanner_reader_getch(void *fp) 
571 | {
572 | 	CScanner *scan = (CScanner*)fp;
573 | 	if (scan->source == NULL) return -1;
574 | 	if (scan->position >= scan->length) return -1;
575 | 	return scan->source[scan->position++];
576 | }
577 | 
578 | int cscanner_set_source(CScanner *scan, const char *source)
579 | {
580 | 	int retval = 0;
581 | 
582 | 	cscanner_token_reset(scan);
583 | 	scan->source = source;
584 | 	scan->length = (int)strlen(source);
585 | 	scan->position = 0;
586 | 	scan->reader = ctoken_reader_create(cscanner_reader_getch, scan);
587 | 	
588 | 	scan->root = ctoken_new_endf();
589 | 	scan->link = scan->root;
590 | 	scan->error[0] = 0;
591 | 	scan->errcode = 0;
592 | 
593 | 	for (; ; ) {
594 | 		CTOKEN *token;
595 | 
596 | 		token = ctoken_reader_read(scan->reader);
597 | 
598 | 		if (token == NULL) {
599 | 			scan->lineno = scan->reader->lineno;
600 | 			scan->errcode = scan->reader->errcode;
601 | 			strncpy(scan->error, scan->reader->error, 80);
602 | 			retval = -1;
603 | 			break;
604 | 		}
605 | 
606 | 		if (token->type == CTokenIDENT) {
607 | 			const char *macro = cscanner_macro_search(scan, token->str);
608 | 			if (macro != NULL) {
609 | 				CTOKEN *ts = ctoken_stream_load(macro, scan->error);
610 | 				if (ts == NULL) {
611 | 					scan->lineno = scan->reader->lineno;
612 | 					scan->errcode = 88;
613 | 					retval = -2;
614 | 					break;
615 | 				}
616 | 				while (!ctoken_list_is_empty(ts)) {
617 | 					CTOKEN *next = ts->next;
618 | 					if (next->type == CTokenENDF) break;
619 | 					ctoken_list_del(next);
620 | 					next->lineno = scan->lineno;
621 | 					ctoken_list_add_tail(next, scan->root);
622 | 				}
623 | 				ctoken_stream_free(ts);
624 | 				continue;
625 | 			}	
626 | 			else {
627 | 				if (strcmp(token->str, "@@") == 0) {
628 | 					scan->jmplabel++;
629 | 					free(token->str);
630 | 					token->str = (char*)malloc(20);
631 | 					assert(token->str);
632 | 					sprintf(token->str, "@@%d", scan->jmplabel);
633 | 					//printf("label %d\n", scan->jmplabel);
634 | 				}
635 | 				else if (stricmp(token->str, "@b") == 0) {
636 | 					free(token->str);
637 | 					token->str = (char*)malloc(20);
638 | 					assert(token->str);
639 | 					sprintf(token->str, "@@%d", scan->jmplabel);
640 | 				}
641 | 				else if (stricmp(token->str, "@f") == 0) {
642 | 					free(token->str);
643 | 					token->str = (char*)malloc(20);
644 | 					assert(token->str);
645 | 					sprintf(token->str, "@@%d", scan->jmplabel + 1);
646 | 				}
647 | 			}
648 | 		}
649 | 
650 | 		ctoken_list_add_tail(token, scan->root);
651 | 		if (token->type == CTokenENDF) {
652 | 			break;
653 | 		}
654 | 	}
655 | 
656 | 	if (retval != 0) {
657 | 		cscanner_token_reset(scan);
658 | 		return retval;
659 | 	}
660 | 
661 | 	scan->link = scan->root->next;
662 | 	scan->root->lineno = scan->root->prev->lineno;
663 | 
664 | 	return 0;
665 | }
666 | 
667 | const CTOKEN *cscanner_token_current(const CScanner *scan)
668 | {
669 | 	if (scan->root == NULL || scan->link == NULL) {
670 | 		return &(scan->endf);
671 | 	}
672 | 	return scan->link;
673 | }
674 | 
675 | const CTOKEN *cscanner_token_lookahead(const CScanner *scan)
676 | {
677 | 	if (scan->root == NULL || scan->link == NULL) {
678 | 		return &(scan->endf);
679 | 	}
680 | 	if (scan->link == scan->root) {
681 | 		return &(scan->endf);
682 | 	}
683 | 	return scan->link->next;
684 | }
685 | 
686 | const CTOKEN *cscanner_token_advance(CScanner *scan, int n)
687 | {
688 | 	if (n < 0) n = 0;
689 | 	if (scan->root == NULL || scan->link == NULL) {
690 | 		return &(scan->endf);
691 | 	}
692 | 	while (n--) {
693 | 		if (scan->link == scan->root) {
694 | 			return &(scan->endf);
695 | 		}
696 | 		scan->link = scan->link->next;
697 | 	}
698 | 	return scan->link;
699 | }
700 | 
701 | int cscanner_get_type(const CScanner *scan)
702 | {
703 | 	return cscanner_token_current(scan)->type;
704 | }
705 | 
706 | const char *cscanner_get_string(const CScanner *scan)
707 | {
708 | 	const CTOKEN *token = cscanner_token_current(scan);
709 | 	if (token->type != CTokenIDENT && token->type != CTokenSTR) {
710 | 		return "";
711 | 	}
712 | 	return token->str;
713 | }
714 | 
715 | int cscanner_get_char(const CScanner *scan)
716 | {
717 | 	const CTOKEN *token = cscanner_token_current(scan);
718 | 	if (token->type != CTokenOPERATOR) {
719 | 		return '\0';
720 | 	}
721 | 	return token->ch;
722 | }
723 | 
724 | int cscanner_get_value(const CScanner *scan)
725 | {
726 | 	const CTOKEN *token = cscanner_token_current(scan);
727 | 	if (token->type != CTokenINT) {
728 | 		return 0;
729 | 	}
730 | 	return token->intval;
731 | }
732 | 
733 | int cscanner_get_lineno(const CScanner *scan)
734 | {
735 | 	const CTOKEN *token = cscanner_token_current(scan);
736 | 	return token->lineno;
737 | }
738 | 
739 | int cscanner_is_endl(const CScanner *scan) {
740 | 	const CTOKEN *token = cscanner_token_current(scan);
741 | 	return (token->type == CTokenENDF || token->type == CTokenENDL);
742 | }
743 | 
744 | int cscanner_is_endf(const CScanner *scan) {
745 | 	return cscanner_get_type(scan) == CTokenENDF;
746 | }
747 | 
748 | int cscanner_is_ident(const CScanner *scan)  {
749 | 	return cscanner_get_type(scan) == CTokenIDENT;
750 | }
751 | 
752 | int cscanner_is_operator(const CScanner *scan) {
753 | 	return cscanner_get_type(scan) == CTokenOPERATOR;
754 | }
755 | 
756 | int cscanner_is_int(const CScanner *scan) {
757 | 	return cscanner_get_type(scan) == CTokenINT;
758 | }
759 | 
760 | int cscanner_is_string(const CScanner *scan) {
761 | 	return cscanner_get_type(scan) == CTokenSTR;
762 | }
763 | 
764 | 


--------------------------------------------------------------------------------
/source/cscanner.h:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // cscanner.h - source scanner
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #ifndef __CSCANNER_H__
 10 | #define __CSCANNER_H__
 11 | 
 12 | #include "ctoken.h"
 13 | 
 14 | 
 15 | #define CMAX_IDENT	8192
 16 | 
 17 | //---------------------------------------------------------------------
 18 | // CTokenReader (assembly)
 19 | //---------------------------------------------------------------------
 20 | struct CTokenReader
 21 | {
 22 | 	int (*readch)(void *fp);
 23 | 	void *fp;
 24 | 	int ch;
 25 | 	int unch;
 26 | 	int saved;
 27 | 	char *buffer;
 28 | 	char *error;
 29 | 	char **keywords;
 30 | 	int state;
 31 | 	int pos;
 32 | 	int lineno;
 33 | 	int eof;
 34 | 	int colno;
 35 | 	int errcode;
 36 | };
 37 | 
 38 | typedef struct CTokenReader CTokenReader;
 39 | 
 40 | #ifdef __cplusplus
 41 | extern "C" {
 42 | #endif
 43 | //---------------------------------------------------------------------
 44 | // Assembly Token Reader
 45 | //---------------------------------------------------------------------
 46 | CTokenReader *ctoken_reader_create(int (*getch)(void*), void *fp);
 47 | 
 48 | void ctoken_reader_release(CTokenReader *reader);
 49 | 
 50 | CTOKEN *ctoken_reader_read(CTokenReader *reader);
 51 | 
 52 | #ifdef __cplusplus
 53 | }
 54 | #endif
 55 | 
 56 | 
 57 | //---------------------------------------------------------------------
 58 | // CMacro
 59 | //---------------------------------------------------------------------
 60 | struct CMacro
 61 | {
 62 | 	char *ident;
 63 | 	char *value;
 64 | 	struct CMacro *next;
 65 | };
 66 | 
 67 | typedef struct CMacro CMacro;
 68 | 
 69 | //---------------------------------------------------------------------
 70 | // CScanner
 71 | //---------------------------------------------------------------------
 72 | struct CScanner
 73 | {
 74 | 	const char *source;
 75 | 	long length;
 76 | 	long position;
 77 | 	int errcode;
 78 | 	char *error;
 79 | 	int jmplabel;
 80 | 	int lineno;
 81 | 	CTOKEN endf;
 82 | 	CTOKEN *root;
 83 | 	CMacro *macros;
 84 | 	const CTOKEN *link;
 85 | 	CTokenReader *reader;
 86 | };
 87 | 
 88 | typedef struct CScanner CScanner;
 89 | 
 90 | 
 91 | #ifdef __cplusplus
 92 | extern "C" {
 93 | #endif
 94 | //---------------------------------------------------------------------
 95 | // Scanner
 96 | //---------------------------------------------------------------------
 97 | CScanner *cscanner_create(void);
 98 | 
 99 | void cscanner_release(CScanner *scan);
100 | 
101 | void cscanner_macro_reset(CScanner *scan);
102 | 
103 | int cscanner_macro_set(CScanner *scan, const char *name, const char *value);
104 | int cscanner_macro_del(CScanner *scan, const char *name);
105 | 
106 | int cscanner_set_source(CScanner *scan, const char *source);
107 | 
108 | const CTOKEN *cscanner_token_current(const CScanner *scan);
109 | const CTOKEN *cscanner_token_lookahead(const CScanner *scan);
110 | const CTOKEN *cscanner_token_advance(CScanner *scan, int n);
111 | 
112 | int cscanner_get_type(const CScanner *scan);
113 | const char *cscanner_get_string(const CScanner *scan);
114 | int cscanner_get_char(const CScanner *scan);
115 | int cscanner_get_value(const CScanner *scan);
116 | int cscanner_get_lineno(const CScanner *scan);
117 | 
118 | int cscanner_is_endl(const CScanner *scan);
119 | int cscanner_is_endf(const CScanner *scan);
120 | int cscanner_is_ident(const CScanner *scan);
121 | int cscanner_is_operator(const CScanner *scan);
122 | int cscanner_is_int(const CScanner *scan);
123 | int cscanner_is_string(const CScanner *scan);
124 | 
125 | 
126 | #ifdef __cplusplus
127 | }
128 | #endif
129 | 
130 | #endif
131 | 
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/source/csynthesis.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
// csynthesis.c - instruction encoding synthesizer
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | 
 10 | #include "csynthesis.h"
 11 | 
 12 | #ifdef _MSC_VER
 13 | #pragma warning(disable: 4996)
 14 | #endif
 15 | 
 16 | void csynth_init(CSynthesizer *synth)
 17 | {
 18 | 	cencoding_init(&synth->encoding);
 19 | 	synth->error = (char*)malloc(1024);
 20 | 	assert(synth->error);
 21 | 	csynth_reset(synth);
 22 | }
 23 | 
 24 | void csynth_destroy(CSynthesizer *synth)
 25 | {
 26 | 	cencoding_destroy(&synth->encoding);
 27 | 	if (synth->error) free(synth->error);
 28 | 	synth->error = NULL;
 29 | 	synth->errcode = 0;
 30 | }
 31 | 
 32 | void csynth_reset(CSynthesizer *synth)
 33 | {
 34 | 	cencoding_reset(&synth->encoding);
 35 | 	synth->firstType = O_UNKNOWN;
 36 | 	synth->secondType = O_UNKNOWN;
 37 | 	synth->firstReg = REG_UNKNOWN;
 38 | 	synth->secondReg = REG_UNKNOWN;
 39 | 	synth->baseReg = REG_UNKNOWN;
 40 | 	synth->indexReg = REG_UNKNOWN;
 41 | 	synth->scale = 0;
 42 | 	synth->prefix = 0;
 43 | 	synth->error[0] = 0;
 44 | 	synth->errcode = 0;
 45 | }
 46 | 
 47 | static void csynth_error(CSynthesizer *synth, const char *error, int code)
 48 | {
 49 | 	strncpy(synth->error, error, 100);
 50 | 	synth->errcode = code;
 51 | }
 52 | 
 53 | int csynth_define_label(CSynthesizer *synth, const char *label)
 54 | {
 55 | 	if (synth->encoding.label != NULL) {
 56 | 		csynth_error(synth,  "Instruction can't have multiple label", 1);
 57 | 		return -1;
 58 | 	}
 59 | 	cencoding_set_label(&synth->encoding, label);
 60 | 	return 0;
 61 | }
 62 | 
 63 | int csynth_reference_label(CSynthesizer *synth, const char *label)
 64 | {
 65 | 	if (synth->encoding.reference != NULL) {
 66 | 		csynth_error(synth,  "Instruction can't have multiple refrence", 2);
 67 | 		return -1;
 68 | 	}
 69 | 	cencoding_set_reference(&synth->encoding, label);
 70 | 	return 0;
 71 | }
 72 | 
 73 | int csynth_encode_first_operand(CSynthesizer *synth, 
 74 | 	const COperand *firstOperand)
 75 | {
 76 | 	if (synth->firstType != O_UNKNOWN) {
 77 | 		csynth_error(synth,  "Instrucition destination already set", 3);
 78 | 		return -1;
 79 | 	}
 80 | 
 81 | 	synth->firstType = firstOperand->type;
 82 | 
 83 | 	if (coperand_is_reg(firstOperand) || coperand_is_mem(firstOperand)) {
 84 | 		synth->firstReg = firstOperand->reg;
 85 | 	}
 86 | 	else if (coperand_is_imm(firstOperand)) {
 87 | 		csynth_encode_immediate(synth, firstOperand->value);
 88 | 	}
 89 | 	else if (!coperand_is_void(firstOperand)) {
 90 | 		csynth_error(synth, "csynth_encode_first_operand: error", 4);
 91 | 		return -2;
 92 | 	}
 93 | 
 94 | 	return 0;
 95 | }
 96 | 
 97 | int csynth_encode_second_operand(CSynthesizer *synth, 
 98 | 	const COperand *secondOperand)
 99 | {
100 | 	if (synth->secondType != O_UNKNOWN) {
101 | 		csynth_error(synth,  "Instrucition source already set", 4);
102 | 		return -1;
103 | 	}
104 | 
105 | 	synth->secondType = secondOperand->type;
106 | 
107 | 	if (coperand_is_reg(secondOperand) || coperand_is_mem(secondOperand)) {
108 | 		synth->secondReg = secondOperand->reg;
109 | 	}
110 | 	else if (coperand_is_imm(secondOperand)) {
111 | 		csynth_encode_immediate(synth, secondOperand->value);
112 | 	}
113 | 	else if (!coperand_is_void(secondOperand)) {
114 | 		csynth_error(synth, "csynth_encode_second_operand: error", 5);
115 | 		return -2;
116 | 	}
117 | 
118 | 	return 0;
119 | }
120 | 
121 | int csynth_encode_third_operand(CSynthesizer *synth, 
122 | 	const COperand *thirdOperand)
123 | {
124 | 	if (coperand_is_imm(thirdOperand)) {
125 | 		csynth_encode_immediate(synth, thirdOperand->value);
126 | 	}
127 | 	else if (!coperand_is_void(thirdOperand)) {
128 | 		csynth_error(synth, "csynth_encode_third_operand: error", 6);
129 | 		return -3;
130 | 	}
131 | 	return 0;
132 | }
133 | 
134 | int csynth_encode_base(CSynthesizer *synth, const COperand *base)
135 | {
136 | 	if (synth->baseReg != REG_UNKNOWN) {
137 | 		int retval;
138 | 		// base already set, use as index with scale = 1
139 | 		retval = csynth_encode_index(synth, base);
140 | 		if (retval != 0) return -1;
141 | 		retval = csynth_encode_scale(synth, 1);
142 | 		if (retval != 0) return -2;
143 | 		return 0;
144 | 	}
145 | 
146 | 	synth->baseReg = base->reg;
147 | 	return 0;
148 | }
149 | 
150 | int csynth_encode_index(CSynthesizer *synth, const COperand *index)
151 | {
152 | 	if (synth->indexReg != REG_UNKNOWN) {
153 | 		csynth_error(synth, 
154 | 			"Memory reference can't have multiple index registers", 7);
155 | 		return -1;
156 | 	}
157 | 	synth->indexReg = index->reg;
158 | 	return 0;
159 | }
160 | 
161 | int csynth_encode_scale(CSynthesizer *synth, int scale)
162 | {
163 | 	if (synth->scale != 0) {
164 | 		csynth_error(synth, 
165 | 			"Memory reference can't have multiple scale factors", 8);
166 | 		return -1;
167 | 	}
168 | 	if (scale != 1 && scale != 2 && scale != 4 && scale != 8) {
169 | 		csynth_error(synth, "Invalid scale value", 9);
170 | 		return -2;
171 | 	}
172 | 	synth->scale = scale;
173 | 	return 0;
174 | }
175 | 
176 | int csynth_encode_immediate(CSynthesizer *synth, long immediate)
177 | {
178 | 	if (synth->encoding.immediate != 0) {
179 | 		csynth_error(synth, 
180 | 			"Instruction can't have multiple immediate operands", 10);
181 | 		return -1;
182 | 	}
183 | 	synth->encoding.immediate = (cint32)immediate;
184 | 	return 0;
185 | }
186 | 
187 | int csynth_encode_displacement(CSynthesizer *synth, long displacement)
188 | {
189 | 	synth->encoding.displacement += (cint32)displacement;
190 | 	return 0;
191 | }
192 | 
193 | static int csynth_encode_mod_field(CSynthesizer *synth)
194 | {
195 | 	synth->encoding.format.modRM = 1;
196 | 	if (coperand_type_is_reg(synth->firstType) && (
197 | 		coperand_type_is_reg(synth->secondType) ||
198 | 		coperand_type_is_imm(synth->secondType) ||
199 | 		coperand_type_is_void(synth->secondType))) {
200 | 		synth->encoding.modRM.mod = MOD_REG;
201 | 	}
202 | 	else if ((coperand_type_is_mem(synth->firstType) ||
203 | 			  coperand_type_is_mem(synth->secondType)) &&
204 | 			 (coperand_type_is_reg(synth->firstType) ||
205 | 			  coperand_type_is_reg(synth->secondType))) {
206 | 		if (!synth->encoding.displacement) {
207 | 			synth->encoding.modRM.mod = MOD_NO_DISP;
208 | 		}
209 | 		else if ((char)synth->encoding.displacement == 
210 | 				synth->encoding.displacement) {
211 | 			synth->encoding.modRM.mod = MOD_BYTE_DISP;
212 | 			synth->encoding.format.D1 = 1;
213 | 		}
214 | 		else {
215 | 			synth->encoding.modRM.mod = MOD_DWORD_DISP;
216 | 			synth->encoding.format.D1 = 1;
217 | 			synth->encoding.format.D2 = 1;
218 | 			synth->encoding.format.D3 = 1;
219 | 			synth->encoding.format.D4 = 1;
220 | 		}
221 | 	}
222 | 	else {
223 | 		csynth_error(synth, "mod field error", 30);
224 | 		return -1;
225 | 	}
226 | 	return 0;
227 | }
228 | 
229 | static int csynth_encode_sib_byte(CSynthesizer *synth)
230 | {
231 | 	if (synth->scale == 0 && synth->indexReg == REG_UNKNOWN) {
232 | 		if (synth->baseReg == REG_UNKNOWN || (
233 | 			synth->encoding.modRM.r_m != E_ESP && 
234 | 			synth->encoding.modRM.r_m != E_EBP)) {
235 | 			if (synth->encoding.format.SIB) {
236 | 				csynth_error(synth, "SIB byte error", 31);
237 | 				return -1;
238 | 			}
239 | 			// No SIB byte needed
240 | 			return 0;
241 | 		}
242 | 	}
243 | 
244 | 	// Indicates use of SIB in mod R/M
245 | 	synth->encoding.format.SIB = 1;
246 | 	synth->encoding.modRM.r_m = E_ESP;
247 | 
248 | 	if (synth->baseReg == E_EBP && synth->encoding.modRM.mod == MOD_NO_DISP)
249 | 	{
250 | 		synth->encoding.modRM.mod = MOD_BYTE_DISP;
251 | 		synth->encoding.format.D1 = 1;
252 | 	}
253 | 
254 | 	if (synth->indexReg == E_ESP) {
255 | 		if (synth->scale != 1) {
256 | 			csynth_error(synth, 
257 | 				"ESP can't be scaled index in memory reference", 32);
258 | 			return -2;
259 | 		}
260 | 		else {
261 | 			enum CRegID tempReg;
262 | 			tempReg = synth->indexReg;
263 | 			synth->indexReg = synth->baseReg;
264 | 			synth->baseReg = tempReg;
265 | 		}
266 | 	}
267 | 
268 | 	if (synth->baseReg == REG_UNKNOWN) {
269 | 		synth->encoding.SIB.base = E_EBP;
270 | 		synth->encoding.modRM.mod = MOD_NO_DISP;
271 | 		synth->encoding.format.D1 = 1;
272 | 		synth->encoding.format.D2 = 1;
273 | 		synth->encoding.format.D3 = 1;
274 | 		synth->encoding.format.D4 = 1;
275 | 	}	else {
276 | 		synth->encoding.SIB.base = synth->baseReg;
277 | 	}
278 | 	
279 | 	if (synth->indexReg != REG_UNKNOWN) {
280 | 		synth->encoding.SIB.index = synth->indexReg;
281 | 	}	else {
282 | 		synth->encoding.SIB.index = E_ESP;
283 | 	}
284 | 
285 | 	switch (synth->scale)
286 | 	{
287 | 	case 0:
288 | 	case 1:
289 | 		synth->encoding.SIB.scale = SCALE_1;
290 | 		break;
291 | 	case 2:
292 | 		synth->encoding.SIB.scale = SCALE_2;
293 | 		break;
294 | 	case 4:
295 | 		synth->encoding.SIB.scale = SCALE_4;
296 | 		break;
297 | 	case 8:
298 | 		synth->encoding.SIB.scale = SCALE_8;
299 | 		break;
300 | 	default:
301 | 		csynth_error(synth, "scale number error", 33);
302 | 		return -3;
303 | 		break;
304 | 	}
305 | 
306 | 	return 0;
307 | }
308 | 
309 | int csynth_encode_prefix(CSynthesizer *synth, int code)
310 | {
311 | 	if (cencoding_add_prefix(&synth->encoding, code)) {
312 | 		csynth_error(synth, "cannot add prefix", 90);
313 | 		return -1;
314 | 	}
315 | 	return 0;
316 | }
317 | 
318 | const CEncoding *csynth_encode_instruction(CSynthesizer *synth, 
319 | 	CInstruction *instruction)
320 | {
321 | 	enum COperandType p1, p2;
322 | 	const char *format;
323 | 	unsigned long O;
324 | 
325 | 	if (!instruction) {
326 | 		return &synth->encoding;
327 | 	}
328 | 
329 | 	format = cinst_getEncoding(instruction);
330 | 
331 | 	if (!format) {
332 | 		csynth_error(synth, "csynth_encode_instruction: internal error", 11);
333 | 		return NULL;
334 | 	}
335 | 
336 | 	#define IFORMAT_WORD(x, y) ( (((short)(x)) << 8) | ((short)(y)) )
337 | 
338 | 	while (*format) {
339 | 		int head = ((short)format[1] | format[0] << 8);
340 | 		switch (head)
341 | 		{
342 | 		case IFORMAT_WORD('p', '0'):
343 | 			if (cencoding_add_prefix(&synth->encoding, 0xf0)) {
344 | 				csynth_error(synth, "prefix error 0xf0", 40);
345 | 				return NULL;
346 | 			}
347 | 			break;
348 | 		case IFORMAT_WORD('p', '2'):
349 | 			if (cencoding_add_prefix(&synth->encoding, 0xf2)) {
350 | 				csynth_error(synth, "prefix error 0xf2", 41);
351 | 				return NULL;
352 | 			}
353 | 			break;
354 | 		case IFORMAT_WORD('p', '3'):
355 | 			if (cencoding_add_prefix(&synth->encoding, 0xf3)) {
356 | 				csynth_error(synth, "prefix error 0xf3", 42);
357 | 				return NULL;
358 | 			}
359 | 			break;
360 | 		case IFORMAT_WORD('p', 'o'):
361 | 			if (!cinst_is_32bit(instruction)) {
362 | 				if (cencoding_add_prefix(&synth->encoding, 0x66)) {
363 | 					csynth_error(synth, "prefix error 0x66", 43);
364 | 					return NULL;
365 | 				}
366 | 			}
367 | 			break;
368 | 		case IFORMAT_WORD('p', 'a'):
369 | 			if (!cinst_is_32bit(instruction)) {
370 | 				if (cencoding_add_prefix(&synth->encoding, 0x67)) {
371 | 					csynth_error(synth, "prefix error 0x67", 44);
372 | 					return NULL;
373 | 				}
374 | 			}
375 | 			break;
376 | 		case IFORMAT_WORD('+', 'r'):
377 | 			if (synth->encoding.format.O1) {
378 | 				if (coperand_type_is_reg(synth->firstType)) {
379 | 					synth->encoding.O1 += synth->firstReg;
380 | 				}
381 | 				else if(coperand_type_is_reg(synth->secondType)) {
382 | 					synth->encoding.O1 += synth->secondReg;
383 | 				}
384 | 				else {
385 | 					csynth_error(synth, 
386 | 						"'+r' not compatible with operands", 12);
387 | 					return NULL;
388 | 				}
389 | 			}
390 | 			else {
391 | 				csynth_error(synth, "'+r' needs first opcode byte", 13);
392 | 				return NULL;
393 | 			}
394 | 			break;
395 | 		case IFORMAT_WORD('/', 'r'):
396 | 			if (csynth_encode_mod_field(synth) != 0) {
397 | 				return NULL;
398 | 			}
399 | 			p1 = cinst_getFirstOperand(instruction);
400 | 			p2 = cinst_getSecondOperand(instruction);
401 | 			if (coperand_type_is_reg(p1) && coperand_type_is_R_M(p2)) {
402 | 				if (coperand_type_is_mem(synth->secondType)) {
403 | 					synth->encoding.modRM.r_m = synth->baseReg;
404 | 				}
405 | 				else if (coperand_type_is_reg(synth->secondType)) {
406 | 					synth->encoding.modRM.r_m = synth->secondReg;
407 | 				}
408 | 				else {
409 | 					csynth_error(synth, "syntax error", 14);
410 | 					return NULL;
411 | 				}
412 | 				synth->encoding.modRM.reg = synth->firstReg;
413 | 			}
414 | 			else if (coperand_type_is_R_M(p1) && coperand_type_is_reg(p2)) {
415 | 				if (coperand_type_is_mem(synth->firstType)) {
416 | 					synth->encoding.modRM.r_m = synth->baseReg;
417 | 				}
418 | 				else if (coperand_type_is_reg(synth->firstType)) {
419 | 					synth->encoding.modRM.r_m = synth->firstReg;
420 | 				}
421 | 				else {
422 | 					csynth_error(synth, "syntax error", 15);
423 | 					return NULL;
424 | 				}
425 | 				synth->encoding.modRM.reg = synth->secondReg;
426 | 			}
427 | 			else {
428 | 				csynth_error(synth, "format error", 16);
429 | 				return NULL;
430 | 			}
431 | 			if (csynth_encode_sib_byte(synth) != 0) {
432 | 				return NULL;
433 | 			}
434 | 			break;
435 | 		case IFORMAT_WORD('/', '0'):
436 | 		case IFORMAT_WORD('/', '1'):
437 | 		case IFORMAT_WORD('/', '2'):
438 | 		case IFORMAT_WORD('/', '3'):
439 | 		case IFORMAT_WORD('/', '4'):
440 | 		case IFORMAT_WORD('/', '5'):
441 | 		case IFORMAT_WORD('/', '6'):
442 | 		case IFORMAT_WORD('/', '7'):
443 | 			if (csynth_encode_mod_field(synth) != 0) {
444 | 				return NULL;
445 | 			}
446 | 			synth->encoding.modRM.reg = format[1] - '0';
447 | 			if (coperand_type_is_mem(synth->firstType)) {
448 | 				synth->encoding.modRM.r_m = synth->baseReg;
449 | 			}
450 | 			else if (coperand_type_is_reg(synth->firstType)) {
451 | 				synth->encoding.modRM.r_m = synth->firstReg;
452 | 			}
453 | 			else {
454 | 				csynth_error(synth, "syntax error", 17);
455 | 				return NULL;
456 | 			}
457 | 			if (csynth_encode_sib_byte(synth) != 0) {
458 | 				return NULL;
459 | 			}
460 | 			break;
461 | 		case IFORMAT_WORD('i', 'd'):
462 | 			synth->encoding.format.I1 = 1;
463 | 			synth->encoding.format.I2 = 1;
464 | 			synth->encoding.format.I3 = 1;
465 | 			synth->encoding.format.I4 = 1;
466 | 			synth->encoding.relative = 0;
467 | 			break;
468 | 		case IFORMAT_WORD('i', 'w'):
469 | 			synth->encoding.format.I1 = 1;
470 | 			synth->encoding.format.I2 = 1;
471 | 			synth->encoding.relative = 0;
472 | 			break;
473 | 		case IFORMAT_WORD('i', 'b'):
474 | 			synth->encoding.format.I1 = 1;
475 | 			synth->encoding.relative = 0;
476 | 			break;
477 | 		case IFORMAT_WORD('-', 'b'):
478 | 			synth->encoding.format.I1 = 1;
479 | 			synth->encoding.relative = 1;
480 | 			break;
481 | 		case IFORMAT_WORD('-', 'i'):
482 | 			synth->encoding.format.I1 = 1;
483 | 			synth->encoding.format.I2 = 1;
484 | 			synth->encoding.format.I3 = 1;
485 | 			synth->encoding.format.I4 = 1;
486 | 			synth->encoding.relative = 1;
487 | 			break;
488 | 		default:
489 | 			O = strtoul(format, 0, 16);
490 | 			if (O > 0xFF) {
491 | 				csynth_error(synth, "format error", 18);
492 | 				return NULL;
493 | 			}
494 | 			if (!synth->encoding.format.O1) {
495 | 				synth->encoding.O1 = (cbyte)O;
496 | 				synth->encoding.format.O1 = 1;
497 | 			}
498 | 			else if (synth->encoding.format.O2 == 0 &&
499 | 					(synth->encoding.O1 == 0x0f ||
500 | 					 synth->encoding.O1 == 0xd8 ||
501 | 					 synth->encoding.O1 == 0xd9 ||
502 | 					 synth->encoding.O1 == 0xda ||
503 | 					 synth->encoding.O1 == 0xdb ||
504 | 					 synth->encoding.O1 == 0xdc ||
505 | 					 synth->encoding.O1 == 0xde ||
506 | 					 synth->encoding.O1 == 0xdf)) {
507 | 				synth->encoding.O2 = synth->encoding.O1;
508 | 				synth->encoding.O1 = (cbyte)O;
509 | 				synth->encoding.format.O2 = 1;
510 | 			}
511 | 			else {
512 | 				csynth_error(synth, "synth error", 19);
513 | 				return NULL;
514 | 			}
515 | 			break;
516 | 		}
517 | 
518 | 		format += 2;
519 | 		if (*format == ' ') {
520 | 			format++;
521 | 		}
522 | 		else if (*format == '\0') {
523 | 			break;
524 | 		}
525 | 		else {
526 | 			csynth_error(synth, "instruction error", 20);
527 | 			return NULL;
528 | 		}
529 | 		#undef IFORMAT_WORD
530 | 	}
531 | 
532 | 	return &synth->encoding;
533 | }
534 | 
535 | 


--------------------------------------------------------------------------------
/source/csynthesis.h:
--------------------------------------------------------------------------------
 1 | //=====================================================================
 2 | //
// csynthesis.h - instruction encoding synthesizer
 4 | //
 5 | // NOTE:
 6 | // for more information, please see the readme file.
 7 | //
 8 | //=====================================================================
 9 | #ifndef __CSYNTHESIS_H__
10 | #define __CSYNTHESIS_H__
11 | 
12 | #include "cencoding.h"
13 | #include "ckeywords.h"
14 | #include "cinstruct.h"
15 | 
16 | 
17 | //---------------------------------------------------------------------
18 | // CSynthesizer
19 | //---------------------------------------------------------------------
20 | struct CSynthesizer
21 | {
22 | 	CEncoding encoding;
23 | 	enum COperandType firstType;
24 | 	enum COperandType secondType;
25 | 	enum CRegID firstReg;
26 | 	enum CRegID secondReg;
27 | 	enum CRegID baseReg;
28 | 	enum CRegID indexReg;
29 | 	int scale;
30 | 	int prefix;
31 | 	char *error;
32 | 	int errcode;
33 | };
34 | 
35 | typedef struct CSynthesizer CSynthesizer;
36 | 
37 | 
38 | 
39 | #ifdef __cplusplus
40 | extern "C" {
41 | #endif
42 | //---------------------------------------------------------------------
43 | // interface
44 | //---------------------------------------------------------------------
45 | void csynth_init(CSynthesizer *synth);
46 | void csynth_destroy(CSynthesizer *synth);
47 | void csynth_reset(CSynthesizer *synth);
48 | 
49 | int csynth_define_label(CSynthesizer *synth, const char *label);
50 | int csynth_reference_label(CSynthesizer *synth, const char *label);
51 | 
52 | int csynth_encode_first_operand(CSynthesizer *synth, const COperand *);
53 | int csynth_encode_second_operand(CSynthesizer *synth, const COperand *);
54 | int csynth_encode_third_operand(CSynthesizer *synth, const COperand *);
55 | 
56 | int csynth_encode_base(CSynthesizer *synth, const COperand *base);
57 | int csynth_encode_index(CSynthesizer *synth, const COperand *index);
58 | 
59 | int csynth_encode_scale(CSynthesizer *synth, int scale);
60 | int csynth_encode_immediate(CSynthesizer *synth, long immediate);
61 | int csynth_encode_displacement(CSynthesizer *synth, long displacement);
62 | 
63 | int csynth_encode_prefix(CSynthesizer *synth, int code);
64 | 
65 | const CEncoding *csynth_encode_instruction(CSynthesizer *, CInstruction*);
66 | 
67 | 
68 | #ifdef __cplusplus
69 | }
70 | #endif
71 | 
72 | #endif
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/source/ctoken.c:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // ctoken.c - token definition
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | #include <string.h>
 12 | #include <ctype.h>
 13 | #include <assert.h>
 14 | 
 15 | #include "ctoken.h"
 16 | 
 17 | 
 18 | //---------------------------------------------------------------------
 19 | // create a new token
 20 | //---------------------------------------------------------------------
 21 | CTOKEN *ctoken_new(enum CTokenType type, const void *str, int size)
 22 | {
 23 | 	CTOKEN *token;
 24 | 
 25 | 	token = (CTOKEN*)malloc(sizeof(CTOKEN));
 26 | 	assert(token);
 27 | 
 28 | 	token->type = type;
 29 | 	token->str = NULL;
 30 | 	token->size = 0;
 31 | 	token->lineno = -1;
 32 | 	token->fileno = -1;
 33 | 	token->keyword = -1;
 34 | 
 35 | 	switch (type)
 36 | 	{
 37 | 	case CTokenENDL:
 38 | 		break;
 39 | 	case CTokenENDF:
 40 | 		break;
 41 | 	case CTokenSTR:
 42 | 	case CTokenIDENT:
 43 | 		token->str = (char*)malloc(size + 1);
 44 | 		assert(token->str);
 45 | 		memcpy(token->str, str, size);
 46 | 		token->str[size] = 0;
 47 | 		token->size = size;
 48 | 		break;
 49 | 	case CTokenKEYWORD:
 50 | 		token->keyword = *(int*)str;
 51 | 		break;
 52 | 	case CTokenOPERATOR:
 53 | 		token->ch = *(int*)str;
 54 | 		break;
 55 | 	case CTokenINT:
 56 | 		token->intval = *(long*)str;
 57 | 		break;
 58 | 	case CTokenFLOAT:
 59 | 		token->fltval = *(double*)str;
 60 | 		break;
 61 | 	default:
 62 | 		token->type = CTokenERROR;
 63 | 		token->errcode = *(int*)str;
 64 | 		break;
 65 | 	}
 66 | 
 67 | 	token->next = token;
 68 | 	token->prev = token;
 69 | 
 70 | 	return token;
 71 | }
 72 | 
 73 | //---------------------------------------------------------------------
 74 | // release token
 75 | //---------------------------------------------------------------------
 76 | void ctoken_delete(CTOKEN *token)
 77 | {
 78 | 	if (token->type == CTokenSTR || token->type == CTokenIDENT) {
 79 | 		if (token->str) free(token->str);
 80 | 		token->str = NULL;
 81 | 	}
 82 | 	token->type = CTokenERROR;
 83 | 	free(token);
 84 | }
 85 | 
 86 | //---------------------------------------------------------------------
 87 | // create a new endl
 88 | //---------------------------------------------------------------------
 89 | CTOKEN *ctoken_new_endl(void) {
 90 | 	return ctoken_new(CTokenENDL, NULL, 0);
 91 | }
 92 | 
 93 | //---------------------------------------------------------------------
 94 | // create a new endf
 95 | //---------------------------------------------------------------------
 96 | CTOKEN *ctoken_new_endf(void) {
 97 | 	return ctoken_new(CTokenENDF, NULL, 0);
 98 | }
 99 | 
100 | //---------------------------------------------------------------------
101 | // create a new identity
102 | //---------------------------------------------------------------------
103 | CTOKEN *ctoken_new_ident(const char *ident) {
104 | 	return ctoken_new(CTokenIDENT, ident, (int)strlen(ident));
105 | }
106 | 
107 | //---------------------------------------------------------------------
108 | // create a new keyword
109 | //---------------------------------------------------------------------
110 | CTOKEN *ctoken_new_keyword(int keyid) {
111 | 	return ctoken_new(CTokenKEYWORD, &keyid, sizeof(int));
112 | }
113 | 
114 | //---------------------------------------------------------------------
115 | // create a new string
116 | //---------------------------------------------------------------------
117 | CTOKEN *ctoken_new_string(const char *string) {
118 | 	return ctoken_new(CTokenSTR, string, (int)strlen(string));
119 | }
120 | 
121 | //---------------------------------------------------------------------
122 | // create a new integer
123 | //---------------------------------------------------------------------
124 | CTOKEN *ctoken_new_int(long intval) {
125 | 	return ctoken_new(CTokenINT, &intval, sizeof(long));
126 | }
127 | 
128 | //---------------------------------------------------------------------
129 | // create a new float
130 | //---------------------------------------------------------------------
131 | CTOKEN *ctoken_new_float(double fltval) {
132 | 	return ctoken_new(CTokenFLOAT, &fltval, sizeof(double));
133 | }
134 | 
135 | //---------------------------------------------------------------------
136 | // create a new operator
137 | //---------------------------------------------------------------------
138 | CTOKEN *ctoken_new_operator(int op) {
139 | 	return ctoken_new(CTokenOPERATOR, &op, sizeof(int));
140 | }
141 | 
142 | //---------------------------------------------------------------------
143 | // create a new error
144 | //---------------------------------------------------------------------
145 | CTOKEN *ctoken_new_error(int code) {
146 | 	return ctoken_new(CTokenERROR, &code, sizeof(int));
147 | }
148 | 
149 | //---------------------------------------------------------------------
150 | // token copy
151 | //---------------------------------------------------------------------
152 | CTOKEN *ctoken_new_copy(const CTOKEN *token) {
153 | 	CTOKEN *newtoken;
154 | 
155 | 	newtoken = (CTOKEN*)malloc(sizeof(CTOKEN));
156 | 	assert(newtoken);
157 | 
158 | 	newtoken->type = token->type;
159 | 	newtoken->str = NULL;
160 | 	newtoken->size = 0;
161 | 	newtoken->lineno = token->lineno;
162 | 	newtoken->fileno = token->fileno;
163 | 
164 | 	switch (newtoken->type)
165 | 	{
166 | 	case CTokenSTR:
167 | 	case CTokenIDENT:
168 | 		newtoken->str = (char*)malloc(token->size + 1);
169 | 		assert(newtoken->str);
170 | 		memcpy(newtoken->str, token->str, token->size);
171 | 		newtoken->str[token->size] = 0;
172 | 		newtoken->size = token->size;
173 | 		break;
174 | 	case CTokenKEYWORD:
175 | 		newtoken->keyword = token->keyword;
176 | 		break;
177 | 	case CTokenOPERATOR:
178 | 		newtoken->ch = token->ch;
179 | 		break;
180 | 	case CTokenINT:
181 | 		newtoken->intval = token->intval;
182 | 		break;
183 | 	case CTokenFLOAT:
184 | 		newtoken->fltval = token->fltval;
185 | 		break;
186 | 	default:
187 | 		break;
188 | 	}
189 | 
190 | 	newtoken->next = newtoken;
191 | 	newtoken->prev = newtoken;
192 | 
193 | 	return newtoken;
194 | }
195 | 
196 | 
197 | //---------------------------------------------------------------------
198 | // get string
199 | //---------------------------------------------------------------------
200 | const char *ctoken_get_string(const CTOKEN *token)
201 | {
202 | 	if (token->type != CTokenIDENT && token->type != CTokenSTR) {
203 | 		return "";
204 | 	}
205 | 	return token->str;
206 | }
207 | 
208 | //---------------------------------------------------------------------
209 | // get integer
210 | //---------------------------------------------------------------------
211 | long ctoken_get_int(const CTOKEN *token)
212 | {
213 | 	if (token->type != CTokenINT) {
214 | 		return 0;
215 | 	}
216 | 	return token->intval;
217 | }
218 | 
219 | //---------------------------------------------------------------------
220 | // get char
221 | //---------------------------------------------------------------------
222 | int ctoken_get_char(const CTOKEN *token)
223 | {
224 | 	if (token->type != CTokenOPERATOR) {
225 | 		return '\0';
226 | 	}
227 | 	return token->ch;
228 | }
229 | 
230 | //---------------------------------------------------------------------
231 | // get float
232 | //---------------------------------------------------------------------
233 | double ctoken_get_float(const CTOKEN *token)
234 | {
235 | 	if (token->type != CTokenFLOAT) {
236 | 		return 0.0;
237 | 	}
238 | 	return token->fltval;
239 | }
240 | 
241 | //---------------------------------------------------------------------
242 | // get keyword
243 | //---------------------------------------------------------------------
244 | int ctoken_get_keyword(const CTOKEN *token)
245 | {
246 | 	if (token->type != CTokenKEYWORD) {
247 | 		return -1;
248 | 	}
249 | 	return token->keyword;
250 | }
251 | 
252 | 
253 | int ctoken_is_endl(const CTOKEN *token) {
254 | 	return token->type == CTokenENDL;
255 | }
256 | 
257 | int ctoken_is_endf(const CTOKEN *token) {
258 | 	return token->type == CTokenENDF;
259 | }
260 | 
261 | int ctoken_is_ident(const CTOKEN *token) {
262 | 	return token->type == CTokenIDENT;
263 | }
264 | 
265 | int ctoken_is_keyword(const CTOKEN *token) {
266 | 	return token->type == CTokenKEYWORD;
267 | }
268 | 
269 | int ctoken_is_string(const CTOKEN *token) {
270 | 	return token->type == CTokenSTR;
271 | }
272 | 
273 | int ctoken_is_int(const CTOKEN *token) {
274 | 	return token->type == CTokenINT;
275 | }
276 | 
277 | int ctoken_is_float(const CTOKEN *token) {
278 | 	return token->type == CTokenFLOAT;
279 | }
280 | 
281 | int ctoken_is_operator(const CTOKEN *token) {
282 | 	return token->type == CTokenOPERATOR;
283 | }
284 | 
285 | int ctoken_is_error(const CTOKEN *token) {
286 | 	return token->type == CTokenERROR;
287 | }
288 | 
289 | 
290 | //---------------------------------------------------------------------
291 | // add node to head
292 | //---------------------------------------------------------------------
293 | void ctoken_list_add(CTOKEN *node, CTOKEN *head)
294 | {
295 | 	(node)->prev = (head), (node)->next = (head)->next;
296 | 	(head)->next->prev = (node), (head)->next = (node);
297 | }
298 | 
299 | //---------------------------------------------------------------------
300 | // add node to head's tail
301 | //---------------------------------------------------------------------
302 | void ctoken_list_add_tail(CTOKEN *node, CTOKEN *head)
303 | {
304 | 	(node)->prev = (head)->prev, (node)->next = (head);
305 | 	(head)->prev->next = (node), (head)->prev = (node);
306 | }
307 | 
308 | //---------------------------------------------------------------------
309 | // delete between
310 | //---------------------------------------------------------------------
311 | void ctoken_list_del_between(CTOKEN *p, CTOKEN *n)
312 | {
313 | 	(n)->prev = (p), (p)->next = (n);
314 | }
315 | 
316 | //---------------------------------------------------------------------
317 | // remove self
318 | //---------------------------------------------------------------------
319 | void ctoken_list_del(CTOKEN *entry)
320 | {
321 | 	(entry)->next->prev = (entry)->prev;
322 | 	(entry)->prev->next = (entry)->next;
323 | 	(entry)->next = 0, (entry)->prev = 0;
324 | 	(entry)->next = entry;
325 | 	(entry)->prev = entry;
326 | }
327 | 
328 | //---------------------------------------------------------------------
329 | // check if empty
330 | //---------------------------------------------------------------------
331 | int ctoken_list_is_empty(const CTOKEN *entry)
332 | {
333 | 	return (entry) == (entry)->next;
334 | }
335 | 
336 | //---------------------------------------------------------------------
337 | // print to file
338 | //---------------------------------------------------------------------
339 | void ctoken_print(FILE *fp, const CTOKEN *token)
340 | {
341 | 	if (fp == NULL) fp = stdout;
342 | 	if (token->type == CTokenIDENT) {
343 | 		fprintf(fp, "(%s)", token->str);
344 | 	}
345 | 	else if (token->type == CTokenSTR) {
346 | 		fprintf(fp, "(\"%s\")", token->str);
347 | 	}
348 | 	else if (token->type == CTokenENDL) {
349 | 		fprintf(fp, "ENDL");
350 | 	}
351 | 	else if (token->type == CTokenENDF) {
352 | 		fprintf(fp, "ENDF");
353 | 	}
354 | 	else if (token->type == CTokenKEYWORD) {
355 | 		fprintf(fp, "<%d>", token->keyword);
356 | 	}
357 | 	else if (token->type == CTokenINT) {
358 | 		fprintf(fp, "[%ld]", token->intval);
359 | 	}
360 | 	else if (token->type == CTokenFLOAT) {
361 | 		fprintf(fp, "[%f]", token->fltval);
362 | 	}
363 | 	else if (token->type == CTokenOPERATOR) {
364 | 		fprintf(fp, "[%c]", (char)token->ch);
365 | 	}
366 | 	else if (token->type == CTokenERROR) {
367 | 		fprintf(fp, "ERROR");
368 | 	}
369 | 	fflush(fp);
370 | }
371 | 
372 | 


--------------------------------------------------------------------------------
/source/ctoken.h:
--------------------------------------------------------------------------------
  1 | //=====================================================================
  2 | //
  3 | // ctoken.h - token definition
  4 | //
  5 | // NOTE:
  6 | // for more information, please see the readme file.
  7 | //
  8 | //=====================================================================
  9 | #ifndef __CTOKEN_H__
 10 | #define __CTOKEN_H__
 11 | 
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | #include <ctype.h>
 16 | #include <assert.h>
 17 | 
 18 | //---------------------------------------------------------------------
 19 | // TOKEN Type
 20 | //---------------------------------------------------------------------
 21 | enum CTokenType
 22 | {
 23 | 	CTokenENDL		= 0,
 24 | 	CTokenENDF		= 1,
 25 | 	CTokenIDENT		= 2,
 26 | 	CTokenKEYWORD	= 3,
 27 | 	CTokenSTR		= 4,
 28 | 	CTokenOPERATOR	= 5,
 29 | 	CTokenINT		= 6,
 30 | 	CTokenFLOAT		= 7,
 31 | 	CTokenERROR		= 8,
 32 | };
 33 | 
 34 | 
 35 | //---------------------------------------------------------------------
 36 | // CTOKEN DEFINITION
 37 | //---------------------------------------------------------------------
 38 | struct CTOKEN
 39 | {
 40 | 	enum CTokenType type;
 41 | 	union {
 42 | 		long intval; 
 43 | 		double fltval;
 44 | 		int keyword;
 45 | 		int ch;
 46 | 		int errcode;
 47 | 		char *str;
 48 | 	};
 49 | 	long size;
 50 | 	int lineno;
 51 | 	int fileno;
 52 | 	struct CTOKEN *next;
 53 | 	struct CTOKEN *prev;
 54 | };
 55 | 
 56 | typedef struct CTOKEN CTOKEN;
 57 | 
// Unchecked field shortcuts: each expands to a plain member access and
// does not look at the token's type tag.
#define ctoken_type(token) ((token)->type)		// type tag
#define ctoken_int(token) ((token)->intval)		// integer value
#define ctoken_str(token) ((token)->str)		// text pointer
#define ctoken_chr(token) ((token)->ch)			// operator character
#define ctoken_len(token) ((token)->size)		// text length
#define ctoken_key(token) ((token)->keyword)	// keyword id
 64 | 
 65 | 
 66 | #ifdef __cplusplus
 67 | extern "C" {
 68 | #endif
 69 | 
 70 | 
//---------------------------------------------------------------------
// BASIC INTERFACE
//---------------------------------------------------------------------
// create a new token; `data` is read according to `type`: `size` bytes
// of text for STR / IDENT, otherwise a pointer to the int, long or
// double value (unused for ENDL / ENDF)
CTOKEN *ctoken_new(enum CTokenType type, const void *data, int size);

// delete and free memory (including the string of a STR / IDENT token)
void ctoken_delete(CTOKEN *token);


CTOKEN *ctoken_new_endl(void);                  // create a new endl
CTOKEN *ctoken_new_endf(void);                  // create a new endf
CTOKEN *ctoken_new_ident(const char *ident);    // create a new identifier
CTOKEN *ctoken_new_keyword(int keyid);          // create a new keyword
CTOKEN *ctoken_new_string(const char *string);  // create a new string
CTOKEN *ctoken_new_int(long intval);            // create a new integer
CTOKEN *ctoken_new_float(double fltval);        // create a new float
CTOKEN *ctoken_new_operator(int op);            // create a new operator
CTOKEN *ctoken_new_error(int errcode);          // create a new error token
CTOKEN *ctoken_new_copy(const CTOKEN *token);   // create a new, unlinked copy
 91 | 
 92 | 
//---------------------------------------------------------------------
// type & value operation
//
// Each getter checks the type tag first and returns the listed
// fallback when the token is of a different type.
//---------------------------------------------------------------------
const char *ctoken_get_string(const CTOKEN *token);  // get string (fallback "")
long ctoken_get_int(const CTOKEN *token);            // get integer value (fallback 0)
int ctoken_get_char(const CTOKEN *token);            // get operator char (fallback 0)
double ctoken_get_float(const CTOKEN *token);        // get float value (fallback 0.0)
int ctoken_get_keyword(const CTOKEN *token);         // get keyword (fallback -1)

// type tests: non-zero when the token has the named type
int ctoken_is_endl(const CTOKEN *token);
int ctoken_is_endf(const CTOKEN *token);
int ctoken_is_ident(const CTOKEN *token);
int ctoken_is_keyword(const CTOKEN *token);
int ctoken_is_string(const CTOKEN *token);
int ctoken_is_int(const CTOKEN *token);
int ctoken_is_float(const CTOKEN *token);
int ctoken_is_operator(const CTOKEN *token);
int ctoken_is_error(const CTOKEN *token);
111 | 
112 | 
//---------------------------------------------------------------------
// list operation (circular doubly linked list through next / prev)
//---------------------------------------------------------------------
void ctoken_list_add(CTOKEN *node, CTOKEN *head);       // insert node right after head
void ctoken_list_add_tail(CTOKEN *node, CTOKEN *head);  // insert node right before head
void ctoken_list_del_between(CTOKEN *p, CTOKEN *n);     // link p directly to n
void ctoken_list_del(CTOKEN *p);                        // unlink p; p then points at itself
int ctoken_list_is_empty(const CTOKEN *p);              // non-zero when p->next is p
121 | 
122 | 
//---------------------------------------------------------------------
// misc
//---------------------------------------------------------------------
// write a short text form of the token to fp (stdout when fp is NULL)
void ctoken_print(FILE *fp, const CTOKEN *token);
127 | 
128 | 
129 | #ifdef __cplusplus
130 | }
131 | #endif
132 | 
133 | #endif
134 | 
135 | 
136 | 


--------------------------------------------------------------------------------
/source/test1.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <conio.h>
  4 | #include <string.h>
  5 | #include <ctype.h>
  6 | 
  7 | #include "cencoding.h"
  8 | #include "ckeywords.h"
  9 | #include "cinstruct.h"
 10 | #include "cinstset.h"
 11 | #include "ctoken.h"
 12 | #include "cscanner.h"
 13 | #include "csynthesis.h"
 14 | #include "cparser.h"
 15 | #include "casmpure.h"
 16 | 
 17 | 
 18 | /*
 19 | void CrossProduct(float *V0, float *V1, float *V2)
 20 | {
 21 | 	V2[0] = V0[1] * V1[2] - V0[2] * V1[1];
 22 | 	V2[1] = V0[2] * V1[0] - V0[0] * V1[2];
 23 | 	V2[2] = V0[0] * V1[1] - V0[1] * V1[0];
 24 | }*/
 25 | 
// Assembly source for CrossProduct (C version in the comment above).
// The three pointer arguments are read from [esp+4], [esp+8] and
// [esp+12] into eax (V0), ecx (V1) and edx (V2); each of the three
// fld/fmul/fld/fmul/fsubp/fstp groups computes and stores one
// component of V2.
const char *CrossProductAsm = 
"    mov        ecx, [esp+8]\n"
"    mov        eax, [esp+4]\n"
"    mov        edx, [esp+12]\n"
"\n"    
"    fld        DWORD [ecx+8]\n"
"    fmul       DWORD [eax+4]\n"
"    fld        DWORD [eax+8]\n"
"    fmul       DWORD [ecx+4]\n"
"    fsubp      st1, st0\n"
"    fstp       DWORD [edx]\n"
"\n"    
"    fld        DWORD [eax+8]\n"
"    fmul       DWORD [ecx]\n"
"    fld        DWORD [eax]\n"
"    fmul       DWORD [ecx+8]\n"
"    fsubp      st1, st0\n"
"    fstp       DWORD [edx+4]\n"
"\n"    
"    fld        DWORD [eax]\n"
"    fmul       DWORD [ecx+4]\n"
"    fld        DWORD [ecx]\n"
"    fmul       DWORD [eax+4]\n"
"    fsubp      st1, st0\n"
"    fstp       DWORD [edx+8]\n"
"    \n"
"    ret\n";
 53 | 
 54 | 
 55 | void testCrossProduct(void)
 56 | {
 57 | 	CAssembler *casm;
 58 | 	int size, c;
 59 | 
 60 | 	void (*CrossProductPtr)(float*, float*, float*);
 61 | 
 62 | 	// create assembler
 63 | 	casm = casm_create();
 64 | 
 65 | 	// append assembly source
 66 | 	casm_source(casm, CrossProductAsm);
 67 | 
 68 | 	// calculate size
 69 | 	size = casm_compile(casm, NULL, 0);
 70 | 
 71 | 	if (size < 0) {
 72 | 		printf("compile error: %s\n", casm->error);
 73 | 		casm_release(casm);
 74 | 		return;
 75 | 	}
 76 | 
 77 | 	CrossProductPtr = (void (*)(float*, float*, float*))malloc(size);
 78 | 
 79 | 	// compile into memory
 80 | 	casm_compile(casm, (unsigned char*)CrossProductPtr, size);
 81 | 
 82 | 	printf("==================== Cross Product ====================\n");
 83 | 
 84 | 	casm_dumpinst(casm, stdout);
 85 | 
 86 | 	printf("\nExecute code (y/n)?\n\n");
 87 | 
 88 | 	do
 89 | 	{
 90 | 		c = getch();
 91 | 	}
 92 | 	while(c != 'y' && c != 'n');
 93 | 
 94 | 	if(c == 'y')
 95 | 	{
 96 | 		float V0[3] = {1, 0, 0};
 97 | 		float V1[3] = {0, 1, 0};
 98 | 		float V2[3];
 99 | 
100 | 		// execute
101 | 		CrossProductPtr(V0, V1, V2);
102 | 
103 | 		printf("output: (%.3f, %.3f, %.3f)\n\n", V2[0], V2[1], V2[2]);
104 | 	}
105 | 
106 | 	free(CrossProductPtr);
107 | 	casm_release(casm);
108 | }
109 | 
110 | 
// Assembly source for the hello-world thunk: loads the second stack
// argument ([esp+8]) into eax and pushes it, then calls through
// [esp+8], which after the push addresses the first argument; the
// pushed value is popped into ecx before returning.
const char *HelloWorldAsm = 
"    mov     eax,  [esp+8]\n"
"    push    eax\n"
"    call    DWORD [esp+8]\n"
"    pop     ecx\n"
"    ret\n";
117 | 
118 | void testHelloWorld(void)
119 | {
120 | 	CAssembler *casm;
121 | 	int size, c;
122 | 
123 | 	void (*HelloWorldPtr)(void*, const char*);
124 | 
125 | 	// create assembler
126 | 	casm = casm_create();
127 | 
128 | 	// append assembly source
129 | 	casm_source(casm, HelloWorldAsm);
130 | 
131 | 	// calculate size
132 | 	size = casm_compile(casm, NULL, 0);
133 | 
134 | 	if (size < 0) {
135 | 		printf("compile error: %s\n", casm->error);
136 | 		casm_release(casm);
137 | 		return;
138 | 	}
139 | 
140 | 	HelloWorldPtr = (void (*)(void*, const char*))malloc(size);
141 | 
142 | 	// compile into memory
143 | 	casm_compile(casm, (unsigned char*)HelloWorldPtr, size);
144 | 
145 | 	printf("==================== Hello World ====================\n");
146 | 
147 | 	casm_dumpinst(casm, stdout);
148 | 
149 | 	printf("\nExecute code (y/n)?\n\n");
150 | 
151 | 	do
152 | 	{
153 | 		c = getch();
154 | 	}
155 | 	while(c != 'y' && c != 'n');
156 | 
157 | 	if(c == 'y')
158 | 	{
159 | 		// execute
160 | 		HelloWorldPtr((void*)printf, "Hello, World !!\n");
161 | 	}
162 | 
163 | 	free(HelloWorldPtr);
164 | 	casm_release(casm);
165 | }
166 | 
167 | 
// Assembly source for the alpha-blend routine, written as a PROC with
// three named DWORD arguments (C1, C2, A). It works in MMX registers
// mm0, mm1, mm2 and mm7, moves the low DWORD of mm1 into eax at the
// end, and executes emms before returning.
const char *AlphaBlendAsm = 
"PROC C1:DWORD, C2:DWORD, A:DWORD\n"
"    movd mm0, A\n"
"    punpcklwd mm0, mm0\n"
"    punpckldq mm0, mm0\n"
"    pcmpeqb mm7, mm7\n"
"    psubw mm7, mm0\n"
"    \n"
"    punpcklbw mm1, C1\n"
"    psrlw mm1, 8\n"
"    punpcklbw mm2, C2\n"
"    psrlw mm2, 8\n"
"    \n"
"    pmullw mm1, mm7\n"
"    pmullw mm2, mm0\n"
"    paddw mm1, mm2\n"
"    \n"
"    psrlw mm1, 8\n"
"    packuswb mm1, mm1\n"
"    movd eax, mm1\n"
"    emms\n"
"    ret\n"
"ENDP\n";
191 | 
192 | 
193 | void testAlphaBlend(void)
194 | {
195 | 	CAssembler *casm;
196 | 	int c;
197 | 
198 | 	int (*AlphaBlendPtr)(int, int, int);
199 | 
200 | 	// create assembler
201 | 	casm = casm_create();
202 | 
203 | 	// append assembly source
204 | 	casm_source(casm, AlphaBlendAsm);
205 | 
206 | 	// compile directly
207 | 	AlphaBlendPtr = (int (*)(int, int, int))casm_callable(casm, NULL);
208 | 
209 | 	if (AlphaBlendPtr == NULL) {
210 | 		printf("error: %s\n", casm->error);
211 | 		casm_release(casm);
212 | 		return;
213 | 	}
214 | 
215 | 	printf("==================== Alpha Blend ====================\n");
216 | 
217 | 	casm_dumpinst(casm, stdout);
218 | 
219 | 	printf("\nExecute code (y/n)?\n\n");
220 | 
221 | 	do
222 | 	{
223 | 		c = getch();
224 | 	}
225 | 	while(c != 'y' && c != 'n');
226 | 
227 | 	if(c == 'y')
228 | 	{
229 | 		// execute
230 | 		int x = AlphaBlendPtr(0x00FF00FF, 0xFF00FF00, 128);
231 | 		printf("output: %.8X\n\n", x);
232 | 	}
233 | 
234 | 	free(AlphaBlendPtr);
235 | 	casm_release(casm);
236 | }
237 | 
238 | 
239 | //! src: ctoken.c, cscanner.c, csynthesis.c, cparser.c, casmpure.c
240 | //! exe: cencoding.c, cinstruct.c, cinstset.c, ckeywords.c, cloader.c
241 | int main(void)
242 | {
243 | 	testCrossProduct();
244 | 	testHelloWorld();
245 | 	testAlphaBlend();
246 | 	return 0;
247 | }
248 | 
249 | 
250 | 


--------------------------------------------------------------------------------
/source/testblit.asm:
--------------------------------------------------------------------------------
; Color-keyed blit of 32-bit pixels: copies a width x height block from
; src to dst, leaving dst untouched wherever the source pixel equals mask.
;   dst, src       - first pixel of the destination / source block
;   dpitch, spitch - bytes from the start of one row to the start of the next
;   width, height  - block size in pixels / rows
;   mask           - color key
; Both loops test their counter only after the body has run, so width
; and height must be at least 1.
; Overwrites eax, ebx, ecx, edx, esi, edi and counts the `height`
; argument down in place.
; NOTE(review): ebx, esi and edi are overwritten without a visible save -
; confirm whether PROC/ENDP preserves them for the caller.
PROC dst:DWORD, src:DWORD, dpitch:DWORD, spitch:DWORD, width:DWORD, height:DWORD, mask:DWORD
	local diff1:DWORD
	local diff2:DWORD

	mov edi, dst			; edi = write cursor
	mov esi, src			; esi = read cursor

	mov ebx, width
	shl ebx, 2				; ebx = width * 4 (bytes per row of pixels)

	mov eax, dpitch		
	sub eax, ebx
	mov diff1, eax			; diff1 = dpitch - width * 4 (row gap in dst)

	mov eax, spitch
	sub eax, ebx
	mov diff2, eax			; diff2 = spitch - width * 4 (row gap in src)

	mov ebx, mask			; ebx = color key for the compare below
	mov edx, height			; edx is not read again in this routine

ALIGN
loop_line:
	mov ecx, width			; ecx = pixels left in this row
ALIGN
loop_pixel:
	mov eax, [esi]
	cmp eax, ebx			; same as the color key?
	jz @f					; yes: skip the store
	mov [edi], eax
@@:
	add esi, 4
	add edi, 4
	dec ecx
	jnz loop_pixel

	add edi, diff1			; step both cursors over the row gap
	add esi, diff2
	dec height				; row counter lives in the argument slot
	jnz loop_line

	ret
ENDP
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/source/testblit.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <conio.h>
 4 | #include <string.h>
 5 | #include <ctype.h>
 6 | 
 7 | #include "cencoding.h"
 8 | #include "ckeywords.h"
 9 | #include "cinstruct.h"
10 | #include "cinstset.h"
11 | #include "ctoken.h"
12 | #include "cscanner.h"
13 | #include "csynthesis.h"
14 | #include "cparser.h"
15 | #include "casmpure.h"
16 | 
// Signature of the routine assembled from testblit.asm; the argument
// order follows its PROC line:
// dst:DWORD, src:DWORD, dpitch:DWORD, spitch:DWORD, width:DWORD, height:DWORD, mask:DWORD
typedef void (*MaskBlitProc)(void *dst, const void *src, long dpitch, long spitch,
	int width, int height, unsigned long mask);
20 | 
// 4x4 destination image for testBlit: every pixel starts as 1.
int BMP1[16] = {
	1, 1, 1, 1,
	1, 1, 1, 1,
	1, 1, 1, 1,
	1, 1, 1, 1,
};

// 4x4 source image for testBlit: a 2x2 block of 2 in the middle of a
// border of 0. testBlit passes 0 as the mask, so only the 2s are copied.
int BMP2[16] = {
	0, 0, 0, 0,
	0, 2, 2, 0,
	0, 2, 2, 0,
	0, 0, 0, 0,
};
34 | 
35 | void testBlit()
36 | {
37 | 	CAssembler *casm;
38 | 	MaskBlitProc MaskBlit;
39 | 	int ret, i, j;
40 | 	
41 | 	// create assembler
42 | 	casm = casm_create();
43 | 
44 | 	// load assembly source
45 | 	ret = casm_loadfile(casm, "testblit.asm");
46 | 
47 | 	if (ret != 0) {
48 | 		printf("error: %s\n", casm->error);
49 | 		casm_release(casm);
50 | 	}
51 | 
52 | 	MaskBlit = (MaskBlitProc)casm_callable(casm, NULL);
53 | 
54 | 	if (ret != 0) {
55 | 		printf("error: %s\n", casm->error);
56 | 		casm_release(casm);
57 | 	}
58 | 
59 | 	casm_dumpinst(casm, stdout);
60 | 
61 | 	MaskBlit(BMP1, BMP2, 16, 16, 4, 4, 0);
62 | 
63 | 	for (j = 0; j < 4; j++) {
64 | 		for (i = 0; i < 4; i++) 
65 | 			printf("%x ", BMP1[j * 4 + i]);
66 | 		printf("\n");
67 | 	}
68 | 
69 | 	casm_release(casm);
70 | }
71 | 
72 | 
73 | //! src: ctoken.c, cscanner.c, csynthesis.c, cparser.c, casmpure.c
74 | //! exe: cencoding.c, cinstruct.c, cinstset.c, ckeywords.c, cloader.c
// Runs the blit demo and always exits with status 0.
int main(void)
{
	const int status = 0;

	testBlit();

	return status;
}
80 | 
81 | 
82 | 


--------------------------------------------------------------------------------