├── COPYING ├── ChangeLog ├── Makefile ├── README ├── README.xcore ├── TODO ├── goal.def ├── hashtable.c ├── insn.def ├── longlong.h ├── run_program.def ├── superopt.c ├── superopt.h ├── synth.def └── version.h /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 675 Mass Ave, Cambridge, MA 02139, USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. 
This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | Appendix: How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 19yy 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) 19yy name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Library General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2014-09-18 James Pallister 2 | Added the swap AVR instruction. 3 | 4 | * superopt.c (output_assembly): Added assembly output for the above 5 | * synth.def: Added calls to the above instructions. 6 | 7 | 2014-09-10 James Pallister 8 | Added the clc and sec AVR instructions. 9 | 10 | * superopt.c (output_assembly): Added assembly output for the above 11 | * synth.def: Added calls to the above instructions. 12 | 13 | 2014-09-08 James Pallister 14 | Added the ror, rol, asr and lsr AVR instructions 15 | 16 | * superopt.c (output_assembly): Added assembly output for the above 17 | * synth.def: Added calls to the above instructions. 
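The AVR entries above add 8-bit shift, rotate, and nibble-swap instructions (lsr, asr, ror, rol, swap). As a rough illustration of what those operations compute on an 8-bit register value, here is a hypothetical set of C helper macros; they are illustrative only, not the PERFORM_* macros that superopt.h actually defines, and the carry-flag updates are omitted:

/* Effect of the AVR instructions named above on an 8-bit value r,
   with carry-in cy (0 or 1).  Illustrative sketch only. */
#define LSR8(r)      (((r) & 0xff) >> 1)                         /* logical shift right        */
#define ASR8(r)      ((((r) & 0xff) >> 1) | ((r) & 0x80))        /* arithmetic shift right     */
#define ROR8(r, cy)  ((((r) & 0xff) >> 1) | ((cy) << 7))         /* rotate right through carry */
#define ROL8(r, cy)  ((((r) << 1) | (cy)) & 0xff)                /* rotate left through carry  */
#define SWAP8(r)     ((((r) << 4) | (((r) & 0xff) >> 4)) & 0xff) /* swap the two nibbles       */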
18 | 19 | 2014-09-08 James Pallister 20 | Made the hashtable usage conditional on DO_HASHTABLE 21 | 22 | * superopt.c: Added the DO_HASHTABLE preprocessor define, to decide 23 | whether to use the hashtable speed-up or not 24 | 25 | 2014-09-05 James Pallister 26 | Added more instructions to the AVR port: dec, sbc, clr, ser, cpc 27 | 28 | * superopt.c (output_assembly): Added assembly output for the above 29 | instructions. 30 | * synth.def: Synth the above instructions. 31 | 32 | 2014-09-05 James Pallister 33 | Added the compare with carry instruction definition (cmpc), and updated 34 | cmp to work with other bit widths. 35 | 36 | * insn.def: Added CMPC instruction. 37 | * run_program.def: Added a case to execute the CMPC instruction. 38 | * superopt.h: Added CMPC and modified CMP to compute the carry for all bit 39 | widths. 40 | 41 | 2014-08-29 James Pallister 42 | Small bug fix in the pruning method. 43 | 44 | * superopt.c (recurse): Use the accepted heuristic counter instead of 45 | the total success count, since we are only considering the possible 46 | correctness of the one set of input values, not all. 47 | 48 | 2014-08-29 James Pallister 49 | 50 | Added a new pruning method. This method records the state of the registers 51 | and carry if no solutions are found after recursing. Before subsequent 52 | calls to synth this hash table is checked. If the current registers and 53 | carry are in the table, and the allowed cost is lower than or equal to the 54 | stored value, then the current sequence cannot be optimal, so skip it. 55 | 56 | * superopt.c: Include hashtable.c and add a hash_skipped global to count 57 | the amount of pruning. 58 | (recurse): Before calling synth, look up the registers and carry in the 59 | table. Don't do this if allowed_cost is small, since it is quicker 60 | just to test in this case. If no more results are found, add the registers 61 | and carry to the hashtable. 62 | * hashtable.c: New file. Implementation of a hashtable. 63 | 64 | 65 | 2014-08-29 James Pallister 66 | 67 | * superopt.h: 8 bit word should be selected if the bits per word is 8, 68 | not just if AVR is selected. 69 | 70 | 2014-08-19 James Pallister 71 | 72 | * superopt.c: Changed the test counters and limits to 64 bit to 73 | avoid overflow. 74 | 75 | 2014-08-18 James Pallister 76 | 77 | * superopt.c: Add a -test-limit option to specify the maximum 78 | number of tests to evaluate. 79 | 80 | 2014-08-18 James Pallister 81 | 82 | * goal.def: Changed p24 to work with smaller bit widths. 83 | 84 | 2014-08-18 James Pallister 85 | 86 | * superopt.c (output_assembly): Added adc instruction for AVR. 87 | * synth.def (synth): Added adc instruction for AVR. 88 | 89 | 2014-08-18 James Pallister 90 | 91 | * superopt.c: Added a test count, for the number of full sequences 92 | that have been evaluated. 93 | 94 | 2014-08-18 James Pallister 95 | 96 | * goal.def: To keep compatibility, don't generate any constants 97 | larger than 32 bits. 98 | 99 | 2014-08-17 James Pallister 100 | 101 | * goal.def: Some of the goals use the 0x80000000 constant. These are 102 | changed to be 0x80 when the bit width of the target is 8 bits, 103 | for example. 104 | 105 | 2014-08-17 James Pallister 106 | 107 | * All files: Add AVR support. Support for the add, sub, inc, cmp, xor, 108 | or, and, mov instructions. 109 | 110 | 2014-08-17 James Pallister 111 | 112 | * All files: Fix white space.
All tabs converted to whitespace 113 | 114 | 2010-18-07 Richard Osborne 115 | 116 | * superopt.h: Fix incorrect use of unsigned_word type in PERFORM_SEXT macro 117 | 118 | 2009-28-08 Richard Osborne 119 | 120 | * superopt.c: Fix printing of ldaw / lda16. 121 | * superopt.h: Fix error in the PERFORM macros for ldaw / lda16. 122 | 123 | 2009-07-08 Richard Osborne 124 | 125 | * All files: Add XCore support 126 | 127 | Sat Jun 3 01:28:17 1995 Torbjorn Granlund 128 | 129 | * superopt.c (random_word): Delete unused variable tot_bits. 130 | 131 | Thu Jun 1 04:05:25 1995 Torbjorn Granlund 132 | 133 | * superopt.c (init_random_word): Make state1 have char type. 134 | Use random() on alpha, since srand48 doesn't work there. 135 | 136 | Wed May 31 17:08:12 1995 Torbjorn Granlund 137 | 138 | * superopt.c (test_operands): Add -3, -2, 3, 30, 31, 32, 63, 64. 139 | (random_word): Return small numbers with high probability. 140 | 141 | Sat May 27 18:32:03 1995 Torbjorn Granlund 142 | 143 | * superopt.c (N_RANDOM_TEST_OPERANDS): Set to 25000. 144 | (random_word): Rewrite. 145 | (RANDOM): New macro, internal to random_word. 146 | 147 | * goal.def (DBL_SHIFT_LO, DBL_SHIFT_LO_TRUNC): Arity is 2. 148 | 149 | Wed May 24 07:40:49 1995 Torbjorn Granlund 150 | 151 | * superopt.c (N_RANDOM_TEST_OPERANDS): New macro. 152 | (init_test_sets): Use N_RANDOM_TEST_OPERANDS. 153 | Also, zero n_words when in declarator. 154 | (random_word): Conditionally complement x before loop. 155 | 156 | * synth.def (synth_skip): Loop over dr also for unary operations. 157 | * superopt.c (recurse): Delete inline declaration. 158 | 159 | Tue May 23 01:35:16 1995 Torbjorn Granlund 160 | 161 | * goal.def: Add *_SEL goals. 162 | 163 | Mon May 22 23:00:31 1995 Torbjorn Granlund 164 | 165 | * Makefile (*.res rules): Use "./" when running superoptimizers. 166 | Delete spurious tab after rule. 167 | (ALL_MACHINES): Put hppa last. 168 | 169 | Thu May 18 22:36:58 1995 Torbjorn Granlund 170 | 171 | * synth.def (I960 synth): Break out conditional add and subtract 172 | instructions into separate loops, and get the pruning conditions 173 | right. 174 | 175 | * synth.def (ALPHA synth): Try CMOVcc with (1) as 2:nd operand. 176 | * synth.def (ALPHA synth): Read v every time before executing 177 | CMOVcc. Also, don't do CMOVcc with any immediate 1:st operands. 178 | (I960 synth): Likewise, but for ADDO_cc_960 and SUBO_cc_960. 179 | * run_program.def: Read v every time before executing ADDO_cc_960 180 | and SUBO_cc_960. 181 | 182 | * superopt.c (ALPHA output_assembly): Fix typo in CMPLEU and CMPLTU. 183 | 184 | * synth.def (I960 synth): Pass CY_0 in insn that sets cc to 100b. 185 | 186 | Wed May 17 09:19:13 1995 Torbjorn Granlund 187 | 188 | * synth.def (I960 synth): Use CRECURSE_2OP for CHKBIT. 189 | * superopt.c (I960 output_assembly): Output two operands for CHKBIT. 190 | 191 | * superopt.c (output_assembly): Use new PERFORM_CONCMPx_960 192 | name here too. 193 | 194 | Tue May 16 00:07:12 1995 Torbjorn Granlund 195 | 196 | * superopt.c (main_synth): Print C code for goal sequence before 197 | outputtting the sequences. 198 | (all functions): Print to stdout, not stderr, unless an error occured. 199 | 200 | * superopt.h (PERFORM_CONCMPx_960): New name for 201 | PERFORM_CONCMPx_NO_960. Rewrite, they were completely wrong. 202 | * synth.def, run_program.def, insn.def: 203 | Use new name PERFORM_CONCMPx_960. 204 | 205 | * synth.def (I960 synth): Delete I960_1_1 condition on CONCMPcc. 206 | 207 | * Makefile: For i960 build, pass -DI960_1_1. 
208 | 209 | * superopt.c (recurse): Change test of HPPA to HAS_NULLIFICATION 210 | in search for goal_value in values array. Also compare v to 211 | goal_value, since it is not yet stored in values array. 212 | 213 | * superopt.h, synth.def: Handle i960 1.1 instructions specifically. 214 | (POWER): Fix typo testing for POWERPC. 215 | 216 | Mon May 15 23:49:56 1995 Torbjorn Granlund 217 | 218 | * synth.def (I960 synth): Fix typos for CONCMPcc instructions. 219 | Try SELcc, CONCMPcc, ADDOcc, SUBOcc with immediate arguments. 220 | Try LSHUFTR_NT with op1 being 1. 221 | 222 | Mon May 15 19:10:36 1995 Torbjorn Granlund 223 | 224 | * synth.def (I960 synth): Pass correct prune hint for SELcc. 225 | Try SELcc with immediate 0 and 1. 226 | 227 | Mon May 15 10:28:20 1995 Torbjorn Granlund 228 | 229 | * superopt.c (init_random_word): New function. 230 | (main_synth): Call init_random_word. 231 | 232 | * Update copyright headers. 233 | 234 | * superopt.c: Use "assembly", not "assembler" consistently. 235 | (main): Default maxmax_cost to 4 (was 5). 236 | 237 | Sun May 14 12:24:44 1995 Torbjorn Granlund 238 | 239 | * insn.def: Add `<' and `=' as instruction classes. 240 | (test_sequence): Use new sequences for suppresion of destination 241 | register printing. 242 | 243 | * Fold in i960 port. All files affected. 244 | 245 | * superopt.h (PERFORM_LSHIFT*): Cast r1 to unsigned_word, 246 | not signed_word. 247 | 248 | * superopt.c (recurse): In loop to find goal value when the last insn 249 | is nullified, loop from 0, not from goal_function_arity. 250 | 251 | Sat May 13 12:00:46 1995 Torbjorn Granlund 252 | 253 | * synth.def: New file, move all synth functions here. 254 | 255 | * superopt.c: Include synth.def twice, once for generating non-leaf 256 | synth functions, once for generating leaf synth functions, with 257 | different definitions of the various RECURSE macros. 258 | (SYNTH): New macro. 259 | (recurse): Use SYNTH. 260 | (main_synth): Likewise. 261 | (recurse_last): New function, called by the leaf synth variants. 262 | 263 | Sun May 7 11:46:50 1995 Torbjorn Granlund 264 | 265 | * superopt.c (HPPA synth): Split into several smaller functions. 266 | (synth_nonskip, synth_condskip, synth_skip): New functions. 267 | 268 | Sat May 6 10:35:25 1995 Torbjorn Granlund 269 | 270 | * superopt.c (ALL synth): Split extract-of-1 and extract-of-2 into 271 | separate loops; make latter loops terminate at BITS_PER_WORD-2. 272 | (ALL synth): Delete SHIFTS and EXTRACTS macros; use run-time 273 | conditionals instead. 274 | (flag_shifts, flag_extracts): New variables. 275 | (main): Set new variables. 276 | * superopt.h (SHIFTS): Delete. 277 | 278 | Wed May 3 15:35:23 1995 Torbjorn Granlund 279 | 280 | * superopt.c (HPPA synth): Do comiclr with -1 and 1, not just 0. 281 | (output_assembler, PYR): Handle rsubw in ADC_CO case. 282 | 283 | Tue May 2 21:09:37 1995 Torbjorn Granlund 284 | 285 | * superopt.h (EXTRA_SEQUENCE_TESTS, SH): Only detect non-zero immediate 286 | values. Also allow sequences that twice (or more) demand the *same* 287 | variable to be allocated to r0. 288 | 289 | Tue May 2 10:39:22 1995 Torbjorn Granlund 290 | 291 | * superopt.c (PA_RECURSE): Don't increment N_VALUES unconditionally; 292 | make it depend in if D equals N_VALUES. 293 | 294 | Mon May 1 23:04:17 1995 Torbjorn Granlund 295 | 296 | * superopt.h (word typedefs): 297 | Use long long also when _LONGLONG is defined. 
298 | 299 | Mon May 1 17:59:11 1995 Torbjorn Granlund 300 | 301 | * superopt.c (output_assembler, POWER): For SUB, use INS_SUBF 302 | when not immediate operand. 303 | (INS_SUBF): New #define. 304 | 305 | Mon May 1 09:17:25 1995 Torbjorn Granlund 306 | 307 | * superopt.c (main): Use exit instead of return consistently. 308 | (output_assembler, HPPA): Handle EXT[SU][12]_S and ROTATEL_S. 309 | 310 | Sun Apr 30 00:14:14 1995 Torbjorn Granlund 311 | 312 | * superopt.c (output_assembler, ALPHA, case ADD): Cast immediate 313 | value to int. 314 | 315 | * superopt.c (RISC synth): Try COPY of registers for ALPHA. 316 | 317 | * superopt.c (HPPA synth): Also COPY 0 (it might be nullified). 318 | (HPPA synth): Correct several typos for COPY_S variants. 319 | 320 | * superopt.h (PSTR): Define as appropriate. 321 | * superopt.c (print_operand): New function. Use PSTR. 322 | (test_sequence): Move operand printing code to print_operand. 323 | 324 | * superopt.c (RISC synth): Try COPY of immediates also for ALPHA. 325 | (output_assembler, ALPHA): Handle COPY. 326 | (RISC synth): Don't do cmpltu(r,0) or cmpleu(0,r) or cmplt(r,0). 327 | 328 | * superopt.h (TRUNC_CNT): Use % instead of & for portability. 329 | (inline): Define to empty also if DEBUG. 330 | 331 | * superopt.c (synth): Add TIMING stuff to all variants of synth. 332 | Also, make type of time_start agree with type of cputime(). 333 | 334 | Sat Apr 29 09:32:58 1995 Torbjorn Granlund 335 | 336 | * longlong.h (C umul_ppmm): Use UWtype, not USItype for temps. 337 | (udiv_qrnnd): For cases implemented with call to __udiv_qrnnd, 338 | protect with new symbol LONGLONG_STANDALONE. 339 | 340 | * goal.def (CLEAR_LSB): Fix typo. 341 | 342 | * longlong.h: Replace with version from GNU MP. 343 | * superopt.h: Set up #defines for new longlong.h. 344 | 345 | Sat Apr 29 01:43:25 1995 Torbjorn Granlund 346 | 347 | * superopt.h (FF1_CHECK): Delete. 348 | (PERFORM_FF1): Don't use FF1_CHECK. 349 | (PERFORM_CLZ): Rewrite to handle 64 bit words. 350 | (PERFORM_FFS): Use BITS_PER_WORD, not the constant 32. 351 | 352 | * superopt.c (main): When printing list of goals, terminate with \n. 353 | 354 | * superopt.c (RISC synth): Try ADD with immediate -1. 355 | (output_assembler, ALPHA): Handle ADD with negative s2. 356 | 357 | * goal.def: Delete redundant goal divide_by_minus_2e31. 358 | 359 | Fri Apr 28 11:39:51 1995 Torbjorn Granlund 360 | 361 | * superopt.c (HPPA synth): In code protected by #if EXTRACTS, 362 | loop to 30, not 31. 363 | (RISC synth): Likewise. 364 | (output_assembler, HPPA): Handle EXT[SU][12]. 365 | 366 | Thu Apr 27 10:46:24 1995 Torbjorn Granlund 367 | 368 | * run_program.def (run_program): Use memset, not bzero. 369 | 370 | * superopt.c (output_assembler, HPPA): Handle COPY_S. 371 | Rearrange code for plain COPY. Enable unconditionally nullifying 372 | shift/rotate/extract. 373 | (HPPA synth): Delete spurious duplicate code within #if SHIFTS 374 | in the conditional-nullify block. Delete spurious 0-ary and copy code 375 | in the same block. Add systematically missing _S to 376 | shift/rotate/extract instruction names in unconditional-nullify block; 377 | Correct and enable 0-ary instructions and variants of COPY_S. 378 | 379 | * superopt.h (PERFORM_ROTATEL): Check TRUNC_CNT(r2), not plain r2. 380 | (PERFORM_ROTATEL_S): Likewise. 381 | 382 | * superopt.h (PERFORM_COPY_S): Define. 383 | (PERFORM_*SHIFT*_S, PERFORM_ROTATEL_S, PERFORM_EXT*_S): Define 384 | * insn.def: Corresponding changes. 385 | * run_program.def: Corresponding changes. 
386 | 387 | Tue Apr 25 18:58:26 1995 Torbjorn Granlund 388 | 389 | * Makefile (superopt): Depend on HDRS. 390 | (HDRS): Add many missing included files. 391 | (superopt-*): Depend on HDRS. 392 | (superopt.o): Delete rule. 393 | (superopt): Depend on SRCS, not OBJS. Corresponding change to rule. 394 | 395 | Sat Apr 22 18:51:59 1995 Torbjorn Granlund 396 | 397 | * Makefile (FILES): Add ChangeLog (again). 398 | 399 | * superopt.c (CISC synth): Handle all immediate counts for MC68020 400 | (if -DSHIFTS), not just 31. 401 | 402 | Wed Mar 15 09:20:46 1995 Michael Meissner 403 | 404 | * Makefile (CFLAGS): Add new macro MACHINE to override the machine 405 | desired. 406 | (superopt): New alternate name for gso. 407 | (install): New rule. 408 | (all, install-all): New rule to build superopt for all of the 409 | supported machines. 410 | 411 | * superopt.h (POWEPC): Define if _ARCH_PPC is defined. 412 | (I386): Also check __i386__. 413 | 414 | * superopt.c (random_word): Eliminate inline, since it was 415 | previously declared without it. 416 | (output_assembler): For PowerPC, use PowerPC instruction names, 417 | rather than Power. Abort if Power only instructions used. 418 | (test_sequence): Print newline between patterns if -nl. 419 | (main): Recognize -nl switch. If unknown switch, print a usage 420 | message, including all supported goal functions. 421 | 422 | Sun Nov 13 22:59:42 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 423 | 424 | * superopt.c (CISC synth): Use SHIFTS macro here too, just like in 425 | RISC synth. 426 | * superopt.h (SHIFTS): Make sure it is defined to 1 or 0. 427 | 428 | Tue Nov 8 01:33:40 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 429 | 430 | * superopt.c (synth): Add missing SH conditional for "subc rx,ry"... 431 | (synth): Generate "add rx,ry" and "sub rx,ry" for SH... 432 | 433 | superopt.c (synth): Fix several SH-specific typos with prune hint 434 | setting. 435 | 436 | * superopt.h (EXTRA_SEQUENCE_TESTS): New macro; define for SH. 437 | * superopt.c (test_sequence): Use EXTRA_SEQUENCE_TESTS. 438 | 439 | * superopt.c (CISC synth): Generate ext[su].[bw], dt, swap.w, xtrct, 440 | and tst rx,ry for SH. 441 | (output_assembler): Print them. 442 | superopt.h: Handle immediates 0xff and 0xffff. 443 | (init_immediates): Initialize `values' with new immediates. 444 | 445 | superopt.c (synth): Shifts with count > 1 doesn't set T on the SH. 446 | (output_assembler, SH): Add support for carry-free shifts. 447 | 448 | * All files (EXTS8, EXTS16, CYAND, DECR_CYEQ, MERGE16): New opcodes. 449 | 450 | Sat Nov 5 13:21:11 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 451 | 452 | * superopt.c (synth): Use ASHIFTR_CON for POWER consistently, 453 | never use ASHIFTR. 454 | 455 | superopt.c (synth): Delete ROTATEXL_CIO with count BITS_PER_WORD-1. 456 | (synth): Try ROTATEXR_CIO with count 1. 457 | superopt.h (PERFORM_ROTATEXR_CIO): New definition. 458 | (PERFORM_ROTATEXL_CIO): Rewrite. 459 | (PERFORM_ROTATEL_CIO): Make sure we use logical shifts. 460 | (PERFORM_ROTATER_CIO): New definition. 461 | insn.def, run_program.def: Add new instructions. 462 | (output_assembler, M68000): Handle rotate right instructions. 463 | 464 | * superopt.c (random_word): Use mrand48 for __svr4__. 465 | (operand_names): Merge I386 and PYR. 466 | (output_assembler, I386, ADD): Output "decl" here... 467 | (output_assembler, I386, SUB): ...not here. 468 | 469 | (synth): Bump shift cost for I386 from 2 to 1. 470 | (synth): Merge I386 shift patterns that now became identical to 471 | other patterns. 
472 | 473 | (CISC synth): Don't try ROTATEXL_CIO with count 1; ADD_CIO performs 474 | the very same operation. 475 | 476 | * insn.def (CPEQ): Use 'c' for commutative. 477 | (CPNEQ, CMPEQ): Likewise. 478 | (COMCY): This is not a binary operation; use class 'x'. 479 | 480 | * superopt.c (main): Print target information for -v. 481 | * superopt.h (TARGET_STRING): New #define. 482 | 483 | * Ported to Hitatchi SH. Most files affected. 484 | 485 | Sun Jul 17 04:34:49 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 486 | 487 | * superopt.c (HPPA synth): Fix typos for shifting conditionalized on 488 | SHIFTS. 489 | 490 | * Makefile (FILES): Include ChangeLog. 491 | 492 | Thu Jun 16 19:41:10 1994 Torbjorn Granlund (tege@adder.cygnus.com) 493 | 494 | * superopt.c (output_assembler, I386, case AND_RC): 495 | Fix typo in condition for andb. 496 | (case IOR_RC): Likewise. 497 | (case XOR_RC): Likewise. 498 | (CISC synth): Try "movl $0,d" for I386. 499 | 500 | Sun Jan 30 22:35:02 1994 Torbjorn Granlund (tege@adder.cygnus.com) 501 | 502 | * superopt.c (ffs_internal): Initialize ci to silent compiler 503 | warning. 504 | * superopt.c (header): Declare random_word. 505 | 506 | Sat Oct 16 21:21:51 1993 Torbjorn Granlund (tege@adder.cygnus.com) 507 | 508 | * superopt.c (CISC synth): Fix typo, MC68020 was M68020. 509 | 510 | Mon Jul 12 20:37:12 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 511 | 512 | * superopt.c (CISC synth): Try BSF86 for I386. 513 | * superopt.h, run_program.def, insn.def: Add defs for BSF86. 514 | 515 | Fri May 28 11:59:43 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 516 | 517 | * superopt.c (RISC synth): Try subf on POWERPC. 518 | (output_assembler)[POWER]: Handle subf. 519 | 520 | Mon May 24 09:46:56 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 521 | 522 | * All files: Conditionalize on POWER, not RS6000. 523 | * superopt.h: Add handling of POWERPC. 524 | * superopt.c: Exclude ABS, NABS, DOZ if POWERPC. 525 | 526 | Sun Feb 21 14:21:20 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 527 | 528 | * superopt.h (struct insn_t): Make opcode field wider at expense of 529 | other fields. 530 | 531 | * superopt.c (HPPA synth): Make pruning less agressive after 532 | nullifying insn. 533 | * run_program.def: Special case for N_INSNS == 0. 534 | 535 | * superopt.c (ALPHA synth, CMOV*): Move pruning test inside DR loop. 536 | Add condition (DR != LAST_DEST). 537 | 538 | * superopt.c (outside of functions): Declare malloc and realloc. 539 | * Ported to HP-PA. Most files affected. 540 | 541 | Thu Feb 18 21:23:17 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 542 | 543 | * superopt.h (VALUE_MIN_SIGNED): Make it work for non 32-bit 544 | computers. 545 | (VALUE_MAX_SIGNED): Likewise. 546 | (__immediate_val): Use VALUE_MIN_SIGNED and VALUE_MAX_SIGNED. 547 | 548 | Mon Feb 15 11:41:16 1993 Torbjorn Granlund (tege@cyklop.nada.kth.se) 549 | 550 | * version.h: Now 2.2. 551 | 552 | * superopt.c (operand_names): Define constants up to 63 for ALPHA. 553 | (operand_names): Remove ...0,0,0... as array filler. 554 | 555 | * superopt.h (BITS_PER_WORD): Conditionalize on ALPHA. 556 | (unsigned_word, signed_word): Define depending on BITS_PER_WORD and 557 | compiler. 558 | 559 | Sun Feb 14 20:25:05 1993 Torbjorn Granlund (tege@cyklop.nada.kth.se) 560 | 561 | * superopt.c (output_assembler): Define Alpha asm syntax. 562 | * run_program.def: Special code for CMOVxx. 563 | 564 | Sat Feb 13 01:10:06 1993 Torbjorn Granlund (tege@cyklop.nada.kth.se) 565 | 566 | * Ported to Alpha. Most files affected. 
567 | 568 | Sat Jan 2 15:50:40 1993 Torbjorn Granlund (tege@sics.se) 569 | 570 | * superopt.h (PERFORM_FFS): New macro. 571 | * superopt.c (ffs_internal): New function. 572 | * goal.def (FFS): Use ffs_internal. 573 | 574 | * superopt.c (output_assembler)[I386]: Fix typo, %s -> %d, two 575 | places. 576 | 577 | Thu Dec 17 13:58:33 1992 Torbjorn Granlund (tege@sics.se) 578 | 579 | * superopt.c (output_assembler)[RS6000,AM29K]: Handle CLZ. 580 | 581 | Sat Dec 12 15:40:05 1992 Torbjorn Granlund (tege@sics.se) 582 | 583 | * version.h: Now 2.1. 584 | 585 | * Makefile (dist): Make sed command more robust. 586 | 587 | * superopt.h: Handle MC68000 and MC68020, not M68000. 588 | * superopt.h: Define M68000 #if MC68000 || MC68020. 589 | * superopt.h (SHIFT_COST): Define. For MC68000 it's depends on 590 | count. 591 | * superopt.c (CISC synth): Use SHIFT_COST for all shifting on 592 | MC68000. 593 | * superopt.c (CISC synth): Try logical operation with #1. 594 | * superopt.c (output_assembler)[M68000 AND,IOR,XOR]: Output 595 | operation with small immediates using word (w) suffix. 596 | * superopt.c (output_assembler)[I386 AND,IOR,XOR]: Likewise. 597 | 598 | * superopt.c (output_assembler)[M88000 ADC_CI]: Output subu.ci, not 599 | subu.co. 600 | 601 | * superopt.c (RISC synth)[0-ary instructions]: Major enhancements. 602 | 603 | Next 3 from Paul Eggert: 604 | * goal.def (DIVIDE_BY_MINUS_2e31): New name flo DIVIDE_BY_2e31, 605 | better describing the operation. 606 | * goal.def (DEF_GOAL for GRAY, GRAY2, DIVIDE_BY_MINUS_2e31): Avoid 607 | overflow by shifting -1 instead of 1. 608 | * superopt.h (__immediate_val): Avoid overflow by casting 1 to word. 609 | overflow.) 610 | 611 | * superopt.c (timings): New name for 'time'. (Clashed with defs in 612 | ). 613 | 614 | * superopt.c (recurse): Assign sequence[n_insn] using constructor 615 | #if __GNUC__. (Gives better code.) 616 | 617 | * superopt.c: Update comments. 618 | 619 | * superopt.c (ARITH_BITS #define): Remove. (Not used any more.) 620 | 621 | * superopt.c (cputime): #if USG, use clock() instead of getrusage. 622 | 623 | * superopt.c (init_test_sets): Remove #ifdef UDIV_WITH_SDIV code. 624 | (Obsolete.) 625 | 626 | Wed Dec 9 14:58:17 1992 Torbjorn Granlund (tege@sics.se) 627 | 628 | * superopt.c (init_test_sets): Remove unused label 'next'. 629 | 630 | * superopt.c (CISC synth, I386, PYR)[COPY 0 => v]: Cost is 1. 631 | Pass n_values for s1,s2,d operands. (Bug fix.) 632 | 633 | * superopt.c (RISC synth, SPARC)[r1 + 1 - cy]: Try this, with and 634 | without generating carry out. (Bug fix.) 635 | 636 | * superopt.h: Define __CLOBBER_CC and __AND_CLOBBER_CC. 637 | (sparc asm PERFORM macros): Use __CLOBBER_CC. (Bug fixes.) 638 | 639 | Sat Nov 28 13:50:09 1992 Torbjorn Granlund (tege@sics.se) 640 | 641 | * version.h: Now 2.0. 642 | 643 | * superopt.c (init_test_sets): Remove code inside UDIV_WITH_SDIV 644 | conditional. 645 | 646 | * superopt.c (output_assembler): Output pyramid assembler. 647 | Output MUL for all CPUs that have it. 648 | * superopt.c (CISC synth): Fix many pyramid-related errors. 649 | * superopt.c (random_word): #ifdef hpux, use mrand48. 650 | 651 | * superopt.h, superopt.c (synth), insn.def, run_program.def: 652 | UMULWIDEN_LO => MUL, PERFORM_UMULWIDEN_LO => PERFORM_MUL. 653 | * superopt.h (PERFORM_MUL): Simply use (r1 * r2), don't call 654 | umul_ppmm. 655 | * superopt.h: Define all PERFORM_* macros unconditionally. 656 | * insn.def: Remove #ifdef DM conditionals. 657 | * goal.def (UMULH): New goal. 
658 | * goal.def: Remove #ifdef DM conditionals. Include some goals only 659 | with GCC. Switch off division goals due to domain problems. 660 | * run_program.def: Remove #ifdef DM conditionals. 661 | 662 | * superopt.c (synth): Try shifts by 16, #if SHIFT16. 663 | 664 | * superopt.h: Include longlong.h unconditionally. 665 | 666 | * Makefile (FILES): Add longlong.h. 667 | (dist): Rewrite to have tar file creating a directory. 668 | 669 | Tue Jul 28 15:05:09 1992 Torbjorn Granlund (tege@sics.se) 670 | 671 | * goel.def: Add new goals for signed division. 672 | 673 | * Makefile: Create superopt-VERSION.tar.Z. 674 | 675 | * version.h: New file. 676 | * superopt.c: Include "version.h". 677 | * superopt.c (main): Handle `-version' option. 678 | 679 | * superopt.c (main): Better error messages. 680 | 681 | * superopt.c (main): Move initialization of goal_function to handle 682 | empty command lines. 683 | 684 | * superopt.c: Add timing per recursion level #ifdef TIMING. 685 | 686 | Thu Jun 25 20:10:23 1992 Torbjorn Granlund (tege@sics.se) 687 | 688 | * superopt.c (test_sequence): Remove STATISTICS. 689 | (recurse): Put it here. 690 | 691 | * superopt.c (random_word): Back to random. 692 | 693 | * superopt.c (recurse): Make it static. 694 | 695 | * superopt.c (RISC synth): Don't copy 0 on sparc and 88k. 696 | 697 | Fri Jun 12 17:16:54 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 698 | 699 | * superopt.c (test_sequence): Make n_test_operands const to avoid 700 | re-computation. 701 | 702 | Thu Jun 11 23:23:26 1992 Torbjorn Granlund (tege@sics.se) 703 | 704 | * Version 1.91. 705 | 706 | * superopt.c (main): Check argc before calling atoi. 707 | 708 | Thu Jun 11 20:38:20 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 709 | 710 | * goal.def: New file. 711 | * superopt.[ch]: Use goal.def. 712 | 713 | Thu Jun 11 19:19:37 1992 Torbjorn Granlund (tege@sics.se) 714 | 715 | * Version 1.90. 716 | 717 | Thu Jun 11 10:38:20 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 718 | 719 | * insn.def: New file. 720 | * superopt.[ch]: Use insn.def. 721 | 722 | * superopt.c (output_assembler, m88k COPY): Specify use of r0 and 723 | print small constants right. 724 | 725 | * superopt.c (synth): Correct indentation. 726 | 727 | * superopt.c (main): Add -all option to run thorough all know goal 728 | functions. This can be done quickly with -max-cost 2. 729 | 730 | * superopt.h (IMMEDIATE_VAL): Evaluate sparse values properly. 731 | 732 | * superopt.h (PERFORM_ADD_CIO, PERFORM_ADC_CIO): Operand 0 is 733 | written before the inputs are read. 734 | 735 | * superopt.h (PERFORM_CMPPAR): Add m88110 bits and don't check 736 | with the native cmp instruction. 737 | 738 | Thu Jun 11 02:37:01 1992 Torbjorn Granlund (tege@sics.se) 739 | 740 | * Version 1.16. 741 | 742 | * superopt.h: Hack PERFORM_{CLZ,FF1} to be faster. 743 | * superopt.c (clz_tab): Corresponding changes. 744 | * superopt.c (ff1_tab): New table for PERFORM_FF1. 745 | 746 | * superopt.c (RISC synth): Try ADD_CI(x,x). 747 | 748 | * superopt.c (main_synth): Always pass NO_PRUNE to synth. 749 | 750 | * superopt.c (output_assembler): Generalize "cmp" output. 751 | 752 | * superopt.c (synth): Rename cy_in to ci, and cy_out to co. 753 | 754 | * superopt.h (sparc asm): Clobber "cc". 755 | * superopt.h: Include asm iff USE_ASM is defined. Off by default. 756 | 757 | Wed Jun 10 15:40:45 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 758 | 759 | * superopt.c (output_assembler): Fix typos in 88k assembler. 760 | * superopt.c (operand_names): Make the constant names agree with 761 | the new values. 
762 | 763 | * run_program.def (run_program): Have this return the value of the 764 | carry flag or -1 if the flag was never set. 765 | * superopt.h: run_program now returns an int. 766 | * superopt.c (main_synth): Allow the specification of an initial 767 | sequence of instructions and provide an example sequence. 768 | 769 | * superopt.h (PERFORM_FF1, PERFORM_CMPPAR): When running native, 770 | compare the native instruction's output to the generic 771 | computation. 772 | 773 | Thu Jun 10 02:00:22 1992 Torbjorn Granlund (tege@sics.se) 774 | 775 | * Version 1.15. 776 | 777 | * Add for CLZ, CTZ, and 88k's FF0, FF1, EXT*, and CMPPAR. 778 | * superopt.[ch]: Rewrite handling of immediates to allow arbitrary 779 | shifts. 780 | 781 | Sat Jun 6 20:04:03 1992 Torbjorn Granlund (tege@sics.se) 782 | 783 | * Version 1.14. 784 | 785 | * superopt.c: 88k subu.c* and addu.c* insn were incorrectly asumed 786 | to accept immediate values. 787 | 788 | Wed Apr 1 22:03:04 1992 Torbjorn Granlund (tege@sics.se) 789 | 790 | * superopt.[ch], run_program.def: ROTATEXL_CIO, new name for 791 | ROTATEXL_CO. 792 | * superopt.c: Only use ROTATEXL_CIO when carry is defined. 793 | 794 | * superopt.c (test_sequence): Add some values to test_operands 795 | vector. 796 | * superopt.c (RISC synth): Try add of immediate 1. 797 | * superopt.[ch]: Add new goals for GS paper. 798 | 799 | Thu Mar 5 05:56:12 1992 Torbjorn Granlund (tege@sics.se) 800 | 801 | * superopt.[ch], run_program.def: Ported to pyramid. Added 802 | _CC internal insns. 803 | 804 | Fri Feb 14 23:19:11 1992 Torbjorn Granlund (tege@sics.se) 805 | 806 | * superopt.c (output_assembler M88100): Handle negative values for 807 | ADD_CIO, by outputting subu. 808 | * superopt.c (RISC synth): Fix comment add ADD_CIO (..., -1). 809 | 810 | Thu Dec 12 21:54:14 1991 Torbjorn Granlund (tege@sics.se) 811 | 812 | * superopt.c (test_sequence): Add comments. 813 | 814 | Tue Dec 10 21:37:01 1991 Torbjorn Granlund (tege@sics.se) 815 | 816 | * superopt.c (main_synth): Make nested for loops have different 817 | induction variables... 818 | 819 | Thu Dec 5 19:33:40 1991 Torbjorn Granlund (tege@sics.se) 820 | 821 | * superopt.c (output_assembler several places): Cast IMMEDIATE_VAL 822 | to signed_word when comparing to zero. 823 | 824 | Wed Nov 13 21:20:11 1991 Torbjorn Granlund (tege@sics.se) 825 | 826 | * superopt.c (main_synth): Hack to generate the initial random 827 | arguments such that the goal function take a value != 0. 828 | 829 | Mon Nov 11 11:41:50 1991 Torbjorn Granlund (tege@sics.se) 830 | 831 | * superopt.c (output_assembler RS6000 and M88000): Handle (op1 & 1). 832 | * superopt.c (RISC synth): Try (op1 & 1) on all machines. 833 | 834 | Sat Nov 2 16:24:35 1991 Torbjorn Granlund (tege@sics.se) 835 | 836 | * superopt.c (RISC synth): Try 29k CPXX insns with immediate 0. 837 | 838 | * superopt.c (test_sequence): Output small negative operands in 839 | decimal. 840 | 841 | Fri Oct 25 01:01:54 1991 Torbjorn Granlund (tege@sics.se) 842 | 843 | * superopt.c (test_sequence): Test with VALUE_MAX_SIGNED too. 844 | 845 | * superopt.c (CISC synth): Try CMP operation for VALUE_MAX_SIGNED 846 | and VALUE_MIN_SIGNED on '386. 847 | 848 | * superopt.c, superopt.h, run_program.def: Add new operation 849 | ROTATEXL_CO. Try it for CISCs. 850 | * ROTATEL new name for ROTATE. 851 | 852 | * superopt.c (output_assembler I386): Fix syntax for if statement. 853 | 854 | * superopt.h (PERFORM_*SHIFT*, PERFORM_ROTATE*): Truncate shift 855 | counts with BITS_PER_WORD. 
856 | * superopt.h (TRUNC_CNT): New macro for shift count truncation. 857 | 858 | * superopt.c (output_assembler M68000 COPY): Output "moveq" for 859 | negative numbers as intended, by casting operands to signed_word. 860 | 861 | * superopt.c (RISC synth): Try SUB for two regs even on M88000. 862 | 863 | * superopt.c (RISC synth, CISC synth): Try rotate and shift 864 | instruction with count 1. 865 | * superopt.c (output_assembler): Output rotate instructions. 866 | 867 | * superopt.c, superopt.h, run_program.def: Add new operation 868 | "ASHIFTR_CON" for RS/6000 arithmetic right shifts. (The 869 | arithmetic shift insns used to be incorrectly described as not 870 | affecting carry.) 871 | 872 | Local Variables: 873 | mode: indented-text 874 | left-margin: 8 875 | fill-column: 76 876 | version-control: never 877 | End: 878 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for GNU superoptimizer 2 | 3 | MAXCOST = 3 4 | EXTRA = -shifts -extracts 5 | 6 | CC = gcc 7 | DBG = -g 8 | OPT = -O 9 | CFLAGS = $(CPU) $(OPT) $(DBG) 10 | ALL_MACHINES = superopt-sparc \ 11 | superopt-power \ 12 | superopt-powerpc \ 13 | superopt-m88000 \ 14 | superopt-am29k \ 15 | superopt-mc68000 \ 16 | superopt-mc68020 \ 17 | superopt-i386 \ 18 | superopt-i960a \ 19 | superopt-i960b \ 20 | superopt-pyr \ 21 | superopt-alpha \ 22 | superopt-sh \ 23 | superopt-hppa \ 24 | superopt-xcore \ 25 | superopt-avr 26 | 27 | OBJS = superopt.o 28 | SRCS = superopt.c 29 | HDRS = run_program.def insn.def goal.def synth.def superopt.h version.h \ 30 | longlong.h 31 | BINDIR = /usr/local/bin 32 | INSTALL = install -c 33 | FILES = README COPYING Makefile TODO ChangeLog superopt.c synth.def $(HDRS) 34 | 35 | VERSION = `sed 's,char \*version_string = "\([0-9.]*\)";,\1,' < version.h` 36 | 37 | superopt: $(SRCS) $(HDRS) 38 | $(CC) $(CFLAGS) -o superopt $(SRCS) 39 | 40 | clean: 41 | rm -f $(OBJS) superopt $(ALL_MACHINES) *.tmp 42 | 43 | install: superopt 44 | $(INSTALL) superopt $(BINDIR)/superopt 45 | 46 | all: $(ALL_MACHINES) 47 | 48 | install-all: all 49 | for x in $(ALL_MACHINES); do $(INSTALL) $$x $(BINDIR)/$$x; done 50 | 51 | run-all: sparc.res power.res powerpc.res m88000.res am29k.res mc68000.res \ 52 | mc68020.res i386.res i960a.res i960b.res pyr.res alpha.res sh.res \ 53 | hppa.res 54 | @echo "Done!" 
55 | 56 | superopt-sparc: $(SRCS) $(HDRS) 57 | $(CC) $(CFLAGS) -DSPARC -o superopt-sparc $(SRCS) 58 | sparc.res: superopt-sparc 59 | ./superopt-sparc -all $(EXTRA) -max $(MAXCOST) -as >sparc.tmp 2>&1 60 | mv sparc.tmp sparc.res 61 | 62 | superopt-power: $(SRCS) $(HDRS) 63 | $(CC) $(CFLAGS) -DPOWER -o superopt-power $(SRCS) 64 | power.res: superopt-power 65 | ./superopt-power -all $(EXTRA) -max $(MAXCOST) -as >power.tmp 2>&1 66 | mv power.tmp power.res 67 | 68 | superopt-powerpc: $(SRCS) $(HDRS) 69 | $(CC) $(CFLAGS) -DPOWERPC -o superopt-powerpc $(SRCS) 70 | powerpc.res: superopt-powerpc 71 | ./superopt-powerpc -all $(EXTRA) -max $(MAXCOST) -as >powerpc.tmp 2>&1 72 | mv powerpc.tmp powerpc.res 73 | 74 | superopt-m88000: $(SRCS) $(HDRS) 75 | $(CC) $(CFLAGS) -DM88000 -o superopt-m88000 $(SRCS) 76 | m88000.res: superopt-m88000 77 | ./superopt-m88000 -all $(EXTRA) -max $(MAXCOST) -as >m88000.tmp 2>&1 78 | mv m88000.tmp m88000.res 79 | 80 | superopt-am29k: $(SRCS) $(HDRS) 81 | $(CC) $(CFLAGS) -DAM29K -o superopt-am29k $(SRCS) 82 | am29k.res: superopt-am29k 83 | ./superopt-am29k -all $(EXTRA) -max $(MAXCOST) -as >am29k.tmp 2>&1 84 | mv am29k.tmp am29k.res 85 | 86 | superopt-mc68000: $(SRCS) $(HDRS) 87 | $(CC) $(CFLAGS) -DMC68000 -o superopt-mc68000 $(SRCS) 88 | mc68000.res: superopt-mc68000 89 | ./superopt-mc68000 -all $(EXTRA) -max $(MAXCOST) -as >mc68000.tmp 2>&1 90 | mv mc68000.tmp mc68000.res 91 | 92 | superopt-mc68020: $(SRCS) $(HDRS) 93 | $(CC) $(CFLAGS) -DMC68020 -o superopt-mc68020 $(SRCS) 94 | mc68020.res: superopt-mc68020 95 | ./superopt-mc68020 -all $(EXTRA) -max $(MAXCOST) -as >mc68020.tmp 2>&1 96 | mv mc68020.tmp mc68020.res 97 | 98 | superopt-i386: $(SRCS) $(HDRS) 99 | $(CC) $(CFLAGS) -DI386 -o superopt-i386 $(SRCS) 100 | i386.res: superopt-i386 101 | ./superopt-i386 -all $(EXTRA) -max $(MAXCOST) -as >i386.tmp 2>&1 102 | mv i386.tmp i386.res 103 | 104 | superopt-i960a: $(SRCS) $(HDRS) 105 | $(CC) $(CFLAGS) -DI960 -o superopt-i960a $(SRCS) 106 | i960a.res: superopt-i960a 107 | ./superopt-i960a -all $(EXTRA) -max $(MAXCOST) -as >i960a.tmp 2>&1 108 | mv i960a.tmp i960a.res 109 | 110 | superopt-i960b: $(SRCS) $(HDRS) 111 | $(CC) $(CFLAGS) -DI960B -o superopt-i960b $(SRCS) 112 | i960b.res: superopt-i960b 113 | ./superopt-i960b -all $(EXTRA) -max $(MAXCOST) -as >i960b.tmp 2>&1 114 | mv i960b.tmp i960b.res 115 | 116 | superopt-pyr: $(SRCS) $(HDRS) 117 | $(CC) $(CFLAGS) -DPYR -o superopt-pyr $(SRCS) 118 | pyr.res: superopt-pyr 119 | ./superopt-pyr -all $(EXTRA) -max $(MAXCOST) -as >pyr.tmp 2>&1 120 | mv pyr.tmp pyr.res 121 | 122 | superopt-alpha: $(SRCS) $(HDRS) 123 | $(CC) $(CFLAGS) -DALPHA -o superopt-alpha $(SRCS) 124 | alpha.res: superopt-alpha 125 | ./superopt-alpha -all $(EXTRA) -max $(MAXCOST) -as >alpha.tmp 2>&1 126 | mv alpha.tmp alpha.res 127 | 128 | superopt-sh: $(SRCS) $(HDRS) 129 | $(CC) $(CFLAGS) -DSH -o superopt-sh $(SRCS) 130 | sh.res: superopt-sh 131 | ./superopt-sh -all $(EXTRA) -max $(MAXCOST) -as >sh.tmp 2>&1 132 | mv sh.tmp sh.res 133 | 134 | superopt-hppa: $(SRCS) $(HDRS) 135 | $(CC) $(CFLAGS) -DHPPA -o superopt-hppa $(SRCS) 136 | hppa.res: superopt-hppa 137 | ./superopt-hppa -all $(EXTRA) -max $(MAXCOST) -as >hppa.tmp 2>&1 138 | mv hppa.tmp hppa.res 139 | 140 | superopt-xcore: $(SRCS) $(HDRS) 141 | $(CC) $(CFLAGS) -DXCORE -o superopt-xcore $(SRCS) 142 | xcore.res: superopt-xcore 143 | ./superopt-xcore -all $(EXTRA) -max $(MAXCOST) -as >xcore.tmp 2>&1 144 | mv xcore.tmp xcore.res 145 | 146 | superopt-avr: $(SRCS) $(HDRS) 147 | $(CC) $(CFLAGS) -DAVR -o superopt-avr 
$(SRCS)
148 | avr.res: superopt-avr
149 | 	./superopt-avr -all $(EXTRA) -max $(MAXCOST) -as >avr.tmp 2>&1
150 | 	mv avr.tmp avr.res
151 | 
152 | dist:
153 | 	mkdir superopt-$(VERSION)
154 | 	ln $(FILES) superopt-$(VERSION)
155 | 	tar cf - superopt-$(VERSION) | gzip --best > superopt-$(VERSION).tar.gz
156 | 	rm -rf superopt-$(VERSION)
157 | 
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | 
2 | 			    GNU SUPEROPTIMIZER
3 | 
4 | The superoptimizer is a function sequence generator that uses an exhaustive
5 | generate-and-test approach to finding the shortest instruction sequence for
6 | a given function.  You have to tell the superoptimizer which function and
7 | which CPU you want to generate code for, and how many instructions you can
8 | accept.
9 | 
10 | The superoptimizer can't generate very long sequences, unless you have a
11 | very fast computer or very much spare time.  The time complexity of the
12 | algorithm used is approximately
13 | 
14 | 	O(m * n^(2n))
15 | 
16 | 
17 | where m is the number of available instructions on the architecture and n is
18 | the shortest sequence for the goal function.  The practical sequence length
19 | limit depends on the target architecture and goal function arity; in most
20 | cases it is about 5, but for a rich instruction set such as the HPPA it is
21 | just 4.  The longest sequence ever generated was for the MC68020 and was 7
22 | instructions long.  It took several weeks to generate it...
23 | 
24 | The superoptimizer can't guarantee that it finds the best possible
25 | instruction sequences for all possible goal functions.  For example, it
26 | doesn't even try to include immediate constants (other than -1, 0, +1, and
27 | the smallest negative and biggest positive numbers) in the sequences.
28 | 
29 | Other reasons why optimal sequences might not be found are that not all
30 | instructions are included, not even in their register-only form.  Also, some
31 | instructions included might not be correctly simulated.  If you encounter
32 | any of these problems, please report them to the address below.
33 | 
34 | WARNING!  The generated sequences might be incorrect with a very small
35 | probability.  Always make sure a sequence is correct before using it.  So
36 | far, I have never encountered any incorrect sequences.  If you find one,
37 | please let me know about it!
38 | 
39 | Having said this, note that the superoptimizer practically always finds
40 | optimal and correct sequences for functions that depend on registers only.
41 | 
42 | 
43 | USAGE INSTRUCTIONS
44 | 
45 | The superoptimizer supports these CPUs: SPARC v7, Motorola 68000, 68020, and
46 | 88000, IBM POWER and PowerPC, AMD 29000, Intel x86 and 960 1.0 and 1.1,
47 | Pyramid, DEC Alpha, HP PA-RISC, and Hitachi SH.  SGI Mips is not supported,
48 | since it doesn't have instructions whose use is non-obvious.  Some new
49 | instructions, like the Intel P6 and Sparc v9 conditional moves, are not
50 | supported.
51 | 
52 | You need an ANSI C compiler, for example GCC, to compile the superoptimizer.
53 | Type
54 | 
55 | 	make CPU=-D<cpu> superopt
56 | 
57 | where <cpu> is one of SPARC, MC68000, MC68020, M88000, POWER, POWERPC,
58 | AM29K, I386, I960 (for i960 1.0), I960B (for i960 1.1), PYR, ALPHA, HPPA,
59 | or SH.  The compilation might take a long time and use up a lot of memory,
60 | especially for HPPA.
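
For example, to build a superoptimizer for the SPARC with the default
compiler flags, you would type

	make CPU=-DSPARC superopt

The per-CPU targets in the Makefile, such as `make superopt-sparc', run the
same compilation but name the resulting binary after the CPU.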
61 | 
62 | You can also build all superoptimizers by typing:
63 | 
64 | 	make all
65 | 
66 | This will create superopt-sparc, superopt-power, etc.
67 | 
68 | There are also install targets; use `make install' to install a single
69 | superoptimizer and `make install-all' to install all of them.
70 | 
71 | To run the superoptimizer, type
72 | 
73 | 	superopt -f<function> | -all [-assembly] [-max-cost n]
74 | 		 [-shifts] [-extracts] [-no-carry-insns] [-extra-cost n]
75 | 
76 | and wait until the found instruction sequences are printed.  For example,
77 | 
78 | 	superopt -flts0 -as
79 | 
80 | will print all sequences computing the statement
81 | 
82 | 	{ r = (signed_word) v0 < 0; }.
83 | 
84 | See below for some examples of possible goal functions.
85 | 
86 | By default, the superoptimizer doesn't try all immediate shift counts.  To
87 | enable all shift counts, pass -shifts as a command line option.  To enable
88 | all bit field extracts, use -extracts.
89 | 
90 | OPTIONS
91 | 
92 | The `-f' option always has to be given, to tell the superoptimizer for
93 | which function it should try to find an instruction sequence.  See below
94 | for possible function names.
95 | 
96 | Option names may be abbreviated.
97 | 
98 | -assembly
99 | 	Output assembly suitable to feed the assembler instead of pseudo-
100 | 	code suitable for humans.
101 | 
102 | -max-cost n
103 | 	Limit the `cost' of the instruction sequence to n.  May be used to
104 | 	stop the search if no instruction sequence of that cost or lower
105 | 	is found.  By default this is 4.
106 | 
107 | -extra-cost n
108 | 	Search for sequences n more expensive than the cheapest found
109 | 	sequence.  Default is 0, meaning that only the cheapest sequence(s)
110 | 	are printed.
111 | 
112 | -no-carry-insns
113 | 	Don't use instructions that use the carry flag.  This might be
114 | 	desirable on RISCs to simplify instruction scheduling.
115 | 
116 | -shifts
117 | 	Include all shift counts supported by the target architecture in
118 | 	the search.  This slows down the search considerably.
119 | 
120 | -extracts
121 | 	Include all bit field extracts supported by the target architecture
122 | 	in the search.  This slows down the search considerably.
123 | 
124 | -f<function>
125 | 
126 | 	where <function> is one of eq, ne, les, ges, lts, gts,
127 | 	leu, geu, ltu, gtu, eq0, ne0, les0, ges0, lts0, gts0, neq, nne,
128 | 	nles, nges, nlts, ngts, nleu, ngeu, nltu, ngtu, neq0, nne0, nles0,
129 | 	nges0, nlts0, ngts0, maxs, mins, maxu, minu, sgn, abs, nabs, gray,
130 | 	or gray2, etc, etc.
131 | 
132 | 	eq, ne, les, etc, compute the C expressions "a == b", "a != b", "a
133 | 	<= b", etc, where the operation codes ending in `s' indicate
134 | 	signed comparison; `u' indicates unsigned comparison.
135 | 
136 | 	eq0, etc, compute "a == 0", etc.
137 | 
138 | 	The `n' before the names means that the corresponding function
139 | 	value is negated, e.g. nlts is the C expression "-(a < b)".
140 | 
141 | 	maxs, mins, maxu, minu are binary (i.e. two-argument) signed and
142 | 	unsigned max and min, respectively.
143 | 
144 | 	sgn is the unary sign function; -1 for negative, 0 for zero, and +1
145 | 	for positive arguments.
146 | 
147 | 	abs and nabs are absolute value and negative absolute value,
148 | 	respectively.
149 | 
150 | 	For a complete list of goal functions and their definitions, look in
151 | 	the file goal.def.  You can easily add your own goal functions to
152 | 	that file.  After having added a new function, you have to recompile
153 | 	the superoptimizer.
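
	As an illustration of the format (the goal name here is made up,
	not one of the predefined goals), a one-operand goal that rounds
	its argument up to a multiple of 4 could be added to goal.def as

		DEF_GOAL (ROUND_UP_4, 1, "round_up_4", { r = (v0 + 3) & -4; })

	where the fields are the internal name, the number of input
	operands, the name used with the -f option, and the C code that
	computes the result r from the inputs v0, v1, ....  After
	recompiling, `superopt -fround_up_4 -as' would search for it.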
154 | 
155 | 
156 | READING SUPEROPTIMIZER OUTPUT
157 | 
158 | By default, the superoptimizer outputs sequences in a syntax resembling a
159 | high-level language.  For example, this is the output for M88000/abs:
160 | 
161 | 1:	r1:=arith_shift_right(r0,0x1f)
162 | 	r2:=add_co(r1,r0)
163 | 	r3:=xor(r2,r1)
164 | 2:	r1:=arith_shift_right(r0,0x1f)
165 | 	r2:=add(r1,r0)
166 | 	r3:=xor(r2,r1)
167 | 3:	r1:=arith_shift_right(r0,0x1f)
168 | 	r2:=xor(r1,r0)
169 | 	r3:=adc_co(r2,r1)
170 | 
171 | r1:=arith_shift_right(r0,0x1f) means "shift r0 right 31 steps
172 | arithmetically and put the result in r1".  add_co is "add and set carry".
173 | adc_co is the subtraction instruction found on most RISCs, i.e. "add with
174 | complement and set carry".  This may seem dumb, but there is an important
175 | difference in the way carry is set after an addition-with-complement and a
176 | subtraction.  The suffixes "_ci" and "_cio" mean, respectively, that carry
177 | is input but not affected, and that carry is both input and generated.
178 | 
179 | The interesting value is always the value computed by the last instruction.
180 | 
181 | 
182 | *********************************
183 | 
184 | Please send comments, improvements and new ports to tege@gnu.ai.mit.edu.
185 | 
186 | The GNU superoptimizer was written by Torbjorn Granlund (currently with
187 | Cygnus Support).  Tom Wood (at the time with Data General, now at Motorola)
188 | made major improvements, like the clean way to describe goal functions and
189 | internal instructions.  The original superoptimizer idea is due to Henry
190 | Massalin.
191 | 
192 | The GNU superoptimizer and its application to tuning GCC are described in
193 | the proceedings of the ACM SIGPLAN conference on Programming Language
194 | Design and Implementation (PLDI), 1992.
195 | 
--------------------------------------------------------------------------------
/README.xcore:
--------------------------------------------------------------------------------
1 | Build using:
2 | 
3 | 	make superopt-xcore
4 | 
5 | To start searching for an optimal sequence for a particular goal function
6 | (in this example abs) use:
7 | 
8 | 	superopt-xcore -fabs -assembly -max-cost 3
9 | 
10 | This should produce the following output:
11 | 
12 | Searching for { r = (signed_word) v0 < 0 ? -v0 : v0; }
13 | Superoptimizing at cost 1 2 3
14 | 1:	ashr r1,r0,32
15 | 	add r2,r1,r0
16 | 	xor r3,r2,r1
17 | 2:	ashr r1,r0,32
18 | 	xor r2,r1,r0
19 | 	sub r3,r2,r1
20 | [2 sequences found]
21 | 
22 | To add new goal functions you will need to edit goal.def and recompile; the
23 | syntax should be clear from the existing examples.
24 | 
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | * For the 960, we often see sequences "chkbit;concmp" where the chkbit is
2 |   just used to unconditionally clear bit 2 of Ac.cc.  We might want to prune
3 |   the variants of chkbit to decrease the number of printed sequences.
4 | 
5 | * For the 960, we pass the prune hints based on bit 1 of the Ac.cc.  This
6 |   might lead to undesirable pruning.
7 | 
8 | * For the 960 and Alpha, we let conditionally executed instructions like
9 |   ADDO_cc_960, SUBO_cc_960, and CMOVEcc overwrite an existing register.
10 |   This is OK, but we should also write to a new register, since several
11 |   conditional adds and subtracts with different conditions and the same
12 |   destination register might make the result well-defined anyway.
13 | 
14 | * When we have conditional execution (HPPA, i960, Alpha, Sparc9, Coldfire,
15 |   etc) we have to make sure we prune instruction 3 carefully in this
16 |   situation:
17 | 
18 | 	insn1
19 | 	insn2 is conditionally executed
20 | 	insn3
21 | 
22 |   Depending on the flag settings of insn1 and insn2, the correct set of
23 |   insn3 to try is tricky.  Now, we might prune too much.
24 | 
25 | * Later 29k models have additional logical ops.  Add them!
26 | 
27 | * add_co(a,a) and shiftl_co(a,1) are identical.  Affects m68k, x86, pyr, etc.
28 | 
29 | * For goal functions with the same arity, the exact same computations are
30 |   made in synth.  This suggests that we could search for many goal functions
31 |   in parallel instead of serially.  We could maintain an array of goal
32 |   values, one for each goal.  Instead of simply comparing the last generated
33 |   value to goal_value, we would loop through a goal_values[] array, and call
34 |   test_sequence for each goal that matches.  This would speed up searches by
35 |   as much as a factor of 10.
36 | 
37 | * Adding the bsfl instruction revealed a deficiency: We can't deal with
38 |   instructions that give an undefined result for some inputs.  This is so
39 |   because the sequences might fail to work only when the undefined result
40 |   happens to become a certain value.  To cope with this, we have to make
41 |   test_sequence try lots of values, but it can only do that if it knows
42 |   about these instructions.
43 | 
44 |   A cleaner way would be to add a valid bit to each computed value.
45 | 
46 | * Now we require equality between a computed goal value and a computed
47 |   result.  Permit fuzzier functions, like "something negative".  E.g., a
48 |   fuzzy sgn function might be useful.
49 | 
50 | * Most importantly: Generalize the class of possible goal functions.  Allow
51 |   them to be any mapping from a vector of words to another vector of words,
52 |   each of arbitrary length.
53 | 
54 |   To make it fast, record after each instruction if it generates a value
55 |   that is in (the vector) goal_value, and prune a sequence if it has not
56 |   produced N-M requested values when M more instructions are allowed [N the
57 |   number of words in goal_value].
58 | 
59 |   We should split `synth'.  The leaf search `synth' function could be
60 |   written as it is currently, but with the leaf-test "if (allowed_cost > 0)"
61 |   removed.  The non-leaf `synth' needs to loop and look for the generated
62 |   value in goal_value.  To avoid massive code replication, we have to put
63 |   the synth function in a separate file, and play with cpp and #include.
64 | 
65 |   Make sure to handle the case where you find all values before the last
66 |   instruction.  This might be non-trivial!  We know that we have to use the
67 |   value from the ultimate instruction, otherwise we would have found this
68 |   sequence before.  Problem is, we will either have to loop and look for
69 |   the value in goal_value, or, probably much better, just accept the
70 |   sequence.
71 | 
72 | * Add -test-on-cpu option triggering a mechanism for testing the generated
73 |   sequences on the real hardware.  That would help debug the simulation
74 |   code.
75 | 
76 | * I'd like to have a means to define that a goal function is not defined
77 |   for all possible input values.  An extra parameter, ALLOWED_ARGUMENTS, to
78 |   DEF_GOAL could take care of that.
79 | 
80 |   Also I'd like the user to have the possibility to add a list of immediate
81 |   values to try for each goal function.  For example, 31 and 32 could be
82 |   useful for ffs.
83 | 
84 | * Make it possible to handle more immediate values, for example by putting
85 |   them in the immediate_val array.
86 | 
87 | * Interpret goal functions so the user doesn't need to recompile.
88 |   Interpretation would make goal function evaluation slower than it is now,
89 |   but goal function evaluation is not critical.
90 | 
91 | * Add code to algebraically prove that generated sequences are correct.
92 | 
93 | * Add bsrl/bsfl and bfffo to CISC synth.
94 | 
95 | * Check that PERFORM_CLZ works like RS/6000's cntlz and 29k's clz.  Is it
96 |   ok for input == 0?
97 | 
98 | * A major speed improvement would be to make independent insns have a
99 |   canonical order.  Consider `gts' on the SPARC.  This is probably not very
100 |   hard, if insns are enumerated in some clever way and loop variables are
101 |   passed down.  A very simple but potentially quite powerful mechanism:  If
102 |   the putative instruction doesn't depend on the last instruction, compare
103 |   the putative instruction's opcode with the last instruction's opcode, and
104 |   proceed iff, say, the < relation holds.
105 | 
106 |   After an instruction that sets carry (and there is another instruction
107 |   with the same effect apart from that it doesn't affect carry), the
108 |   generated carry has to be used.  [Fix this with a reservation vector
109 |   --allow both making and deleting a reservation.  Make a reservation when
110 |   carry is generated and delete it when it is used.]  The leaf instructions
111 |   have to input carry if an unused carry is pending.
112 | 
113 |   Make sure all computed values are used by subsequent instructions.  For
114 |   example, if we have just two more values to compute and three yet unused
115 |   values, the last two instructions have to restrict their input operands.
116 | 
117 | * Efficient pruning of sequences not using generated resources:
118 | 
119 |   Each generated instruction should record its computed 'resources' in a
120 |   list of unused resources.  (A written register is such a resource, and the
121 |   carry flag is such a resource.)  When a resource is used by an
122 |   instruction, it is removed from the database.
123 | 
124 |   At each recursion, we check that the unused resources can be consumed
125 |   with the allowed number of instructions.  If not, we back-track.
126 | 
127 |   Beware: A resource is not 'consumed' when it has been used.  I have seen
128 |   optimal sequences that use a generated carry more than once.
129 | 
130 | * Shift 32 steps on 68k is well-defined.  LSHIFTR_CO can be used to zero a
131 |   word and simultaneously move the sign bit to the X flag, and ASHIFTR_CO can
132 |   be used to propagate the sign bit to the whole word and to the X flag.
133 |   Useful?
134 | 
135 | * Model the exact timing, i.e., instruction overlap, superscalar issue,
136 |   etc.  Requires modelling the CPU internal function units.
137 | 
138 | * `386: bt, clc, cmc, cdq[0->1], lea, shld, shrd, stc.
139 | 
140 | * Make the instruction description cleaner.  Something of this kind would
141 |   be great:
142 | 
143 |   88k:
144 |   {ADD,    "addu %d{r},%1{r,0},%2{r,[0-FFFF]}"},
145 |   {ADD_CI, "addu.ci %d{r},%1{r,0},%2{r,[0-FFFF]}"},
146 |   ...
147 | 
148 |   sparc:
149 |   {ADD,    "add %1{r,0},%2{r,[-1000,+FFF]},%d{r}"},
150 |   {ADD_CI, "addx %1{r,0},%2{r,[-1000,+FFF]},%d{r}"},
151 |   ...
152 | 
153 |   We would need a tool to extract the information and generate a 'synth'
154 |   function.  (That instruction description format would be useful to
155 |   assemblers, disassemblers, and simulators too.)
156 | 157 | * Include a 'synth' function for several targets in one gso binary. Have a 158 | command line option -t select which one to use. 159 | 160 | * The following XCore instructions are missing: crc, crc8, ladd, lmul, lsub, 161 | maccs, maccu. In addition not all possible immediate values for the add, eq, 162 | ldaw, ldc, mkmsk, sext, sub and zext instructions are used. 163 | -------------------------------------------------------------------------------- /goal.def: -------------------------------------------------------------------------------- 1 | /* This file contains the definitions and documentation for the goal functions 2 | used in the Superoptimizer. 3 | 4 | Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. 5 | 6 | This program is free software; you can redistribute it and/or modify it 7 | under the terms of the GNU General Public License as published by the 8 | Free Software Foundation; either version 2, or (at your option) any 9 | later version. 10 | 11 | This program is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; see the file COPYING. If not, write to the Free 18 | Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 | 20 | /* The fields in the cpp macro call "DEF_GOAL()" are: 21 | 22 | 1. The internal name of the goal function. 23 | 24 | 2. The number of input operands. 25 | 26 | 3. The printed name of the instruction. 27 | 28 | 4. C code that implements the function. 29 | 30 | */ 31 | 32 | DEF_GOAL (P1, 1, "p1", { r = (v0-1) & v0; }) 33 | DEF_GOAL (P2, 1, "p2", { r = (v0+1) & v0; }) 34 | DEF_GOAL (P3, 1, "p3", { r = (-v0) & v0; }) 35 | DEF_GOAL (P4, 1, "p4", { r = (v0-1) ^ v0; }) 36 | DEF_GOAL (P5, 1, "p5", { r = (v0-1) | v0; }) 37 | DEF_GOAL (P6, 1, "p6", { r = (v0+1) | v0; }) 38 | DEF_GOAL (P7, 1, "p7", { r = (v0+1) & ~v0; }) 39 | DEF_GOAL (P8, 1, "p8", { r = (v0-1) & ~v0; }) 40 | DEF_GOAL (P9, 1, "p9", { r = ((signed_word) v0 < 0)?-v0:v0; }) 41 | DEF_GOAL (P10, 2, "p10", { r = (v0^v1) <= (v0&v1); }) 42 | DEF_GOAL (P11, 2, "p11", { r = (v0&~v1) > v1 ; }) 43 | DEF_GOAL (P12, 2, "p12", { r = (v0&~v1) <= v1 ; }) 44 | DEF_GOAL (P13, 1, "p13", { r = (signed_word) v0 > 0 ? 1 : ((signed_word) v0 < 0 ? -1 : 0); }) 45 | DEF_GOAL (P14, 2, "p14", { r = (v0&v1) + ((v0^v1)>>1) ; }) 46 | DEF_GOAL (P15, 2, "p15", { r = (v0|v1) - ((v0^v1)>>1) ; }) 47 | DEF_GOAL (P16, 2, "p16", { r = (v0 > v1)?v0:v1 ; }) 48 | DEF_GOAL (P17, 1, "p17", { r = (((v0-1) | v0) + 1) & v0; }) 49 | DEF_GOAL (P18, 1, "p18", { r = !((v0-1) & v0) && v0; }) 50 | DEF_GOAL (P19, 3, "p19", { unsigned_word r__o3 = ((v0 >> v2) ^ v0) & v1; r = ((r__o3 << 1) ^ r__o3) ^ v0; }) 51 | DEF_GOAL (P20, 3, "p20", { unsigned_word r__o2 = v0 & -v0; \ 52 | unsigned_word r__o3 = r__o2 + v0; \ 53 | unsigned_word r__o3a = ((v0 ^ r__o2) >> 2); \ 54 | r = (r__o2?r__o3a / r__o2:0) | r__o3; }) 55 | DEF_GOAL (P21, 4, "p21", { r = (v0 == v1)? v2 : ((v0 == v2)?v3:v1); }) 56 | DEF_GOAL (P22, 1, "p22", { r = __builtin_popcountl(v0)&1 ; }) 57 | DEF_GOAL (P23, 1, "p23", { r = __builtin_popcountl(v0) ; }) 58 | DEF_GOAL (P24, 1, "p24", { r = (v0 <= 2)? 
v0 : (1 << (sizeof(v0)*8 - __builtin_clz(v0 - 1))) ; }) 59 | DEF_GOAL (P25, 2, "p25", ({ word t1; word t2; umul_ppmm (t1, t2, v0, v1); r = t2; })) 60 | 61 | DEF_GOAL (EQ, 2, "eq", { r = v0 == v1; }) 62 | DEF_GOAL (NE, 2, "ne", { r = v0 != v1; }) 63 | DEF_GOAL (LES, 2, "les", { r = (signed_word) v0 <= (signed_word) v1; }) 64 | DEF_GOAL (GES, 2, "ges", { r = (signed_word) v0 >= (signed_word) v1; }) 65 | DEF_GOAL (LTS, 2, "lts", { r = (signed_word) v0 < (signed_word) v1; }) 66 | DEF_GOAL (GTS, 2, "gts", { r = (signed_word) v0 > (signed_word) v1; }) 67 | DEF_GOAL (LEU, 2, "leu", { r = (unsigned_word) v0 <= (unsigned_word) v1; }) 68 | DEF_GOAL (GEU, 2, "geu", { r = (unsigned_word) v0 >= (unsigned_word) v1; }) 69 | DEF_GOAL (LTU, 2, "ltu", { r = (unsigned_word) v0 < (unsigned_word) v1; }) 70 | DEF_GOAL (GTU, 2, "gtu", { r = (unsigned_word) v0 > (unsigned_word) v1; }) 71 | DEF_GOAL (LESU, 2, "lesu", { r = (signed_word) v0 < 0 || (unsigned_word) v0 <= (unsigned_word) v1; }) 72 | DEF_GOAL (GESU, 2, "gesu", { r = (signed_word) v0 >= 0 && (unsigned_word) v0 >= (unsigned_word) v1; }) 73 | DEF_GOAL (LTSU, 2, "ltsu", { r = (signed_word) v0 < 0 || (unsigned_word) v0 < (unsigned_word) v1; }) 74 | DEF_GOAL (GTSU, 2, "gtsu", { r = (signed_word) v0 >= 0 && (unsigned_word) v0 > (unsigned_word) v1; }) 75 | 76 | DEF_GOAL (EQ0, 1, "eq0", { r = v0 == 0; }) 77 | DEF_GOAL (NE0, 1, "ne0", { r = v0 != 0; }) 78 | DEF_GOAL (LES0, 1, "les0", { r = (signed_word) v0 <= 0; }) 79 | DEF_GOAL (GES0, 1, "ges0", { r = (signed_word) v0 >= 0; }) 80 | DEF_GOAL (LTS0, 1, "lts0", { r = (signed_word) v0 < 0; }) 81 | DEF_GOAL (GTS0, 1, "gts0", { r = (signed_word) v0 > 0; }) 82 | 83 | DEF_GOAL (NEQ, 2, "neq", { r = -(v0 == v1); }) 84 | DEF_GOAL (NNE, 2, "nne", { r = -(v0 != v1); }) 85 | DEF_GOAL (NLES, 2, "nles", { r = -((signed_word) v0 <= (signed_word) v1); }) 86 | DEF_GOAL (NGES, 2, "nges", { r = -((signed_word) v0 >= (signed_word) v1); }) 87 | DEF_GOAL (NLTS, 2, "nlts", { r = -((signed_word) v0 < (signed_word) v1); }) 88 | DEF_GOAL (NGTS, 2, "ngts", { r = -((signed_word) v0 > (signed_word) v1); }) 89 | DEF_GOAL (NLEU, 2, "nleu", { r = -((unsigned_word) v0 <= (unsigned_word) v1); }) 90 | DEF_GOAL (NGEU, 2, "ngeu", { r = -((unsigned_word) v0 >= (unsigned_word) v1); }) 91 | DEF_GOAL (NLTU, 2, "nltu", { r = -((unsigned_word) v0 < (unsigned_word) v1); }) 92 | DEF_GOAL (NGTU, 2, "ngtu", { r = -((unsigned_word) v0 > (unsigned_word) v1); }) 93 | 94 | DEF_GOAL (NEQ0, 1, "neq0", { r = -(v0 == 0); }) 95 | DEF_GOAL (NNE0, 1, "nne0", { r = -(v0 != 0); }) 96 | DEF_GOAL (NLES0, 1, "nles0", { r = -((signed_word) v0 <= 0); }) 97 | DEF_GOAL (NGES0, 1, "nges0", { r = -((signed_word) v0 >= 0); }) 98 | DEF_GOAL (NLTS0, 1, "nlts0", { r = -((signed_word) v0 < 0); }) 99 | DEF_GOAL (NGTS0, 1, "ngts0", { r = -((signed_word) v0 > 0); }) 100 | 101 | /* Maximum of 32 for compatability */ 102 | #define N_BITS ((sizeof(unsigned_word)*8) > 32 ? 
32 : (sizeof(unsigned_word)*8)) 103 | /* Negative high one */ 104 | #define HIGHONE_N ((unsigned_word)1<<(N_BITS-1)) 105 | /* Positive high one */ 106 | #define HIGHONE_P ((signed_word)1<<(N_BITS-2)) 107 | DEF_GOAL (EQ80000000, 2, "eq80000000", { r = HIGHONE_N & -(v0 == v1); }) 108 | DEF_GOAL (NE80000000, 2, "ne80000000", { r = HIGHONE_N & -(v0 != v1); }) 109 | DEF_GOAL (LES80000000, 2, "les80000000", { r = HIGHONE_N & -((signed_word) v0 <= (signed_word) v1); }) 110 | DEF_GOAL (GES80000000, 2, "ges80000000", { r = HIGHONE_N & -((signed_word) v0 >= (signed_word) v1); }) 111 | DEF_GOAL (LTS80000000, 2, "lts80000000", { r = HIGHONE_N & -((signed_word) v0 < (signed_word) v1); }) 112 | DEF_GOAL (GTS80000000, 2, "gts80000000", { r = HIGHONE_N & -((signed_word) v0 > (signed_word) v1); }) 113 | DEF_GOAL (LEU80000000, 2, "leu80000000", { r = HIGHONE_N & -((unsigned_word) v0 <= (unsigned_word) v1); }) 114 | DEF_GOAL (GEU80000000, 2, "geu80000000", { r = HIGHONE_N & -((unsigned_word) v0 >= (unsigned_word) v1); }) 115 | DEF_GOAL (LTU80000000, 2, "ltu80000000", { r = HIGHONE_N & -((unsigned_word) v0 < (unsigned_word) v1); }) 116 | DEF_GOAL (GTU80000000, 2, "gtu80000000", { r = HIGHONE_N & -((unsigned_word) v0 > (unsigned_word) v1); }) 117 | 118 | DEF_GOAL (EQ080000000, 1, "eq080000000", { r = HIGHONE_N & -(v0 == 0); }) 119 | DEF_GOAL (NE080000000, 1, "ne080000000", { r = HIGHONE_N & -(v0 != 0); }) 120 | DEF_GOAL (LES080000000, 1, "les080000000", { r = HIGHONE_N & -((signed_word) v0 <= 0); }) 121 | DEF_GOAL (GES080000000, 1, "ges080000000", { r = HIGHONE_N & -((signed_word) v0 >= 0); }) 122 | DEF_GOAL (LTS080000000, 1, "lts080000000", { r = HIGHONE_N & -((signed_word) v0 < 0); }) 123 | DEF_GOAL (GTS080000000, 1, "gts080000000", { r = HIGHONE_N & -((signed_word) v0 > 0); }) 124 | 125 | DEF_GOAL (MAXS, 2, "maxs", { r = (signed_word) v0 > (signed_word) v1 ? v0 : v1; }) 126 | DEF_GOAL (MINS, 2, "mins", { r = (signed_word) v0 < (signed_word) v1 ? v0 : v1; }) 127 | DEF_GOAL (MAXU, 2, "maxu", { r = (unsigned_word) v0 > (unsigned_word) v1 ? v0 : v1; }) 128 | DEF_GOAL (MINU, 2, "minu", { r = (unsigned_word) v0 < (unsigned_word) v1 ? v0 : v1; }) 129 | DEF_GOAL (CMPS, 2, "cmps", { r = (signed_word) v0 > (signed_word) v1? 1 : ((signed_word) v0 < (signed_word) v1 ? -1 : 0); }) 130 | DEF_GOAL (CMPU, 2, "cmpu", { r = (unsigned_word) v0 > (unsigned_word) v1? 1 : ((unsigned_word) v0 < (unsigned_word) v1 ? -1 : 0); }) 131 | DEF_GOAL (SGN, 1, "sgn", { r = (signed_word) v0 > 0 ? 1 : ((signed_word) v0 < 0 ? -1 : 0); }) 132 | DEF_GOAL (ABS, 1, "abs", { r = (signed_word) v0 < 0 ? -v0 : v0; }) 133 | DEF_GOAL (NABS, 1, "nabs", { r = (signed_word) v0 > 0 ? 
-v0 : v0; }) 134 | DEF_GOAL (GRAY, 1, "gray", { r = ((~(v0 ^ (v0 << 1)) & (-1 << (N_BITS-1))) | ((v0 << 1) & ~(-1 << (N_BITS-1))) | (v0 >> (N_BITS-1))); }) 135 | DEF_GOAL (GRAY2, 1, "gray2", { v0 = ((~(v0 ^ (v0 << 1)) & (-1 << (N_BITS-1))) | ((v0 << 1) & ~(-1 << (N_BITS-1))) | (v0 >> (N_BITS-1))); r = ((~(v0 ^ (v0 << 1)) & (-1 << (N_BITS-1))) | ((v0 << 1) & ~(-1 << (N_BITS-1))) | (v0 >> 31)); }) 136 | DEF_GOAL (DIVIDE_BY_2, 1, "divide_by_2", { r = (signed_word) v0 / 2; }) 137 | DEF_GOAL (DIVIDE_BY_4, 1, "divide_by_4", { r = (signed_word) v0 / 4; }) 138 | DEF_GOAL (DIVIDE_BY_2e30, 1, "divide_by_2e30", { r = (signed_word) v0 / HIGHONE_P; }) 139 | DEF_GOAL (MOD_BY_2, 1, "mod_by_2", { r = (signed_word) v0 % 2; }) 140 | DEF_GOAL (MOD_BY_4, 1, "mod_by_4", { r = (signed_word) v0 % 4; }) 141 | DEF_GOAL (MOD_BY_2e30, 1, "mod_by_2e30", { r = (signed_word) v0 % HIGHONE_P; }) 142 | 143 | DEF_GOAL (EQ_PLUS, 3, "eq+", { r = (v0 == v1) + v2; }) 144 | DEF_GOAL (NE_PLUS, 3, "ne+", { r = (v0 != v1) + v2; }) 145 | DEF_GOAL (LES_PLUS, 3, "les+", { r = ((signed_word) v0 <= (signed_word) v1) + v2; }) 146 | DEF_GOAL (GES_PLUS, 3, "ges+", { r = ((signed_word) v0 >= (signed_word) v1) + v2; }) 147 | DEF_GOAL (LTS_PLUS, 3, "lts+", { r = ((signed_word) v0 < (signed_word) v1) + v2; }) 148 | DEF_GOAL (GTS_PLUS, 3, "gts+", { r = ((signed_word) v0 > (signed_word) v1) + v2; }) 149 | DEF_GOAL (LEU_PLUS, 3, "leu+", { r = ((unsigned_word) v0 <= (unsigned_word) v1) + v2; }) 150 | DEF_GOAL (GEU_PLUS, 3, "geu+", { r = ((unsigned_word) v0 >= (unsigned_word) v1) + v2; }) 151 | DEF_GOAL (LTU_PLUS, 3, "ltu+", { r = ((unsigned_word) v0 < (unsigned_word) v1) + v2; }) 152 | DEF_GOAL (GTU_PLUS, 3, "gtu+", { r = ((unsigned_word) v0 > (unsigned_word) v1) + v2; }) 153 | DEF_GOAL (LESU_PLUS, 3, "lesu+", { r = ((signed_word) v0 < 0 || (unsigned_word) v0 <= (unsigned_word) v1) + v2; }) 154 | DEF_GOAL (GESU_PLUS, 3, "gesu+", { r = ((signed_word) v0 >= 0 && (unsigned_word) v0 >= (unsigned_word) v1) + v2; }) 155 | DEF_GOAL (LTSU_PLUS, 3, "ltsu+", { r = ((signed_word) v0 < 0 || (unsigned_word) v0 < (unsigned_word) v1) + v2; }) 156 | DEF_GOAL (GTSU_PLUS, 3, "gtsu+", { r = ((signed_word) v0 >= 0 && (unsigned_word) v0 > (unsigned_word) v1) + v2; }) 157 | DEF_GOAL (EQ0_PLUS, 2, "eq0+", { r = (v0 == 0) + v1; }) 158 | DEF_GOAL (NE0_PLUS, 2, "ne0+", { r = (v0 != 0) + v1; }) 159 | DEF_GOAL (LES0_PLUS, 2, "les0+", { r = ((signed_word) v0 <= 0) + v1; }) 160 | DEF_GOAL (GES0_PLUS, 2, "ges0+", { r = ((signed_word) v0 >= 0) + v1; }) 161 | DEF_GOAL (LTS0_PLUS, 2, "lts0+", { r = ((signed_word) v0 < 0) + v1; }) 162 | DEF_GOAL (GTS0_PLUS, 2, "gts0+", { r = ((signed_word) v0 > 0) + v1; }) 163 | 164 | DEF_SYNONYM (EQ_PLUS, "peq") 165 | DEF_SYNONYM (NE_PLUS, "pne") 166 | DEF_SYNONYM (LES_PLUS, "ples") 167 | DEF_SYNONYM (GES_PLUS, "pges") 168 | DEF_SYNONYM (LTS_PLUS, "plts") 169 | DEF_SYNONYM (GTS_PLUS, "pgts") 170 | DEF_SYNONYM (LEU_PLUS, "pleu") 171 | DEF_SYNONYM (GEU_PLUS, "pgeu") 172 | DEF_SYNONYM (LTU_PLUS, "pltu") 173 | DEF_SYNONYM (GTU_PLUS, "pgtu") 174 | DEF_SYNONYM (LESU_PLUS, "plesu") 175 | DEF_SYNONYM (GESU_PLUS, "pgesu") 176 | DEF_SYNONYM (LTSU_PLUS, "pltsu") 177 | DEF_SYNONYM (GTSU_PLUS, "pgtsu") 178 | DEF_SYNONYM (EQ0_PLUS, "peq0") 179 | DEF_SYNONYM (NE0_PLUS, "pne0") 180 | DEF_SYNONYM (LES0_PLUS, "ples0") 181 | DEF_SYNONYM (GES0_PLUS, "pges0") 182 | DEF_SYNONYM (LTS0_PLUS, "plts0") 183 | DEF_SYNONYM (GTS0_PLUS, "pgts0") 184 | 185 | DEF_GOAL (EQ_MINUS, 3, "eq-", { r = v2 - (v0 == v1); }) 186 | DEF_GOAL (NE_MINUS, 3, "ne-", { r = v2 - (v0 != v1); }) 187 
| DEF_GOAL (LES_MINUS, 3, "les-", { r = v2 - ((signed_word) v0 <= (signed_word) v1); }) 188 | DEF_GOAL (GES_MINUS, 3, "ges-", { r = v2 - ((signed_word) v0 >= (signed_word) v1); }) 189 | DEF_GOAL (LTS_MINUS, 3, "lts-", { r = v2 - ((signed_word) v0 < (signed_word) v1); }) 190 | DEF_GOAL (GTS_MINUS, 3, "gts-", { r = v2 - ((signed_word) v0 > (signed_word) v1); }) 191 | DEF_GOAL (LEU_MINUS, 3, "leu-", { r = v2 - ((unsigned_word) v0 <= (unsigned_word) v1); }) 192 | DEF_GOAL (GEU_MINUS, 3, "geu-", { r = v2 - ((unsigned_word) v0 >= (unsigned_word) v1); }) 193 | DEF_GOAL (LTU_MINUS, 3, "ltu-", { r = v2 - ((unsigned_word) v0 < (unsigned_word) v1); }) 194 | DEF_GOAL (GTU_MINUS, 3, "gtu-", { r = v2 - ((unsigned_word) v0 > (unsigned_word) v1); }) 195 | DEF_GOAL (LESU_MINUS, 3, "lesu-", { r = v2 - ((signed_word) v0 < 0 || (unsigned_word) v0 <= (unsigned_word) v1); }) 196 | DEF_GOAL (GESU_MINUS, 3, "gesu-", { r = v2 - ((signed_word) v0 >= 0 && (unsigned_word) v0 >= (unsigned_word) v1); }) 197 | DEF_GOAL (LTSU_MINUS, 3, "ltsu-", { r = v2 - ((signed_word) v0 < 0 || (unsigned_word) v0 < (unsigned_word) v1); }) 198 | DEF_GOAL (GTSU_MINUS, 3, "gtsu-", { r = v2 - ((signed_word) v0 >= 0 && (unsigned_word) v0 > (unsigned_word) v1); }) 199 | 200 | DEF_GOAL (EQ0_MINUS, 2, "eq0-", { r = v1 - (v0 == 0); }) 201 | DEF_GOAL (NE0_MINUS, 2, "ne0-", { r = v1 - (v0 != 0); }) 202 | DEF_GOAL (LES0_MINUS, 2, "les0-", { r = v1 - ((signed_word) v0 <= 0); }) 203 | DEF_GOAL (GES0_MINUS, 2, "ges0-", { r = v1 - ((signed_word) v0 >= 0); }) 204 | DEF_GOAL (LTS0_MINUS, 2, "lts0-", { r = v1 - ((signed_word) v0 < 0); }) 205 | DEF_GOAL (GTS0_MINUS, 2, "gts0-", { r = v1 - ((signed_word) v0 > 0); }) 206 | 207 | DEF_GOAL (NEQ_AND, 3, "naeq", { r = -(v0 == v1) & v2; }) 208 | DEF_GOAL (NNE_AND, 3, "nane", { r = -(v0 != v1) & v2; }) 209 | DEF_GOAL (NLES_AND, 3, "nales", { r = -((signed_word) v0 <= (signed_word) v1) & v2; }) 210 | DEF_GOAL (NGES_AND, 3, "nages", { r = -((signed_word) v0 >= (signed_word) v1) & v2; }) 211 | DEF_GOAL (NLTS_AND, 3, "nalts", { r = -((signed_word) v0 < (signed_word) v1) & v2; }) 212 | DEF_GOAL (NGTS_AND, 3, "nagts", { r = -((signed_word) v0 > (signed_word) v1) & v2; }) 213 | DEF_GOAL (NLEU_AND, 3, "naleu", { r = -((unsigned_word) v0 <= (unsigned_word) v1) & v2; }) 214 | DEF_GOAL (NGEU_AND, 3, "nageu", { r = -((unsigned_word) v0 >= (unsigned_word) v1) & v2; }) 215 | DEF_GOAL (NLTU_AND, 3, "naltu", { r = -((unsigned_word) v0 < (unsigned_word) v1) & v2; }) 216 | DEF_GOAL (NGTU_AND, 3, "nagtu", { r = -((unsigned_word) v0 > (unsigned_word) v1) & v2; }) 217 | 218 | DEF_GOAL (NEQ0_AND, 2, "naeq0", { r = -(v0 == 0) & v1; }) 219 | DEF_GOAL (NNE0_AND, 2, "nane0", { r = -(v0 != 0) & v1; }) 220 | DEF_GOAL (NLES0_AND, 2, "nales0", { r = -((signed_word) v0 <= 0) & v1; }) 221 | DEF_GOAL (NGES0_AND, 2, "nages0", { r = -((signed_word) v0 >= 0) & v1; }) 222 | DEF_GOAL (NLTS0_AND, 2, "nalts0", { r = -((signed_word) v0 < 0) & v1; }) 223 | DEF_GOAL (NGTS0_AND, 2, "nagts0", { r = -((signed_word) v0 > 0) & v1; }) 224 | 225 | /* Don't do all variants here, since it is trivial to deduce the rest. */ 226 | DEF_GOAL (EQ_SEL, 4, "eq-sel", { r = (v0 == v1) ? v2 : v3; }) 227 | DEF_GOAL (LES_SEL, 4, "les-sel", { r = ((signed_word) v0 <= (signed_word) v1) ? v2 : v3; }) 228 | DEF_GOAL (LTS_SEL, 4, "lts-sel", { r = ((signed_word) v0 < (signed_word) v1) ? v2 : v3; }) 229 | DEF_GOAL (LEU_SEL, 4, "leu-sel", { r = ((unsigned_word) v0 <= (unsigned_word) v1) ? 
v2 : v3; }) 230 | DEF_GOAL (LTU_SEL, 4, "ltu-sel", { r = ((unsigned_word) v0 < (unsigned_word) v1) ? v2 : v3; }) 231 | 232 | DEF_GOAL (EQ0_SEL, 3, "eq0-sel", { r = (v0 == 0) ? v1 : v2; }) 233 | DEF_GOAL (LES0_SEL, 3, "les0-sel", { r = ((signed_word) v0 <= 0) ? v1 : v2; }) 234 | DEF_GOAL (LTS0_SEL, 3, "lts0-sel", { r = ((signed_word) v0 < 0) ? v1 : v2; }) 235 | 236 | DEF_GOAL (FFS, 1, "ffs", { r = ffs_internal (v0); }) 237 | DEF_GOAL (CLOG2, 1, "ceil_log2", { r = ceil_log2 (v0); }) 238 | DEF_GOAL (FLOG2, 1, "floor_log2", { r = floor_log2 (v0); }) 239 | DEF_GOAL (MULTADJ, 3, "multadj", { r = v1 + ((signed_word) v0 < 0 ? v2 : 0); }) 240 | 241 | DEF_GOAL (SHIFTL_1, 1, "sll1", { r = v0 << 1; }) 242 | DEF_GOAL (SHIFTL_2, 1, "sll2", { r = v0 << 2; }) 243 | DEF_GOAL (SHIFTL_3, 1, "sll3", { r = v0 << 3; }) 244 | DEF_GOAL (SHIFTL_4, 1, "sll4", { r = v0 << 4; }) 245 | DEF_GOAL (SHIFTL_5, 1, "sll5", { r = v0 << 5; }) 246 | DEF_GOAL (SHIFTL_6, 1, "sll6", { r = v0 << 6; }) 247 | DEF_GOAL (SHIFTL_7, 1, "sll7", { r = v0 << 7; }) 248 | DEF_GOAL (SHIFTL_8, 1, "sll8", { r = v0 << 8; }) 249 | DEF_GOAL (SHIFTL_9, 1, "sll9", { r = v0 << 9; }) 250 | DEF_GOAL (SHIFTL_10, 1, "sll10", { r = v0 << 10; }) 251 | DEF_GOAL (SHIFTL_11, 1, "sll11", { r = v0 << 11; }) 252 | DEF_GOAL (SHIFTL_12, 1, "sll12", { r = v0 << 12; }) 253 | DEF_GOAL (SHIFTL_13, 1, "sll13", { r = v0 << 13; }) 254 | DEF_GOAL (SHIFTL_14, 1, "sll14", { r = v0 << 14; }) 255 | DEF_GOAL (SHIFTL_15, 1, "sll15", { r = v0 << 15; }) 256 | DEF_GOAL (SHIFTL_16, 1, "sll16", { r = v0 << 16; }) 257 | DEF_GOAL (SHIFTL_17, 1, "sll17", { r = v0 << 17; }) 258 | DEF_GOAL (SHIFTL_18, 1, "sll18", { r = v0 << 18; }) 259 | DEF_GOAL (SHIFTL_19, 1, "sll19", { r = v0 << 19; }) 260 | DEF_GOAL (SHIFTL_20, 1, "sll20", { r = v0 << 20; }) 261 | DEF_GOAL (SHIFTL_21, 1, "sll21", { r = v0 << 21; }) 262 | DEF_GOAL (SHIFTL_22, 1, "sll22", { r = v0 << 22; }) 263 | DEF_GOAL (SHIFTL_23, 1, "sll23", { r = v0 << 23; }) 264 | DEF_GOAL (SHIFTL_24, 1, "sll24", { r = v0 << 24; }) 265 | DEF_GOAL (SHIFTL_25, 1, "sll25", { r = v0 << 25; }) 266 | DEF_GOAL (SHIFTL_26, 1, "sll26", { r = v0 << 26; }) 267 | DEF_GOAL (SHIFTL_27, 1, "sll27", { r = v0 << 27; }) 268 | DEF_GOAL (SHIFTL_28, 1, "sll28", { r = v0 << 28; }) 269 | DEF_GOAL (SHIFTL_29, 1, "sll29", { r = v0 << 29; }) 270 | DEF_GOAL (SHIFTL_30, 1, "sll30", { r = v0 << 30; }) 271 | DEF_GOAL (SHIFTL_31, 1, "sll31", { r = v0 << 31; }) 272 | 273 | DEF_GOAL (LSHIFTR_1, 1, "srl1", { r = v0 >> 1; }) 274 | DEF_GOAL (LSHIFTR_2, 1, "srl2", { r = v0 >> 2; }) 275 | DEF_GOAL (LSHIFTR_3, 1, "srl3", { r = v0 >> 3; }) 276 | DEF_GOAL (LSHIFTR_4, 1, "srl4", { r = v0 >> 4; }) 277 | DEF_GOAL (LSHIFTR_5, 1, "srl5", { r = v0 >> 5; }) 278 | DEF_GOAL (LSHIFTR_6, 1, "srl6", { r = v0 >> 6; }) 279 | DEF_GOAL (LSHIFTR_7, 1, "srl7", { r = v0 >> 7; }) 280 | DEF_GOAL (LSHIFTR_8, 1, "srl8", { r = v0 >> 8; }) 281 | DEF_GOAL (LSHIFTR_9, 1, "srl9", { r = v0 >> 9; }) 282 | DEF_GOAL (LSHIFTR_10, 1, "srl10", { r = v0 >> 10; }) 283 | DEF_GOAL (LSHIFTR_11, 1, "srl11", { r = v0 >> 11; }) 284 | DEF_GOAL (LSHIFTR_12, 1, "srl12", { r = v0 >> 12; }) 285 | DEF_GOAL (LSHIFTR_13, 1, "srl13", { r = v0 >> 13; }) 286 | DEF_GOAL (LSHIFTR_14, 1, "srl14", { r = v0 >> 14; }) 287 | DEF_GOAL (LSHIFTR_15, 1, "srl15", { r = v0 >> 15; }) 288 | DEF_GOAL (LSHIFTR_16, 1, "srl16", { r = v0 >> 16; }) 289 | DEF_GOAL (LSHIFTR_17, 1, "srl17", { r = v0 >> 17; }) 290 | DEF_GOAL (LSHIFTR_18, 1, "srl18", { r = v0 >> 18; }) 291 | DEF_GOAL (LSHIFTR_19, 1, "srl19", { r = v0 >> 19; }) 292 | DEF_GOAL (LSHIFTR_20, 1, "srl20", 
{ r = v0 >> 20; }) 293 | DEF_GOAL (LSHIFTR_21, 1, "srl21", { r = v0 >> 21; }) 294 | DEF_GOAL (LSHIFTR_22, 1, "srl22", { r = v0 >> 22; }) 295 | DEF_GOAL (LSHIFTR_23, 1, "srl23", { r = v0 >> 23; }) 296 | DEF_GOAL (LSHIFTR_24, 1, "srl24", { r = v0 >> 24; }) 297 | DEF_GOAL (LSHIFTR_25, 1, "srl25", { r = v0 >> 25; }) 298 | DEF_GOAL (LSHIFTR_26, 1, "srl26", { r = v0 >> 26; }) 299 | DEF_GOAL (LSHIFTR_27, 1, "srl27", { r = v0 >> 27; }) 300 | DEF_GOAL (LSHIFTR_28, 1, "srl28", { r = v0 >> 28; }) 301 | DEF_GOAL (LSHIFTR_29, 1, "srl29", { r = v0 >> 29; }) 302 | DEF_GOAL (LSHIFTR_30, 1, "srl30", { r = v0 >> 30; }) 303 | DEF_GOAL (LSHIFTR_31, 1, "srl31", { r = v0 >> 31; }) 304 | 305 | DEF_GOAL (ASHIFTR_1, 1, "sra1", { r = (signed_word) v0 >> 1; }) 306 | DEF_GOAL (ASHIFTR_2, 1, "sra2", { r = (signed_word) v0 >> 2; }) 307 | DEF_GOAL (ASHIFTR_3, 1, "sra3", { r = (signed_word) v0 >> 3; }) 308 | DEF_GOAL (ASHIFTR_4, 1, "sra4", { r = (signed_word) v0 >> 4; }) 309 | DEF_GOAL (ASHIFTR_5, 1, "sra5", { r = (signed_word) v0 >> 5; }) 310 | DEF_GOAL (ASHIFTR_6, 1, "sra6", { r = (signed_word) v0 >> 6; }) 311 | DEF_GOAL (ASHIFTR_7, 1, "sra7", { r = (signed_word) v0 >> 7; }) 312 | DEF_GOAL (ASHIFTR_8, 1, "sra8", { r = (signed_word) v0 >> 8; }) 313 | DEF_GOAL (ASHIFTR_9, 1, "sra9", { r = (signed_word) v0 >> 9; }) 314 | DEF_GOAL (ASHIFTR_10, 1, "sra10", { r = (signed_word) v0 >> 10; }) 315 | DEF_GOAL (ASHIFTR_11, 1, "sra11", { r = (signed_word) v0 >> 11; }) 316 | DEF_GOAL (ASHIFTR_12, 1, "sra12", { r = (signed_word) v0 >> 12; }) 317 | DEF_GOAL (ASHIFTR_13, 1, "sra13", { r = (signed_word) v0 >> 13; }) 318 | DEF_GOAL (ASHIFTR_14, 1, "sra14", { r = (signed_word) v0 >> 14; }) 319 | DEF_GOAL (ASHIFTR_15, 1, "sra15", { r = (signed_word) v0 >> 15; }) 320 | DEF_GOAL (ASHIFTR_16, 1, "sra16", { r = (signed_word) v0 >> 16; }) 321 | DEF_GOAL (ASHIFTR_17, 1, "sra17", { r = (signed_word) v0 >> 17; }) 322 | DEF_GOAL (ASHIFTR_18, 1, "sra18", { r = (signed_word) v0 >> 18; }) 323 | DEF_GOAL (ASHIFTR_19, 1, "sra19", { r = (signed_word) v0 >> 19; }) 324 | DEF_GOAL (ASHIFTR_20, 1, "sra20", { r = (signed_word) v0 >> 20; }) 325 | DEF_GOAL (ASHIFTR_21, 1, "sra21", { r = (signed_word) v0 >> 21; }) 326 | DEF_GOAL (ASHIFTR_22, 1, "sra22", { r = (signed_word) v0 >> 22; }) 327 | DEF_GOAL (ASHIFTR_23, 1, "sra23", { r = (signed_word) v0 >> 23; }) 328 | DEF_GOAL (ASHIFTR_24, 1, "sra24", { r = (signed_word) v0 >> 24; }) 329 | DEF_GOAL (ASHIFTR_25, 1, "sra25", { r = (signed_word) v0 >> 25; }) 330 | DEF_GOAL (ASHIFTR_26, 1, "sra26", { r = (signed_word) v0 >> 26; }) 331 | DEF_GOAL (ASHIFTR_27, 1, "sra27", { r = (signed_word) v0 >> 27; }) 332 | DEF_GOAL (ASHIFTR_28, 1, "sra28", { r = (signed_word) v0 >> 28; }) 333 | DEF_GOAL (ASHIFTR_29, 1, "sra29", { r = (signed_word) v0 >> 29; }) 334 | DEF_GOAL (ASHIFTR_30, 1, "sra30", { r = (signed_word) v0 >> 30; }) 335 | DEF_GOAL (ASHIFTR_31, 1, "sra31", { r = (signed_word) v0 >> 31; }) 336 | 337 | 338 | DEF_GOAL (CMPBYTES, 2, "cmpbytes", 339 | { 340 | union { word w; char b[4]; } __r1; 341 | union { word w; char b[4]; } __r2; 342 | __r1.w = (v0); __r2.w = (v1); 343 | r = ((__r1.b[0] != __r2.b[0]) && (__r1.b[1] != __r2.b[1]) 344 | && (__r1.b[2] != __r2.b[2]) && (__r1.b[3] != __r2.b[3])); 345 | }) 346 | 347 | #ifdef __GNUC__ 348 | #if 0 349 | DEF_GOAL (UDIV_QRNND, 3, "udiv_qrnnd", ({ word t1, t2; udiv_qrnnd (t1, t2, v0, v1, v2); r = t1; })) 350 | DEF_GOAL (UMOD_QRNND, 3, "umod_qrnnd", ({ word t1, t2; udiv_qrnnd (t1, t2, v0, v1, v2); r = t2; })) 351 | #endif 352 | DEF_GOAL (UMULH, 2, "umulh", ({ word t1; word t2; 
umul_ppmm (t1, t2, v0, v1); r = t1; })) 353 | DEF_GOAL (UMULH3, 1, "umulh3", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 3); r = t1; })) 354 | DEF_GOAL (UMULH5, 1, "umulh5", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 5); r = t1; })) 355 | DEF_GOAL (UMULH6, 1, "umulh6", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 6); r = t1; })) 356 | DEF_GOAL (UMULH7, 1, "umulh7", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 7); r = t1; })) 357 | DEF_GOAL (UMULH9, 1, "umulh9", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 9); r = t1; })) 358 | DEF_GOAL (UMULH10, 1, "umulh10", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 10); r = t1; })) 359 | DEF_GOAL (UMULH11, 1, "umulh11", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 11); r = t1; })) 360 | #endif /* __GNUC__ */ 361 | 362 | #ifdef __GNUC__ 363 | DEF_GOAL (MORE_EVEN, 2, "more_even", ({ r = (v0 & -v0) > (v1 & -v1) ; })) 364 | #endif /* __GNUC__ */ 365 | DEF_GOAL (CLEAR_LSB, 1, "clear_lsb", { r = v0 & ~(v0 & -v0); }) 366 | DEF_GOAL (CLEAR_LSB2, 2, "clear_lsb2", { r = v1 & ~(v0 & -v0); }) 367 | 368 | #if 0 369 | DEF_GOAL (UDIV, 2, "udiv", { r = v0 / v1; }) 370 | #endif 371 | 372 | /* After "v0 = v1 + v2", compute if we got signed overflow. */ 373 | DEF_GOAL (OVERFLOW_AFTER_ADD, 3, "add_ovfl", { r = (signed_word) (~(v1 ^ v2) & (v0 ^ v1)) < 0; }) 374 | 375 | /* The most and least significant bits of a double word left shift. */ 376 | DEF_GOAL (DBL_SHIFT_HI, 3, "dbl_shift_hi", 377 | { r = (v2 >= BITS_PER_WORD 378 | ? v0 << (v2 - BITS_PER_WORD) 379 | : (v1 << v2) | (v0 >> (BITS_PER_WORD - v2))); }) 380 | DEF_GOAL (DBL_SHIFT_LO, 2, "dbl_shift_lo", 381 | { r = (v1 >= BITS_PER_WORD 382 | ? 0 383 | : v0 << v1); }) 384 | DEF_GOAL (DBL_SHIFT_HI_TRUNC, 3, "dbl_shift_hi_trunc", 385 | { r = ((v2 & 2*BITS_PER_WORD-1) >= BITS_PER_WORD 386 | ? v0 << ((v2 & 2*BITS_PER_WORD-1) - BITS_PER_WORD) 387 | : (v1 << (v2 & 2*BITS_PER_WORD-1)) | (v0 >> (BITS_PER_WORD - (v2 & 2*BITS_PER_WORD-1)))); }) 388 | DEF_GOAL (DBL_SHIFT_LO_TRUNC, 2, "dbl_shift_lo_trunc", 389 | { r = ((v1 & 2*BITS_PER_WORD-1) >= BITS_PER_WORD 390 | ? 
0 391 | : v0 << (v1 & 2*BITS_PER_WORD-1)); }) 392 | 393 | /* 394 | Local variables: 395 | mode:c 396 | version-control: t 397 | End: 398 | */ 399 | -------------------------------------------------------------------------------- /hashtable.c: -------------------------------------------------------------------------------- 1 | 2 | #define HASHTABLE_BUCKETS (1024*8) 3 | 4 | 5 | struct hashtable_entry_t { 6 | word *values; 7 | int n_values; 8 | int carry; 9 | int cost; 10 | struct hashtable_entry_t *next; 11 | }; 12 | 13 | typedef struct hashtable_entry_t hashtable_entry; 14 | 15 | hashtable_entry hashtable[HASHTABLE_BUCKETS]; 16 | 17 | void hashtable_init(); 18 | void hashtable_insert(word *values, int n_values, int carry, int cost); 19 | int hashtable_find(word *values, int n_values, int carry); 20 | void hashtable_free(); 21 | 22 | void hashtable_init() 23 | { 24 | int i; 25 | 26 | for (i = 0; i < HASHTABLE_BUCKETS; ++i) 27 | { 28 | hashtable[i].values = NULL; 29 | hashtable[i].n_values = -1; 30 | hashtable[i].carry = 0; 31 | hashtable[i].next = NULL; 32 | } 33 | } 34 | 35 | unsigned hash(word *values, int n_values, int carry) 36 | { 37 | unsigned v = 0; 38 | int i; 39 | 40 | for (i = 0; i < n_values; ++i) 41 | { 42 | v += values[i] * (i+1) * carry; 43 | } 44 | 45 | return v % HASHTABLE_BUCKETS; 46 | } 47 | 48 | int compare(word *values_1, int n_values_1, int carry_1, word *values_2, int n_values_2, int carry_2) 49 | { 50 | int i; 51 | 52 | if (n_values_1 != n_values_2 || carry_1 != carry_2) 53 | return 0; 54 | 55 | return memcmp(values_1, values_2, n_values_1*sizeof(word))==0; 56 | } 57 | 58 | void hashtable_insert(word *values, int n_values, int carry, int cost) 59 | { 60 | unsigned h = hash(values, n_values, carry); 61 | hashtable_entry *e = &hashtable[h]; 62 | int exists = 0; 63 | 64 | while(1) 65 | { 66 | if (compare(values, n_values, carry, e->values, e->n_values, e->carry)) 67 | { 68 | if (cost > e->cost) 69 | e->cost = cost; 70 | return; 71 | } 72 | 73 | if(!e->next) 74 | { 75 | e->next = malloc(sizeof(hashtable_entry)); 76 | e = e->next; 77 | e->cost = cost; 78 | e->n_values = n_values; 79 | e->values = malloc(sizeof(word)*n_values); 80 | e->carry = carry; 81 | memcpy(e->values, values, sizeof(word)*n_values); 82 | e->next = NULL; 83 | return; 84 | } 85 | 86 | e = e->next; 87 | } 88 | } 89 | 90 | int hashtable_find(word *values, int n_values, int carry) 91 | { 92 | unsigned h = hash(values, n_values, carry); 93 | hashtable_entry *e = &hashtable[h]; 94 | int exists = 0; 95 | 96 | while(1) 97 | { 98 | if (compare(values, n_values, carry, e->values, e->n_values, e->carry)) 99 | { 100 | return e->cost; 101 | } 102 | 103 | if(!e->next) 104 | { 105 | return 0; 106 | } 107 | 108 | e = e->next; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /insn.def: -------------------------------------------------------------------------------- 1 | /* This file contains the definitions and documentation for the instructions 2 | used in the Superoptimizer. 3 | 4 | Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. 5 | 6 | This program is free software; you can redistribute it and/or modify it 7 | under the terms of the GNU General Public License as published by the 8 | Free Software Foundation; either version 2, or (at your option) any 9 | later version. 
10 | 11 | This program is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; see the file COPYING. If not, write to the Free 18 | Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 | 20 | /* The fields in the cpp macro call "DEF_INSN()" are: 21 | 22 | 1. The internal name of the instruction. 23 | 24 | 2. The class of the instruction. These are stored in the insn_class 25 | array and are accessed via the GET_INSN_CLASS macro. They are defined 26 | as follows: 27 | 28 | "1" an rtx code for a unary arithmetic expression (e.g, ABSVAL, CLZ) 29 | "c" an rtx code for a commutative binary operation (e.g, ADD, XOR) 30 | "2" an rtx code for a non-commutative binary operation (e.g., SUB, CMP) 31 | "<" an rtx code for a non-commutative binary instruction that only 32 | writes to the condition code 33 | "=" an rtx code for a commutative binary instruction that only writes 34 | to the condition code 35 | "x" everything else 36 | 37 | 3. The printed name of the instruction. 38 | 39 | */ 40 | 41 | /* Arithmetic insns. d = r1 + r2 [+ cy] */ 42 | DEF_INSN (ADD, 'b', "add") 43 | DEF_INSN (ADD_CI, 'b', "add_ci") 44 | DEF_INSN (ADD_CO, 'b', "add_co") 45 | DEF_INSN (ADD_CIO, 'b', "add_cio") 46 | /* Arithmetic insns. d = r1 - r2 [- cy] */ 47 | DEF_INSN (SUB, '2', "sub") 48 | DEF_INSN (SUB_CI, '2', "sub_ci") 49 | DEF_INSN (SUB_CO, '2', "sub_co") 50 | DEF_INSN (SUB_CIO, '2', "sub_cio") 51 | /* Arithmetic insns. d = r1 + ~r2 + cy/1 */ 52 | DEF_INSN (ADC_CI, '2', "adc_ci") 53 | DEF_INSN (ADC_CO, '2', "adc_co") 54 | DEF_INSN (ADC_CIO, '2', "adc_cio") 55 | /* Arithmetic insns. d = r1 + ~r2 */ 56 | DEF_INSN (ADDCMPL, '2', "addcmpl") 57 | /* XCore address arithmetic. */ 58 | DEF_INSN (LDA16F, '2', "lda16f") 59 | DEF_INSN (LDA16B, '2', "lda16b") 60 | DEF_INSN (LDAWF, '2', "ldawf") 61 | DEF_INSN (LDAWB, '2', "ldawb") 62 | 63 | /* Logical insns that don't affect the carry flag. */ 64 | DEF_INSN (AND, 'b', "and") 65 | DEF_INSN (IOR, 'b', "ior") 66 | DEF_INSN (XOR, 'b', "xor") 67 | DEF_INSN (ANDC, '2', "andc") 68 | DEF_INSN (IORC, '2', "iorc") 69 | DEF_INSN (EQV, '2', "eqv") 70 | DEF_INSN (NAND, 'b', "nand") 71 | DEF_INSN (NOR, 'b', "nor") 72 | 73 | /* Logical insns that reset the carry flag. */ 74 | DEF_INSN (AND_RC, 'b', "and_rc") 75 | DEF_INSN (IOR_RC, 'b', "ior_rc") 76 | DEF_INSN (XOR_RC, 'b', "xor_rc") 77 | DEF_INSN (ANDC_RC, '2', "andc_rc") 78 | DEF_INSN (IORC_RC, '2', "iorc_rc") 79 | DEF_INSN (EQV_RC, '2', "eqv_rc") 80 | DEF_INSN (NAND_RC, 'b', "nand_rc") 81 | DEF_INSN (NOR_RC, 'b', "nor_rc") 82 | 83 | /* Logical insns that clobber the carry flag. */ 84 | DEF_INSN (AND_CC, 'b', "and_cc") 85 | DEF_INSN (IOR_CC, 'b', "ior_cc") 86 | DEF_INSN (XOR_CC, 'b', "xor_cc") 87 | DEF_INSN (ANDC_CC, '2', "andc_cc") 88 | DEF_INSN (IORC_CC, '2', "iorc_cc") 89 | DEF_INSN (EQV_CC, '2', "eqv_cc") 90 | DEF_INSN (NAND_CC, 'b', "nand_cc") 91 | DEF_INSN (NOR_CC, 'b', "nor_cc") 92 | 93 | /* Bit shift and count. 
*/ 94 | DEF_INSN (LSHIFTR, '2', "log_shift_right") 95 | DEF_INSN (ASHIFTR, '2', "arith_shift_right") 96 | DEF_INSN (SHIFTL, '2', "shift_left") 97 | DEF_INSN (LSHIFTR_CO, '2', "log_shift_right_co") 98 | DEF_INSN (ASHIFTR_CO, '2', "arith_shift_right_co") 99 | DEF_INSN (SHIFTL_CO, '2', "shift_left_co") 100 | DEF_INSN (ROTATEL, '2', "rotate_left") 101 | DEF_INSN (ROTATEL_CO, '2', "rotate_left_co") 102 | DEF_INSN (ROTATER_CO, '2', "rotate_right_co") 103 | DEF_INSN (SHIFTL_NT, '2', "shiftl_nt") 104 | DEF_INSN (LSHIFTR_NT, '2', "lshiftr_nt") 105 | DEF_INSN (ASHIFTR_NT, '2', "ashiftr_nt") 106 | 107 | /* Rotate thru carry. */ 108 | DEF_INSN (ROTATEXL_CIO, '2', "rotate_thru_carry_left_co") 109 | DEF_INSN (ROTATEXR_CIO, '2', "rotate_thru_carry_right_co") 110 | 111 | /* Shift arithmetic right instruction and set carry iff the shifted 112 | operand is negative and any bit shifted out is 1. */ 113 | DEF_INSN (ASHIFTR_CON, '2', "arith_shift_right_set_carry_if_neg_and_nzbit_lost") 114 | 115 | /* Extract. */ 116 | DEF_INSN (EXTS1, '2', "exts1") 117 | DEF_INSN (EXTS2, '2', "exts2") 118 | DEF_INSN (EXTS8, '2', "exts8") 119 | DEF_INSN (EXTS16, '2', "exts16") 120 | DEF_INSN (EXTU1, '2', "extu1") 121 | DEF_INSN (EXTU2, '2', "extu2") 122 | 123 | /* XCore extend instructions. */ 124 | DEF_INSN (ZEXT, '2', "zext") 125 | DEF_INSN (SEXT, '2', "sext") 126 | 127 | /* Count leading/trailing zero instructions. */ 128 | DEF_INSN (CLZ, '1', "clz") 129 | DEF_INSN (CTZ, '1', "ctz") 130 | DEF_INSN (FF1, '1', "ff1") 131 | DEF_INSN (FF0, '1', "ff0") 132 | DEF_INSN (BSF86, '1', "bsf86") 133 | 134 | DEF_INSN (ABSVAL, '1', "abs") 135 | DEF_INSN (NABSVAL, '1', "nabs") 136 | DEF_INSN (CMP, '<', "cmp") 137 | DEF_INSN (CMPC, '<', "cmpc") 138 | 139 | /* XCore bit/byte reverse instructions. */ 140 | DEF_INSN (BITREV, '1', "bitrev") 141 | DEF_INSN (BYTEREV, '1', "byterev") 142 | 143 | /* 29k CMP instructions. */ 144 | DEF_INSN (CPEQ, 'c', "cpeq") 145 | DEF_INSN (CPGE, '2', "cpge") 146 | DEF_INSN (CPGEU, '2', "cpgeu") 147 | DEF_INSN (CPGT, '2', "cpgt") 148 | DEF_INSN (CPGTU, '2', "cpgtu") 149 | DEF_INSN (CPLE, '2', "cple") 150 | DEF_INSN (CPLEU, '2', "cpleu") 151 | DEF_INSN (CPLT, '2', "cplt") 152 | DEF_INSN (CPLTU, '2', "cpltu") 153 | DEF_INSN (CPNEQ, 'c', "cpneq") 154 | 155 | /* Alpha/XCore CMP instructions. */ 156 | DEF_INSN (CMPEQ, 'c', "cmpeq") 157 | DEF_INSN (CMPLE, '2', "cmple") 158 | DEF_INSN (CMPLEU, '2', "cmpleu") 159 | DEF_INSN (CMPLT, '2', "cmplt") 160 | DEF_INSN (CMPLTU, '2', "cmpltu") 161 | 162 | /* 88100 CMP instruction. */ 163 | DEF_INSN (CMPPAR, '2', "cmppar") 164 | 165 | /* SH CMP instructions. */ 166 | DEF_INSN (CYEQ, '=', "cyeq") 167 | DEF_INSN (CYGTU, '<', "cygtu") 168 | DEF_INSN (CYGEU, '<', "cygeu") 169 | DEF_INSN (CYGTS, '<', "cygts") 170 | DEF_INSN (CYGES, '<', "cyges") 171 | DEF_INSN (CYAND, '=', "cyand") 172 | 173 | /* SH specific instruction. */ 174 | DEF_INSN (MERGE16, '2', "merge16") 175 | DEF_INSN (DECR_CYEQ, '2', "decr_cyeq") 176 | 177 | /* XCore specific instruction. 
*/ 178 | DEF_INSN (MKMSK, '1', "mkmsk") 179 | 180 | /* Difference-or-zero (rs6000) */ 181 | DEF_INSN (DOZ, '2', "difference_or_zero") 182 | 183 | DEF_INSN (COPY, '1', "copy") 184 | DEF_INSN (EXCHANGE, '2', "exchange") 185 | 186 | /* Set, clear, complement carry */ 187 | DEF_INSN (SETCY, 'x', "set_cy") 188 | DEF_INSN (CLRCY, 'x', "clear_cy") 189 | DEF_INSN (COMCY, 'x', "complement_cy") 190 | 191 | /* Alpha conditional move */ 192 | DEF_INSN (CMOVEQ, '2', "cmoveq") 193 | DEF_INSN (CMOVNE, '2', "cmovne") 194 | DEF_INSN (CMOVLT, '2', "cmovlt") 195 | DEF_INSN (CMOVGE, '2', "cmovge") 196 | DEF_INSN (CMOVLE, '2', "cmovle") 197 | DEF_INSN (CMOVGT, '2', "cmovgt") 198 | 199 | DEF_INSN (INVDIV, '2', "invdiv") 200 | DEF_INSN (INVMOD, '2', "invmod") 201 | DEF_INSN (UMULWIDEN_HI, '2', "umulwiden_hi") 202 | DEF_INSN (MUL, '2', "mul") 203 | 204 | #if UDIV_WITH_SDIV 205 | DEF_INSN (SDIV, '2', "sdiv") 206 | #endif 207 | 208 | DEF_INSN (ADD_SEQ, '2', "add_seq") 209 | DEF_INSN (ADD_SNE, '2', "add_sne") 210 | DEF_INSN (ADD_SLTS, '2', "add_slts") 211 | DEF_INSN (ADD_SGES, '2', "add_sges") 212 | DEF_INSN (ADD_SLES, '2', "add_sles") 213 | DEF_INSN (ADD_SGTS, '2', "add_sgts") 214 | DEF_INSN (ADD_SLTU, '2', "add_sltu") 215 | DEF_INSN (ADD_SGEU, '2', "add_sgeu") 216 | DEF_INSN (ADD_SLEU, '2', "add_sleu") 217 | DEF_INSN (ADD_SGTU, '2', "add_sgtu") 218 | DEF_INSN (ADD_SOVS, '2', "add_sovs") 219 | DEF_INSN (ADD_SNVS, '2', "add_snvs") 220 | DEF_INSN (ADD_SODD, '2', "add_sodd") 221 | DEF_INSN (ADD_SEVN, '2', "add_sevn") 222 | DEF_INSN (ADD_S, '2', "add_s") 223 | DEF_INSN (ADD_CIO_SEQ, '2', "add_cio_seq") 224 | DEF_INSN (ADD_CIO_SNE, '2', "add_cio_sne") 225 | DEF_INSN (ADD_CIO_SLTU, '2', "add_cio_sltu") 226 | DEF_INSN (ADD_CIO_SGEU, '2', "add_cio_sgeu") 227 | DEF_INSN (ADD_CIO_SLEU, '2', "add_cio_sleu") 228 | DEF_INSN (ADD_CIO_SGTU, '2', "add_cio_sgtu") 229 | DEF_INSN (ADD_CIO_SODD, '2', "add_cio_sodd") 230 | DEF_INSN (ADD_CIO_SEVN, '2', "add_cio_sevn") 231 | DEF_INSN (ADD_CIO_S, '2', "add_cio_s") 232 | DEF_INSN (ADD_CO_SEQ, '2', "add_co_seq") 233 | DEF_INSN (ADD_CO_SNE, '2', "add_co_sne") 234 | DEF_INSN (ADD_CO_SLTU, '2', "add_co_sltu") 235 | DEF_INSN (ADD_CO_SGEU, '2', "add_co_sgeu") 236 | DEF_INSN (ADD_CO_SLEU, '2', "add_co_sleu") 237 | DEF_INSN (ADD_CO_SGTU, '2', "add_co_sgtu") 238 | DEF_INSN (ADD_CO_SODD, '2', "add_co_sodd") 239 | DEF_INSN (ADD_CO_SEVN, '2', "add_co_sevn") 240 | DEF_INSN (ADD_CO_S, '2', "add_co_s") 241 | 242 | DEF_INSN (SUB_SEQ, '2', "sub_seq") 243 | DEF_INSN (SUB_SNE, '2', "sub_sne") 244 | DEF_INSN (SUB_SLTS, '2', "sub_slts") 245 | DEF_INSN (SUB_SGES, '2', "sub_sges") 246 | DEF_INSN (SUB_SLES, '2', "sub_sles") 247 | DEF_INSN (SUB_SGTS, '2', "sub_sgts") 248 | DEF_INSN (SUB_SODD, '2', "sub_sodd") 249 | DEF_INSN (SUB_SEVN, '2', "sub_sevn") 250 | DEF_INSN (SUB_S, '2', "sub_s") 251 | 252 | DEF_INSN (ADC_CIO_SEQ, '2', "adc_cio_seq") 253 | DEF_INSN (ADC_CIO_SNE, '2', "adc_cio_sne") 254 | DEF_INSN (ADC_CIO_SLTU, '2', "adc_cio_sltu") 255 | DEF_INSN (ADC_CIO_SGEU, '2', "adc_cio_sgeu") 256 | DEF_INSN (ADC_CIO_SLEU, '2', "adc_cio_sleu") 257 | DEF_INSN (ADC_CIO_SGTU, '2', "adc_cio_sgtu") 258 | DEF_INSN (ADC_CIO_SODD, '2', "adc_cio_sodd") 259 | DEF_INSN (ADC_CIO_SEVN, '2', "adc_cio_sevn") 260 | DEF_INSN (ADC_CIO_S, '2', "adc_cio_s") 261 | DEF_INSN (ADC_CO_SEQ, '2', "adc_co_seq") 262 | DEF_INSN (ADC_CO_SNE, '2', "adc_co_sne") 263 | DEF_INSN (ADC_CO_SLTU, '2', "adc_co_sltu") 264 | DEF_INSN (ADC_CO_SGEU, '2', "adc_co_sgeu") 265 | DEF_INSN (ADC_CO_SLEU, '2', "adc_co_sleu") 266 | DEF_INSN (ADC_CO_SGTU, '2', 
"adc_co_sgtu") 267 | DEF_INSN (ADC_CO_SODD, '2', "adc_co_sodd") 268 | DEF_INSN (ADC_CO_SEVN, '2', "adc_co_sevn") 269 | DEF_INSN (ADC_CO_S, '2', "adc_co_s") 270 | 271 | DEF_INSN (COMCLR_SEQ, '2', "comclr_seq") 272 | DEF_INSN (COMCLR_SNE, '2', "comclr_sne") 273 | DEF_INSN (COMCLR_SLTS, '2', "comclr_slts") 274 | DEF_INSN (COMCLR_SGES, '2', "comclr_sges") 275 | DEF_INSN (COMCLR_SLES, '2', "comclr_sles") 276 | DEF_INSN (COMCLR_SGTS, '2', "comclr_sgts") 277 | DEF_INSN (COMCLR_SLTU, '2', "comclr_sltu") 278 | DEF_INSN (COMCLR_SGEU, '2', "comclr_sgeu") 279 | DEF_INSN (COMCLR_SLEU, '2', "comclr_sleu") 280 | DEF_INSN (COMCLR_SGTU, '2', "comclr_sgtu") 281 | DEF_INSN (COMCLR_SODD, '2', "comclr_sodd") 282 | DEF_INSN (COMCLR_SEVN, '2', "comclr_sevn") 283 | /* DEF_INSN (COMCLR_S, '2', "comclr_s") */ 284 | 285 | DEF_INSN (AND_SEQ, '2', "and_seq") 286 | DEF_INSN (AND_SNE, '2', "and_sne") 287 | DEF_INSN (AND_SLTS, '2', "and_slts") 288 | DEF_INSN (AND_SGES, '2', "and_sges") 289 | DEF_INSN (AND_SLES, '2', "and_sles") 290 | DEF_INSN (AND_SGTS, '2', "and_sgts") 291 | DEF_INSN (AND_SODD, '2', "and_sodd") 292 | DEF_INSN (AND_SEVN, '2', "and_sevn") 293 | DEF_INSN (AND_S, '2', "and_s") 294 | DEF_INSN (IOR_SEQ, '2', "ior_seq") 295 | DEF_INSN (IOR_SNE, '2', "ior_sne") 296 | DEF_INSN (IOR_SLTS, '2', "ior_slts") 297 | DEF_INSN (IOR_SGES, '2', "ior_sges") 298 | DEF_INSN (IOR_SLES, '2', "ior_sles") 299 | DEF_INSN (IOR_SGTS, '2', "ior_sgts") 300 | DEF_INSN (IOR_SODD, '2', "ior_sodd") 301 | DEF_INSN (IOR_SEVN, '2', "ior_sevn") 302 | DEF_INSN (IOR_S, '2', "ior_s") 303 | DEF_INSN (XOR_SEQ, '2', "xor_seq") 304 | DEF_INSN (XOR_SNE, '2', "xor_sne") 305 | DEF_INSN (XOR_SLTS, '2', "xor_slts") 306 | DEF_INSN (XOR_SGES, '2', "xor_sges") 307 | DEF_INSN (XOR_SLES, '2', "xor_sles") 308 | DEF_INSN (XOR_SGTS, '2', "xor_sgts") 309 | DEF_INSN (XOR_SODD, '2', "xor_sodd") 310 | DEF_INSN (XOR_SEVN, '2', "xor_sevn") 311 | DEF_INSN (XOR_S, '2', "xor_s") 312 | DEF_INSN (ANDC_SEQ, '2', "andc_seq") 313 | DEF_INSN (ANDC_SNE, '2', "andc_sne") 314 | DEF_INSN (ANDC_SLTS, '2', "andc_slts") 315 | DEF_INSN (ANDC_SGES, '2', "andc_sges") 316 | DEF_INSN (ANDC_SLES, '2', "andc_sles") 317 | DEF_INSN (ANDC_SGTS, '2', "andc_sgts") 318 | DEF_INSN (ANDC_SODD, '2', "andc_sodd") 319 | DEF_INSN (ANDC_SEVN, '2', "andc_sevn") 320 | DEF_INSN (ANDC_S, '2', "andc_s") 321 | 322 | /* Bit shift and count. */ 323 | DEF_INSN (LSHIFTR_S, '2', "log_shift_right_s") 324 | DEF_INSN (ASHIFTR_S, '2', "arith_shift_right_s") 325 | DEF_INSN (SHIFTL_S, '2', "shift_left_s") 326 | DEF_INSN (ROTATEL_S, '2', "rotate_left_s") 327 | 328 | /* Extract. */ 329 | DEF_INSN (EXTS1_S, '2', "exts1_s") 330 | DEF_INSN (EXTS2_S, '2', "exts2_s") 331 | DEF_INSN (EXTS8_S, '2', "exts8_s") 332 | DEF_INSN (EXTS16_S, '2', "exts16_s") 333 | DEF_INSN (EXTU1_S, '2', "extu1_s") 334 | DEF_INSN (EXTU2_S, '2', "extu2_s") 335 | 336 | DEF_INSN (COPY_S, '1', "copy_s") 337 | 338 | 339 | /* Inte 960 specific instructions. 
*/ 340 | DEF_INSN (ADDC_960, '2', "addc_960") 341 | DEF_INSN (SUBC_960, '2', "subc_960") 342 | 343 | DEF_INSN (SEL_NO_960, '2', "sel_no_960") 344 | DEF_INSN (SEL_G_960, '2', "sel_g_960") 345 | DEF_INSN (SEL_E_960, '2', "sel_e_960") 346 | DEF_INSN (SEL_GE_960, '2', "sel_ge_960") 347 | DEF_INSN (SEL_L_960, '2', "sel_l_960") 348 | DEF_INSN (SEL_NE_960, '2', "sel_ne_960") 349 | DEF_INSN (SEL_LE_960, '2', "sel_le_960") 350 | DEF_INSN (SEL_O_960, '2', "sel_o_960") 351 | 352 | DEF_INSN (CONCMPO_960, '<', "concmpo_960") 353 | DEF_INSN (CONCMPI_960, '<', "concmpi_960") 354 | DEF_INSN (CMPO_960, '<', "cmpo_960") 355 | DEF_INSN (CMPI_960, '<', "cmpi_960") 356 | DEF_INSN (ADDO_NO_960, '2', "addo_no_960") 357 | DEF_INSN (ADDO_G_960, '2', "addo_g_960") 358 | DEF_INSN (ADDO_E_960, '2', "addo_e_960") 359 | DEF_INSN (ADDO_GE_960, '2', "addo_ge_960") 360 | DEF_INSN (ADDO_L_960, '2', "addo_l_960") 361 | DEF_INSN (ADDO_NE_960, '2', "addo_ne_960") 362 | DEF_INSN (ADDO_LE_960, '2', "addo_le_960") 363 | DEF_INSN (ADDO_O_960, '2', "addo_o_960") 364 | DEF_INSN (SUBO_NO_960, '2', "subo_no_960") 365 | DEF_INSN (SUBO_G_960, '2', "subo_g_960") 366 | DEF_INSN (SUBO_E_960, '2', "subo_e_960") 367 | DEF_INSN (SUBO_GE_960, '2', "subo_ge_960") 368 | DEF_INSN (SUBO_L_960, '2', "subo_l_960") 369 | DEF_INSN (SUBO_NE_960, '2', "subo_ne_960") 370 | DEF_INSN (SUBO_LE_960, '2', "subo_le_960") 371 | DEF_INSN (SUBO_O_960, '2', "subo_o_960") 372 | 373 | DEF_INSN (ALTERBIT, '2', "alterbit") 374 | DEF_INSN (SETBIT, '2', "setbit") 375 | DEF_INSN (CLRBIT, '2', "clrbit") 376 | DEF_INSN (CHKBIT, '<', "chkbit") 377 | DEF_INSN (NOTBIT, '2', "notbit") 378 | 379 | /* 380 | Local variables: 381 | mode:c 382 | version-control: t 383 | End: 384 | */ 385 | -------------------------------------------------------------------------------- /longlong.h: -------------------------------------------------------------------------------- 1 | /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 | 3 | Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. 4 | 5 | This file is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU Library General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or (at your 8 | option) any later version. 9 | 10 | This file is distributed in the hope that it will be useful, but 11 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 12 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 13 | License for more details. 14 | 15 | You should have received a copy of the GNU Library General Public License 16 | along with this file; see the file COPYING.LIB. If not, write to 17 | the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ 18 | 19 | /* You have to define the following before including this file: 20 | 21 | UWtype -- An unsigned type, default type for operations (typically a "word") 22 | UHWtype -- An unsigned type, at least half the size of UWtype. 23 | UDWtype -- An unsigned type, at least twice as large a UWtype 24 | W_TYPE_SIZE -- size in bits of UWtype 25 | 26 | SItype, USItype -- Signed and unsigned 32 bit types. 27 | DItype, UDItype -- Signed and unsigned 64 bit types. 28 | 29 | On a 32 bit machine UWtype should typically be USItype; 30 | on a 64 bit machine, UWtype should typically be UDItype. 
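   As a purely illustrative sketch (these typedefs and values describe an assumed 32-bit configuration and are not supplied by this file), an including file might set up:

     typedef int SItype;
     typedef unsigned int USItype;
     typedef long long DItype;
     typedef unsigned long long UDItype;
     #define UWtype USItype
     #define UHWtype unsigned short int
     #define UDWtype UDItype
     #define W_TYPE_SIZE 32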
31 | */ 32 | 33 | #define __BITS4 (W_TYPE_SIZE / 4) 34 | #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 35 | #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 36 | #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 37 | 38 | /* Define auxiliary asm macros. 39 | 40 | 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two 41 | UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype 42 | word product in HIGH_PROD and LOW_PROD. 43 | 44 | 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 45 | UDWtype product. This is just a variant of umul_ppmm. 46 | 47 | 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 48 | denominator) divides a UDWtype, composed by the UWtype integers 49 | HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 50 | in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 51 | than DENOMINATOR for correct operation. If the macro in addition 52 | requires the most significant bit of DENOMINATOR to be 1, the pre-processor 53 | symbol UDIV_NEEDS_NORMALIZATION is defined to 1. 54 | 55 | 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 56 | denominator). Like udiv_qrnnd but the numbers are signed. The quotient 57 | is rounded towards 0. 58 | 59 | 5) count_leading_zeros(count, x) counts the number of zero-bits from the 60 | msb to the first non-zero bit in the UWtype X. This is the number of 61 | steps X needs to be shifted left to set the msb. Undefined for X == 0, 62 | unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 63 | 64 | 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 65 | from the least significant end. 66 | 67 | 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 68 | high_addend_2, low_addend_2) adds two UWtype integers, composed by 69 | HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 70 | respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 71 | (i.e. carry out) is not stored anywhere, and is lost. 72 | 73 | 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 74 | high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 75 | composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 76 | LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 77 | and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 78 | and is lost. 79 | 80 | If any of these macros are left undefined for a particular CPU, 81 | C macros are used. */ 82 | 83 | /* The CPUs come in alphabetical order below. 84 | 85 | Please add support for more CPUs here, or improve the current support 86 | for the CPUs below! */ 87 | 88 | #if defined (__GNUC__) && !defined (NO_ASM) 89 | 90 | /* We sometimes need to clobber "cc" with gcc2, but that would not be 91 | understood by gcc1. Use cpp to avoid major code duplication.
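   For gcc 2 and later the two macros below expand to the "cc" clobber, complete with the leading ':' or ',' that introduces it; for gcc 1 they expand to nothing, so the asm statements further down can simply append them in either case.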
*/ 92 | #if __GNUC__ < 2 93 | #define __CLOBBER_CC 94 | #define __AND_CLOBBER_CC 95 | #else /* __GNUC__ >= 2 */ 96 | #define __CLOBBER_CC : "cc" 97 | #define __AND_CLOBBER_CC , "cc" 98 | #endif /* __GNUC__ < 2 */ 99 | 100 | #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 101 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 102 | __asm__ ("add %1,%4,%5 \ 103 | addc %0,%2,%3" \ 104 | : "=r" ((USItype)(sh)), \ 105 | "=&r" ((USItype)(sl)) \ 106 | : "%r" ((USItype)(ah)), \ 107 | "rI" ((USItype)(bh)), \ 108 | "%r" ((USItype)(al)), \ 109 | "rI" ((USItype)(bl))) 110 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 111 | __asm__ ("sub %1,%4,%5 \ 112 | subc %0,%2,%3" \ 113 | : "=r" ((USItype)(sh)), \ 114 | "=&r" ((USItype)(sl)) \ 115 | : "r" ((USItype)(ah)), \ 116 | "rI" ((USItype)(bh)), \ 117 | "r" ((USItype)(al)), \ 118 | "rI" ((USItype)(bl))) 119 | #define umul_ppmm(xh, xl, m0, m1) \ 120 | do { \ 121 | USItype __m0 = (m0), __m1 = (m1); \ 122 | __asm__ ("multiplu %0,%1,%2" \ 123 | : "=r" ((USItype)(xl)) \ 124 | : "r" (__m0), \ 125 | "r" (__m1)); \ 126 | __asm__ ("multmu %0,%1,%2" \ 127 | : "=r" ((USItype)(xh)) \ 128 | : "r" (__m0), \ 129 | "r" (__m1)); \ 130 | } while (0) 131 | #define udiv_qrnnd(q, r, n1, n0, d) \ 132 | __asm__ ("dividu %0,%3,%4" \ 133 | : "=r" ((USItype)(q)), \ 134 | "=q" ((USItype)(r)) \ 135 | : "1" ((USItype)(n1)), \ 136 | "r" ((USItype)(n0)), \ 137 | "r" ((USItype)(d))) 138 | #define count_leading_zeros(count, x) \ 139 | __asm__ ("clz %0,%1" \ 140 | : "=r" ((USItype)(count)) \ 141 | : "r" ((USItype)(x))) 142 | #endif /* __a29k__ */ 143 | 144 | #if defined (__alpha__) && W_TYPE_SIZE == 64 145 | #define umul_ppmm(ph, pl, m0, m1) \ 146 | do { \ 147 | UDItype __m0 = (m0), __m1 = (m1); \ 148 | __asm__ ("umulh %r1,%2,%0" \ 149 | : "=r" ((UDItype) ph) \ 150 | : "%rJ" (__m0), \ 151 | "rI" (__m1)); \ 152 | (pl) = __m0 * __m1; \ 153 | } while (0) 154 | #define UMUL_TIME 46 155 | #ifndef LONGLONG_STANDALONE 156 | #define udiv_qrnnd(q, r, n1, n0, d) \ 157 | do { UDItype __r; \ 158 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 159 | (r) = __r; \ 160 | } while (0) 161 | extern UDItype __udiv_qrnnd (); 162 | #define UDIV_TIME 220 163 | #endif /* LONGLONG_STANDALONE */ 164 | #endif /* __alpha__ */ 165 | 166 | #if defined (__arm__) && W_TYPE_SIZE == 32 167 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 168 | __asm__ ("adds %1, %4, %5 \ 169 | adc %0, %2, %3" \ 170 | : "=r" ((USItype)(sh)), \ 171 | "=&r" ((USItype)(sl)) \ 172 | : "%r" ((USItype)(ah)), \ 173 | "rI" ((USItype)(bh)), \ 174 | "%r" ((USItype)(al)), \ 175 | "rI" ((USItype)(bl))) 176 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 177 | __asm__ ("subs %1, %4, %5 \ 178 | sbc %0, %2, %3" \ 179 | : "=r" ((USItype)(sh)), \ 180 | "=&r" ((USItype)(sl)) \ 181 | : "r" ((USItype)(ah)), \ 182 | "rI" ((USItype)(bh)), \ 183 | "r" ((USItype)(al)), \ 184 | "rI" ((USItype)(bl))) 185 | #define umul_ppmm(xh, xl, a, b) \ 186 | __asm__ ("%@ Inlined umul_ppmm \ 187 | mov %|r0, %2, lsr #16 \ 188 | mov %|r2, %3, lsr #16 \ 189 | bic %|r1, %2, %|r0, lsl #16 \ 190 | bic %|r2, %3, %|r2, lsl #16 \ 191 | mul %1, %|r1, %|r2 \ 192 | mul %|r2, %|r0, %|r2 \ 193 | mul %|r1, %0, %|r1 \ 194 | mul %0, %|r0, %0 \ 195 | adds %|r1, %|r2, %|r1 \ 196 | addcs %0, %0, #65536 \ 197 | adds %1, %1, %|r1, lsl #16 \ 198 | adc %0, %0, %|r1, lsr #16" \ 199 | : "=&r" ((USItype)(xh)), \ 200 | "=r" ((USItype)(xl)) \ 201 | : "r" ((USItype)(a)), \ 202 | "r" ((USItype)(b)) \ 203 | : "r0", "r1", "r2") 204 | #define UMUL_TIME 20 205 | #define UDIV_TIME 100 206 | #endif /* 
__arm__ */ 207 | 208 | #if defined (__clipper__) && W_TYPE_SIZE == 32 209 | #define umul_ppmm(w1, w0, u, v) \ 210 | ({union {UDItype __ll; \ 211 | struct {USItype __l, __h;} __i; \ 212 | } __xx; \ 213 | __asm__ ("mulwux %2,%0" \ 214 | : "=r" (__xx.__ll) \ 215 | : "%0" ((USItype)(u)), \ 216 | "r" ((USItype)(v))); \ 217 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 218 | #define smul_ppmm(w1, w0, u, v) \ 219 | ({union {DItype __ll; \ 220 | struct {SItype __l, __h;} __i; \ 221 | } __xx; \ 222 | __asm__ ("mulwx %2,%0" \ 223 | : "=r" (__xx.__ll) \ 224 | : "%0" ((SItype)(u)), \ 225 | "r" ((SItype)(v))); \ 226 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 227 | #define __umulsidi3(u, v) \ 228 | ({UDItype __w; \ 229 | __asm__ ("mulwux %2,%0" \ 230 | : "=r" (__w) \ 231 | : "%0" ((USItype)(u)), \ 232 | "r" ((USItype)(v))); \ 233 | __w; }) 234 | #endif /* __clipper__ */ 235 | 236 | #if defined (__gmicro__) && W_TYPE_SIZE == 32 237 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 238 | __asm__ ("add.w %5,%1 \ 239 | addx %3,%0" \ 240 | : "=g" ((USItype)(sh)), \ 241 | "=&g" ((USItype)(sl)) \ 242 | : "%0" ((USItype)(ah)), \ 243 | "g" ((USItype)(bh)), \ 244 | "%1" ((USItype)(al)), \ 245 | "g" ((USItype)(bl))) 246 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 247 | __asm__ ("sub.w %5,%1 \ 248 | subx %3,%0" \ 249 | : "=g" ((USItype)(sh)), \ 250 | "=&g" ((USItype)(sl)) \ 251 | : "0" ((USItype)(ah)), \ 252 | "g" ((USItype)(bh)), \ 253 | "1" ((USItype)(al)), \ 254 | "g" ((USItype)(bl))) 255 | #define umul_ppmm(ph, pl, m0, m1) \ 256 | __asm__ ("mulx %3,%0,%1" \ 257 | : "=g" ((USItype)(ph)), \ 258 | "=r" ((USItype)(pl)) \ 259 | : "%0" ((USItype)(m0)), \ 260 | "g" ((USItype)(m1))) 261 | #define udiv_qrnnd(q, r, nh, nl, d) \ 262 | __asm__ ("divx %4,%0,%1" \ 263 | : "=g" ((USItype)(q)), \ 264 | "=r" ((USItype)(r)) \ 265 | : "1" ((USItype)(nh)), \ 266 | "0" ((USItype)(nl)), \ 267 | "g" ((USItype)(d))) 268 | #define count_leading_zeros(count, x) \ 269 | __asm__ ("bsch/1 %1,%0" \ 270 | : "=g" (count) \ 271 | : "g" ((USItype)(x)), \ 272 | "0" ((USItype)0)) 273 | #endif 274 | 275 | #if defined (__hppa) && W_TYPE_SIZE == 32 276 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 277 | __asm__ ("add %4,%5,%1 \ 278 | addc %2,%3,%0" \ 279 | : "=r" ((USItype)(sh)), \ 280 | "=&r" ((USItype)(sl)) \ 281 | : "%rM" ((USItype)(ah)), \ 282 | "rM" ((USItype)(bh)), \ 283 | "%rM" ((USItype)(al)), \ 284 | "rM" ((USItype)(bl))) 285 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 286 | __asm__ ("sub %4,%5,%1 \ 287 | subb %2,%3,%0" \ 288 | : "=r" ((USItype)(sh)), \ 289 | "=&r" ((USItype)(sl)) \ 290 | : "rM" ((USItype)(ah)), \ 291 | "rM" ((USItype)(bh)), \ 292 | "rM" ((USItype)(al)), \ 293 | "rM" ((USItype)(bl))) 294 | #if defined (_PA_RISC1_1) 295 | #define umul_ppmm(wh, wl, u, v) \ 296 | do { \ 297 | union {UDItype __ll; \ 298 | struct {USItype __h, __l;} __i; \ 299 | } __xx; \ 300 | __asm__ ("xmpyu %1,%2,%0" \ 301 | : "=fx" (__xx.__ll) \ 302 | : "fx" ((USItype)(u)), \ 303 | "fx" ((USItype)(v))); \ 304 | (wh) = __xx.__i.__h; \ 305 | (wl) = __xx.__i.__l; \ 306 | } while (0) 307 | #define UMUL_TIME 8 308 | #define UDIV_TIME 60 309 | #else 310 | #define UMUL_TIME 40 311 | #define UDIV_TIME 80 312 | #endif 313 | #ifndef LONGLONG_STANDALONE 314 | #define udiv_qrnnd(q, r, n1, n0, d) \ 315 | do { USItype __r; \ 316 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 317 | (r) = __r; \ 318 | } while (0) 319 | extern USItype __udiv_qrnnd (); 320 | #endif /* LONGLONG_STANDALONE */ 321 | #define count_leading_zeros(count, x) \ 322 | do { \ 323 | USItype 
__tmp; \ 324 | __asm__ ( \ 325 | "ldi 1,%0 \ 326 | extru,= %1,15,16,%%r0 ; Bits 31..16 zero? \ 327 | extru,tr %1,15,16,%1 ; No. Shift down, skip add. \ 328 | ldo 16(%0),%0 ; Yes. Perform add. \ 329 | extru,= %1,23,8,%%r0 ; Bits 15..8 zero? \ 330 | extru,tr %1,23,8,%1 ; No. Shift down, skip add. \ 331 | ldo 8(%0),%0 ; Yes. Perform add. \ 332 | extru,= %1,27,4,%%r0 ; Bits 7..4 zero? \ 333 | extru,tr %1,27,4,%1 ; No. Shift down, skip add. \ 334 | ldo 4(%0),%0 ; Yes. Perform add. \ 335 | extru,= %1,29,2,%%r0 ; Bits 3..2 zero? \ 336 | extru,tr %1,29,2,%1 ; No. Shift down, skip add. \ 337 | ldo 2(%0),%0 ; Yes. Perform add. \ 338 | extru %1,30,1,%1 ; Extract bit 1. \ 339 | sub %0,%1,%0 ; Subtract it. \ 340 | " : "=r" (count), "=r" (__tmp) : "1" (x)); \ 341 | } while (0) 342 | #endif 343 | 344 | #if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32 345 | #define umul_ppmm(xh, xl, m0, m1) \ 346 | do { \ 347 | union {UDItype __ll; \ 348 | struct {USItype __h, __l;} __i; \ 349 | } __xx; \ 350 | USItype __m0 = (m0), __m1 = (m1); \ 351 | __asm__ ("mr %0,%3" \ 352 | : "=r" (__xx.__i.__h), \ 353 | "=r" (__xx.__i.__l) \ 354 | : "%1" (__m0), \ 355 | "r" (__m1)); \ 356 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 357 | (xh) += ((((SItype) __m0 >> 31) & __m1) \ 358 | + (((SItype) __m1 >> 31) & __m0)); \ 359 | } while (0) 360 | #define smul_ppmm(xh, xl, m0, m1) \ 361 | do { \ 362 | union {DItype __ll; \ 363 | struct {USItype __h, __l;} __i; \ 364 | } __xx; \ 365 | __asm__ ("mr %0,%3" \ 366 | : "=r" (__xx.__i.__h), \ 367 | "=r" (__xx.__i.__l) \ 368 | : "%1" (m0), \ 369 | "r" (m1)); \ 370 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 371 | } while (0) 372 | #define sdiv_qrnnd(q, r, n1, n0, d) \ 373 | do { \ 374 | union {DItype __ll; \ 375 | struct {USItype __h, __l;} __i; \ 376 | } __xx; \ 377 | __xx.__i.__h = n1; __xx.__i.__l = n0; \ 378 | __asm__ ("dr %0,%2" \ 379 | : "=r" (__xx.__ll) \ 380 | : "0" (__xx.__ll), "r" (d)); \ 381 | (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ 382 | } while (0) 383 | #endif 384 | 385 | #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 386 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 387 | __asm__ ("addl %5,%1 \ 388 | adcl %3,%0" \ 389 | : "=r" ((USItype)(sh)), \ 390 | "=&r" ((USItype)(sl)) \ 391 | : "%0" ((USItype)(ah)), \ 392 | "g" ((USItype)(bh)), \ 393 | "%1" ((USItype)(al)), \ 394 | "g" ((USItype)(bl))) 395 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 396 | __asm__ ("subl %5,%1 \ 397 | sbbl %3,%0" \ 398 | : "=r" ((USItype)(sh)), \ 399 | "=&r" ((USItype)(sl)) \ 400 | : "0" ((USItype)(ah)), \ 401 | "g" ((USItype)(bh)), \ 402 | "1" ((USItype)(al)), \ 403 | "g" ((USItype)(bl))) 404 | #define umul_ppmm(w1, w0, u, v) \ 405 | __asm__ ("mull %3" \ 406 | : "=a" ((USItype)(w0)), \ 407 | "=d" ((USItype)(w1)) \ 408 | : "%0" ((USItype)(u)), \ 409 | "rm" ((USItype)(v))) 410 | #define udiv_qrnnd(q, r, n1, n0, d) \ 411 | __asm__ ("divl %4" \ 412 | : "=a" ((USItype)(q)), \ 413 | "=d" ((USItype)(r)) \ 414 | : "0" ((USItype)(n0)), \ 415 | "1" ((USItype)(n1)), \ 416 | "rm" ((USItype)(d))) 417 | #define count_leading_zeros(count, x) \ 418 | do { \ 419 | USItype __cbtmp; \ 420 | __asm__ ("bsrl %1,%0" \ 421 | : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ 422 | (count) = __cbtmp ^ 31; \ 423 | } while (0) 424 | #define count_trailing_zeros(count, x) \ 425 | __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) 426 | #define UMUL_TIME 40 427 | #define UDIV_TIME 40 428 | #endif /* 80x86 */ 429 | 430 | #if defined (__i960__) && W_TYPE_SIZE == 32 431 | #define 
umul_ppmm(w1, w0, u, v) \ 432 | ({union {UDItype __ll; \ 433 | struct {USItype __l, __h;} __i; \ 434 | } __xx; \ 435 | __asm__ ("emul %2,%1,%0" \ 436 | : "=d" (__xx.__ll) \ 437 | : "%dI" ((USItype)(u)), \ 438 | "dI" ((USItype)(v))); \ 439 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 440 | #define __umulsidi3(u, v) \ 441 | ({UDItype __w; \ 442 | __asm__ ("emul %2,%1,%0" \ 443 | : "=d" (__w) \ 444 | : "%dI" ((USItype)(u)), \ 445 | "dI" ((USItype)(v))); \ 446 | __w; }) 447 | #endif /* __i960__ */ 448 | 449 | #if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 450 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 451 | __asm__ ("add%.l %5,%1 \ 452 | addx%.l %3,%0" \ 453 | : "=d" ((USItype)(sh)), \ 454 | "=&d" ((USItype)(sl)) \ 455 | : "%0" ((USItype)(ah)), \ 456 | "d" ((USItype)(bh)), \ 457 | "%1" ((USItype)(al)), \ 458 | "g" ((USItype)(bl))) 459 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 460 | __asm__ ("sub%.l %5,%1 \ 461 | subx%.l %3,%0" \ 462 | : "=d" ((USItype)(sh)), \ 463 | "=&d" ((USItype)(sl)) \ 464 | : "0" ((USItype)(ah)), \ 465 | "d" ((USItype)(bh)), \ 466 | "1" ((USItype)(al)), \ 467 | "g" ((USItype)(bl))) 468 | #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 469 | #define umul_ppmm(w1, w0, u, v) \ 470 | __asm__ ("mulu%.l %3,%1:%0" \ 471 | : "=d" ((USItype)(w0)), \ 472 | "=d" ((USItype)(w1)) \ 473 | : "%0" ((USItype)(u)), \ 474 | "dmi" ((USItype)(v))) 475 | #define UMUL_TIME 45 476 | #define udiv_qrnnd(q, r, n1, n0, d) \ 477 | __asm__ ("divu%.l %4,%1:%0" \ 478 | : "=d" ((USItype)(q)), \ 479 | "=d" ((USItype)(r)) \ 480 | : "0" ((USItype)(n0)), \ 481 | "1" ((USItype)(n1)), \ 482 | "dmi" ((USItype)(d))) 483 | #define UDIV_TIME 90 484 | #define sdiv_qrnnd(q, r, n1, n0, d) \ 485 | __asm__ ("divs%.l %4,%1:%0" \ 486 | : "=d" ((USItype)(q)), \ 487 | "=d" ((USItype)(r)) \ 488 | : "0" ((USItype)(n0)), \ 489 | "1" ((USItype)(n1)), \ 490 | "dmi" ((USItype)(d))) 491 | #define count_leading_zeros(count, x) \ 492 | __asm__ ("bfffo %1{%b2:%b2},%0" \ 493 | : "=d" ((USItype)(count)) \ 494 | : "od" ((USItype)(x)), "n" (0)) 495 | #else /* not mc68020 */ 496 | #define umul_ppmmxx(xh, xl, a, b) \ 497 | do { USItype __umul_tmp1, __umul_tmp2; \ 498 | __asm__ ("| Inlined umul_ppmm \ 499 | move%.l %5,%3 \ 500 | move%.l %2,%0 \ 501 | move%.w %3,%1 \ 502 | swap %3 \ 503 | swap %0 \ 504 | mulu %2,%1 \ 505 | mulu %3,%0 \ 506 | mulu %2,%3 \ 507 | swap %2 \ 508 | mulu %5,%2 \ 509 | add%.l %3,%2 \ 510 | jcc 1f \ 511 | add%.l #0x10000,%0 \ 512 | 1: move%.l %2,%3 \ 513 | clr%.w %2 \ 514 | swap %2 \ 515 | swap %3 \ 516 | clr%.w %3 \ 517 | add%.l %3,%1 \ 518 | addx%.l %2,%0 \ 519 | | End inlined umul_ppmm" \ 520 | : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ 521 | "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ 522 | : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ 523 | } while (0) 524 | #define UMUL_TIME 100 525 | #define UDIV_TIME 400 526 | #endif /* not mc68020 */ 527 | #endif /* mc68000 */ 528 | 529 | #if defined (__m88000__) && W_TYPE_SIZE == 32 530 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 531 | __asm__ ("addu.co %1,%r4,%r5 \ 532 | addu.ci %0,%r2,%r3" \ 533 | : "=r" ((USItype)(sh)), \ 534 | "=&r" ((USItype)(sl)) \ 535 | : "%rJ" ((USItype)(ah)), \ 536 | "rJ" ((USItype)(bh)), \ 537 | "%rJ" ((USItype)(al)), \ 538 | "rJ" ((USItype)(bl))) 539 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 540 | __asm__ ("subu.co %1,%r4,%r5 \ 541 | subu.ci %0,%r2,%r3" \ 542 | : "=r" ((USItype)(sh)), \ 543 | "=&r" 
((USItype)(sl)) \ 544 | : "rJ" ((USItype)(ah)), \ 545 | "rJ" ((USItype)(bh)), \ 546 | "rJ" ((USItype)(al)), \ 547 | "rJ" ((USItype)(bl))) 548 | #define count_leading_zeros(count, x) \ 549 | do { \ 550 | USItype __cbtmp; \ 551 | __asm__ ("ff1 %0,%1" \ 552 | : "=r" (__cbtmp) \ 553 | : "r" ((USItype)(x))); \ 554 | (count) = __cbtmp ^ 31; \ 555 | } while (0) 556 | #if defined (__mc88110__) 557 | #define umul_ppmm(wh, wl, u, v) \ 558 | do { \ 559 | union {UDItype __ll; \ 560 | struct {USItype __h, __l;} __i; \ 561 | } __xx; \ 562 | __asm__ ("mulu.d %0,%1,%2" \ 563 | : "=r" (__xx.__ll) \ 564 | : "r" ((USItype)(u)), \ 565 | "r" ((USItype)(v))); \ 566 | (wh) = __xx.__i.__h; \ 567 | (wl) = __xx.__i.__l; \ 568 | } while (0) 569 | #define udiv_qrnnd(q, r, n1, n0, d) \ 570 | ({union {UDItype __ll; \ 571 | struct {USItype __h, __l;} __i; \ 572 | } __xx; \ 573 | USItype __q; \ 574 | __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 575 | __asm__ ("divu.d %0,%1,%2" \ 576 | : "=r" (__q) \ 577 | : "r" (__xx.__ll), \ 578 | "r" ((USItype)(d))); \ 579 | (r) = (n0) - __q * (d); (q) = __q; }) 580 | #define UMUL_TIME 5 581 | #define UDIV_TIME 25 582 | #else 583 | #define UMUL_TIME 17 584 | #define UDIV_TIME 150 585 | #endif /* __mc88110__ */ 586 | #endif /* __m88000__ */ 587 | 588 | #if defined (__mips__) && W_TYPE_SIZE == 32 589 | #define umul_ppmm(w1, w0, u, v) \ 590 | __asm__ ("multu %2,%3 \ 591 | mflo %0 \ 592 | mfhi %1" \ 593 | : "=d" ((USItype)(w0)), \ 594 | "=d" ((USItype)(w1)) \ 595 | : "d" ((USItype)(u)), \ 596 | "d" ((USItype)(v))) 597 | #define UMUL_TIME 10 598 | #define UDIV_TIME 100 599 | #endif /* __mips__ */ 600 | 601 | #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 602 | #define umul_ppmm(w1, w0, u, v) \ 603 | __asm__ ("dmultu %2,%3 \ 604 | mflo %0 \ 605 | mfhi %1" \ 606 | : "=d" ((UDItype)(w0)), \ 607 | "=d" ((UDItype)(w1)) \ 608 | : "d" ((UDItype)(u)), \ 609 | "d" ((UDItype)(v))) 610 | #define UMUL_TIME 10 611 | #define UDIV_TIME 100 612 | #endif /* __mips__ */ 613 | 614 | #if defined (__ns32000__) && W_TYPE_SIZE == 32 615 | #define umul_ppmm(w1, w0, u, v) \ 616 | ({union {UDItype __ll; \ 617 | struct {USItype __l, __h;} __i; \ 618 | } __xx; \ 619 | __asm__ ("meid %2,%0" \ 620 | : "=g" (__xx.__ll) \ 621 | : "%0" ((USItype)(u)), \ 622 | "g" ((USItype)(v))); \ 623 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 624 | #define __umulsidi3(u, v) \ 625 | ({UDItype __w; \ 626 | __asm__ ("meid %2,%0" \ 627 | : "=g" (__w) \ 628 | : "%0" ((USItype)(u)), \ 629 | "g" ((USItype)(v))); \ 630 | __w; }) 631 | #define udiv_qrnnd(q, r, n1, n0, d) \ 632 | ({union {UDItype __ll; \ 633 | struct {USItype __l, __h;} __i; \ 634 | } __xx; \ 635 | __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 636 | __asm__ ("deid %2,%0" \ 637 | : "=g" (__xx.__ll) \ 638 | : "0" (__xx.__ll), \ 639 | "g" ((USItype)(d))); \ 640 | (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) 641 | #define count_trailing_zeros(count,x) \ 642 | do { \ 643 | __asm__ ("ffsd %2,%0" \ 644 | : "=r" ((USItype) (count)) \ 645 | : "0" ((USItype) 0), \ 646 | "r" ((USItype) (x))); \ 647 | } while (0) 648 | #endif /* __ns32000__ */ 649 | 650 | #if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32 651 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 652 | do { \ 653 | if (__builtin_constant_p (bh) && (bh) == 0) \ 654 | __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ 655 | : "=r" ((USItype)(sh)), \ 656 | "=&r" ((USItype)(sl)) \ 657 | : "%r" ((USItype)(ah)), \ 658 | "%r" ((USItype)(al)), \ 659 | "rI" ((USItype)(bl))); \ 660 | else if 
(__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ 661 | __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ 662 | : "=r" ((USItype)(sh)), \ 663 | "=&r" ((USItype)(sl)) \ 664 | : "%r" ((USItype)(ah)), \ 665 | "%r" ((USItype)(al)), \ 666 | "rI" ((USItype)(bl))); \ 667 | else \ 668 | __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ 669 | : "=r" ((USItype)(sh)), \ 670 | "=&r" ((USItype)(sl)) \ 671 | : "%r" ((USItype)(ah)), \ 672 | "r" ((USItype)(bh)), \ 673 | "%r" ((USItype)(al)), \ 674 | "rI" ((USItype)(bl))); \ 675 | } while (0) 676 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 677 | do { \ 678 | if (__builtin_constant_p (ah) && (ah) == 0) \ 679 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ 680 | : "=r" ((USItype)(sh)), \ 681 | "=&r" ((USItype)(sl)) \ 682 | : "r" ((USItype)(bh)), \ 683 | "rI" ((USItype)(al)), \ 684 | "r" ((USItype)(bl))); \ 685 | else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ 686 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ 687 | : "=r" ((USItype)(sh)), \ 688 | "=&r" ((USItype)(sl)) \ 689 | : "r" ((USItype)(bh)), \ 690 | "rI" ((USItype)(al)), \ 691 | "r" ((USItype)(bl))); \ 692 | else if (__builtin_constant_p (bh) && (bh) == 0) \ 693 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ 694 | : "=r" ((USItype)(sh)), \ 695 | "=&r" ((USItype)(sl)) \ 696 | : "r" ((USItype)(ah)), \ 697 | "rI" ((USItype)(al)), \ 698 | "r" ((USItype)(bl))); \ 699 | else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ 700 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ 701 | : "=r" ((USItype)(sh)), \ 702 | "=&r" ((USItype)(sl)) \ 703 | : "r" ((USItype)(ah)), \ 704 | "rI" ((USItype)(al)), \ 705 | "r" ((USItype)(bl))); \ 706 | else \ 707 | __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ 708 | : "=r" ((USItype)(sh)), \ 709 | "=&r" ((USItype)(sl)) \ 710 | : "r" ((USItype)(ah)), \ 711 | "r" ((USItype)(bh)), \ 712 | "rI" ((USItype)(al)), \ 713 | "r" ((USItype)(bl))); \ 714 | } while (0) 715 | #define count_leading_zeros(count, x) \ 716 | __asm__ ("{cntlz|cntlzw} %0,%1" \ 717 | : "=r" ((USItype)(count)) \ 718 | : "r" ((USItype)(x))) 719 | #if defined (_ARCH_PPC) 720 | #define umul_ppmm(ph, pl, m0, m1) \ 721 | do { \ 722 | USItype __m0 = (m0), __m1 = (m1); \ 723 | __asm__ ("mulhwu %0,%1,%2" \ 724 | : "=r" ((USItype) ph) \ 725 | : "%r" (__m0), \ 726 | "r" (__m1)); \ 727 | (pl) = __m0 * __m1; \ 728 | } while (0) 729 | #define UMUL_TIME 15 730 | #define smul_ppmm(ph, pl, m0, m1) \ 731 | do { \ 732 | SItype __m0 = (m0), __m1 = (m1); \ 733 | __asm__ ("mulhw %0,%1,%2" \ 734 | : "=r" ((SItype) ph) \ 735 | : "%r" (__m0), \ 736 | "r" (__m1)); \ 737 | (pl) = __m0 * __m1; \ 738 | } while (0) 739 | #define SMUL_TIME 14 740 | #define UDIV_TIME 120 741 | #else 742 | #define umul_ppmm(xh, xl, m0, m1) \ 743 | do { \ 744 | USItype __m0 = (m0), __m1 = (m1); \ 745 | __asm__ ("mul %0,%2,%3" \ 746 | : "=r" ((USItype)(xh)), \ 747 | "=q" ((USItype)(xl)) \ 748 | : "r" (__m0), \ 749 | "r" (__m1)); \ 750 | (xh) += ((((SItype) __m0 >> 31) & __m1) \ 751 | + (((SItype) __m1 >> 31) & __m0)); \ 752 | } while (0) 753 | #define UMUL_TIME 8 754 | #define smul_ppmm(xh, xl, m0, m1) \ 755 | __asm__ ("mul %0,%2,%3" \ 756 | : "=r" ((SItype)(xh)), \ 757 | "=q" ((SItype)(xl)) \ 758 | : "r" (m0), \ 759 | "r" (m1)) 760 | #define SMUL_TIME 4 761 | #define sdiv_qrnnd(q, r, nh, nl, d) \ 762 | __asm__ ("div %0,%2,%4" \ 763 | : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ 764 | : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) 765 | 
#define UDIV_TIME 100 766 | #endif 767 | #endif /* Power architecture variants. */ 768 | 769 | #if defined (__pyr__) && W_TYPE_SIZE == 32 770 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 771 | __asm__ ("addw %5,%1 \ 772 | addwc %3,%0" \ 773 | : "=r" ((USItype)(sh)), \ 774 | "=&r" ((USItype)(sl)) \ 775 | : "%0" ((USItype)(ah)), \ 776 | "g" ((USItype)(bh)), \ 777 | "%1" ((USItype)(al)), \ 778 | "g" ((USItype)(bl))) 779 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 780 | __asm__ ("subw %5,%1 \ 781 | subwb %3,%0" \ 782 | : "=r" ((USItype)(sh)), \ 783 | "=&r" ((USItype)(sl)) \ 784 | : "0" ((USItype)(ah)), \ 785 | "g" ((USItype)(bh)), \ 786 | "1" ((USItype)(al)), \ 787 | "g" ((USItype)(bl))) 788 | /* This insn doesn't work on ancient pyramids. */ 789 | #define umul_ppmm(w1, w0, u, v) \ 790 | ({union {UDItype __ll; \ 791 | struct {USItype __h, __l;} __i; \ 792 | } __xx; \ 793 | __xx.__i.__l = u; \ 794 | __asm__ ("uemul %3,%0" \ 795 | : "=r" (__xx.__i.__h), \ 796 | "=r" (__xx.__i.__l) \ 797 | : "1" (__xx.__i.__l), \ 798 | "g" ((USItype)(v))); \ 799 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 800 | #endif /* __pyr__ */ 801 | 802 | #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 803 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 804 | __asm__ ("a %1,%5 \ 805 | ae %0,%3" \ 806 | : "=r" ((USItype)(sh)), \ 807 | "=&r" ((USItype)(sl)) \ 808 | : "%0" ((USItype)(ah)), \ 809 | "r" ((USItype)(bh)), \ 810 | "%1" ((USItype)(al)), \ 811 | "r" ((USItype)(bl))) 812 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 813 | __asm__ ("s %1,%5 \ 814 | se %0,%3" \ 815 | : "=r" ((USItype)(sh)), \ 816 | "=&r" ((USItype)(sl)) \ 817 | : "0" ((USItype)(ah)), \ 818 | "r" ((USItype)(bh)), \ 819 | "1" ((USItype)(al)), \ 820 | "r" ((USItype)(bl))) 821 | #define umul_ppmm(ph, pl, m0, m1) \ 822 | do { \ 823 | USItype __m0 = (m0), __m1 = (m1); \ 824 | __asm__ ( \ 825 | "s r2,r2 \ 826 | mts r10,%2 \ 827 | m r2,%3 \ 828 | m r2,%3 \ 829 | m r2,%3 \ 830 | m r2,%3 \ 831 | m r2,%3 \ 832 | m r2,%3 \ 833 | m r2,%3 \ 834 | m r2,%3 \ 835 | m r2,%3 \ 836 | m r2,%3 \ 837 | m r2,%3 \ 838 | m r2,%3 \ 839 | m r2,%3 \ 840 | m r2,%3 \ 841 | m r2,%3 \ 842 | m r2,%3 \ 843 | cas %0,r2,r0 \ 844 | mfs r10,%1" \ 845 | : "=r" ((USItype)(ph)), \ 846 | "=r" ((USItype)(pl)) \ 847 | : "%r" (__m0), \ 848 | "r" (__m1) \ 849 | : "r2"); \ 850 | (ph) += ((((SItype) __m0 >> 31) & __m1) \ 851 | + (((SItype) __m1 >> 31) & __m0)); \ 852 | } while (0) 853 | #define UMUL_TIME 20 854 | #define UDIV_TIME 200 855 | #define count_leading_zeros(count, x) \ 856 | do { \ 857 | if ((x) >= 0x10000) \ 858 | __asm__ ("clz %0,%1" \ 859 | : "=r" ((USItype)(count)) \ 860 | : "r" ((USItype)(x) >> 16)); \ 861 | else \ 862 | { \ 863 | __asm__ ("clz %0,%1" \ 864 | : "=r" ((USItype)(count)) \ 865 | : "r" ((USItype)(x))); \ 866 | (count) += 16; \ 867 | } \ 868 | } while (0) 869 | #endif 870 | 871 | #if defined (__sparc__) && W_TYPE_SIZE == 32 872 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 873 | __asm__ ("addcc %r4,%5,%1 \ 874 | addx %r2,%3,%0" \ 875 | : "=r" ((USItype)(sh)), \ 876 | "=&r" ((USItype)(sl)) \ 877 | : "%rJ" ((USItype)(ah)), \ 878 | "rI" ((USItype)(bh)), \ 879 | "%rJ" ((USItype)(al)), \ 880 | "rI" ((USItype)(bl)) \ 881 | __CLOBBER_CC) 882 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 883 | __asm__ ("subcc %r4,%5,%1 \ 884 | subx %r2,%3,%0" \ 885 | : "=r" ((USItype)(sh)), \ 886 | "=&r" ((USItype)(sl)) \ 887 | : "rJ" ((USItype)(ah)), \ 888 | "rI" ((USItype)(bh)), \ 889 | "rJ" ((USItype)(al)), \ 890 | "rI" ((USItype)(bl)) \ 891 | __CLOBBER_CC) 892 | #if defined 
(__sparc_v8__) 893 | /* Don't match immediate range because, 1) it is not often useful, 894 | 2) the 'I' flag thinks of the range as a 13 bit signed interval, 895 | while we want to match a 13 bit interval, sign extended to 32 bits, 896 | but INTERPRETED AS UNSIGNED. */ 897 | #define umul_ppmm(w1, w0, u, v) \ 898 | __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 899 | : "=r" ((USItype)(w1)), \ 900 | "=r" ((USItype)(w0)) \ 901 | : "r" ((USItype)(u)), \ 902 | "r" ((USItype)(v))) 903 | #define UMUL_TIME 5 904 | #ifndef SUPERSPARC 905 | #define udiv_qrnnd(q, r, n1, n0, d) \ 906 | __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ 907 | : "=&r" ((USItype)(q)), \ 908 | "=&r" ((USItype)(r)) \ 909 | : "r" ((USItype)(n1)), \ 910 | "r" ((USItype)(n0)), \ 911 | "r" ((USItype)(d))) 912 | #define UDIV_TIME 25 913 | #endif /* SUPERSPARC */ 914 | #else /* ! __sparc_v8__ */ 915 | #if defined (__sparclite__) 916 | /* This has hardware multiply but not divide. It also has two additional 917 | instructions scan (ffs from high bit) and divscc. */ 918 | #define umul_ppmm(w1, w0, u, v) \ 919 | __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 920 | : "=r" ((USItype)(w1)), \ 921 | "=r" ((USItype)(w0)) \ 922 | : "r" ((USItype)(u)), \ 923 | "r" ((USItype)(v))) 924 | #define UMUL_TIME 5 925 | #define udiv_qrnnd(q, r, n1, n0, d) \ 926 | __asm__ ("! Inlined udiv_qrnnd \ 927 | wr %%g0,%2,%%y ! Not a delayed write for sparclite \ 928 | tst %%g0 \ 929 | divscc %3,%4,%%g1 \ 930 | divscc %%g1,%4,%%g1 \ 931 | divscc %%g1,%4,%%g1 \ 932 | divscc %%g1,%4,%%g1 \ 933 | divscc %%g1,%4,%%g1 \ 934 | divscc %%g1,%4,%%g1 \ 935 | divscc %%g1,%4,%%g1 \ 936 | divscc %%g1,%4,%%g1 \ 937 | divscc %%g1,%4,%%g1 \ 938 | divscc %%g1,%4,%%g1 \ 939 | divscc %%g1,%4,%%g1 \ 940 | divscc %%g1,%4,%%g1 \ 941 | divscc %%g1,%4,%%g1 \ 942 | divscc %%g1,%4,%%g1 \ 943 | divscc %%g1,%4,%%g1 \ 944 | divscc %%g1,%4,%%g1 \ 945 | divscc %%g1,%4,%%g1 \ 946 | divscc %%g1,%4,%%g1 \ 947 | divscc %%g1,%4,%%g1 \ 948 | divscc %%g1,%4,%%g1 \ 949 | divscc %%g1,%4,%%g1 \ 950 | divscc %%g1,%4,%%g1 \ 951 | divscc %%g1,%4,%%g1 \ 952 | divscc %%g1,%4,%%g1 \ 953 | divscc %%g1,%4,%%g1 \ 954 | divscc %%g1,%4,%%g1 \ 955 | divscc %%g1,%4,%%g1 \ 956 | divscc %%g1,%4,%%g1 \ 957 | divscc %%g1,%4,%%g1 \ 958 | divscc %%g1,%4,%%g1 \ 959 | divscc %%g1,%4,%%g1 \ 960 | divscc %%g1,%4,%0 \ 961 | rd %%y,%1 \ 962 | bl,a 1f \ 963 | add %1,%4,%1 \ 964 | 1: ! End of inline udiv_qrnnd" \ 965 | : "=r" ((USItype)(q)), \ 966 | "=r" ((USItype)(r)) \ 967 | : "r" ((USItype)(n1)), \ 968 | "r" ((USItype)(n0)), \ 969 | "rI" ((USItype)(d)) \ 970 | : "%g1" __AND_CLOBBER_CC) 971 | #define UDIV_TIME 37 972 | #define count_leading_zeros(count, x) \ 973 | __asm__ ("scan %1,0,%0" \ 974 | : "=r" ((USItype)(x)) \ 975 | : "r" ((USItype)(count))) 976 | #endif /* __sparclite__ */ 977 | #endif /* __sparc_v8__ */ 978 | /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ 979 | #ifndef umul_ppmm 980 | #define umul_ppmm(w1, w0, u, v) \ 981 | __asm__ ("! Inlined umul_ppmm \ 982 | wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr \ 983 | sra %3,31,%%g2 ! Don't move this insn \ 984 | and %2,%%g2,%%g2 ! Don't move this insn \ 985 | andcc %%g0,0,%%g1 ! 
Don't move this insn \ 986 | mulscc %%g1,%3,%%g1 \ 987 | mulscc %%g1,%3,%%g1 \ 988 | mulscc %%g1,%3,%%g1 \ 989 | mulscc %%g1,%3,%%g1 \ 990 | mulscc %%g1,%3,%%g1 \ 991 | mulscc %%g1,%3,%%g1 \ 992 | mulscc %%g1,%3,%%g1 \ 993 | mulscc %%g1,%3,%%g1 \ 994 | mulscc %%g1,%3,%%g1 \ 995 | mulscc %%g1,%3,%%g1 \ 996 | mulscc %%g1,%3,%%g1 \ 997 | mulscc %%g1,%3,%%g1 \ 998 | mulscc %%g1,%3,%%g1 \ 999 | mulscc %%g1,%3,%%g1 \ 1000 | mulscc %%g1,%3,%%g1 \ 1001 | mulscc %%g1,%3,%%g1 \ 1002 | mulscc %%g1,%3,%%g1 \ 1003 | mulscc %%g1,%3,%%g1 \ 1004 | mulscc %%g1,%3,%%g1 \ 1005 | mulscc %%g1,%3,%%g1 \ 1006 | mulscc %%g1,%3,%%g1 \ 1007 | mulscc %%g1,%3,%%g1 \ 1008 | mulscc %%g1,%3,%%g1 \ 1009 | mulscc %%g1,%3,%%g1 \ 1010 | mulscc %%g1,%3,%%g1 \ 1011 | mulscc %%g1,%3,%%g1 \ 1012 | mulscc %%g1,%3,%%g1 \ 1013 | mulscc %%g1,%3,%%g1 \ 1014 | mulscc %%g1,%3,%%g1 \ 1015 | mulscc %%g1,%3,%%g1 \ 1016 | mulscc %%g1,%3,%%g1 \ 1017 | mulscc %%g1,%3,%%g1 \ 1018 | mulscc %%g1,0,%%g1 \ 1019 | add %%g1,%%g2,%0 \ 1020 | rd %%y,%1" \ 1021 | : "=r" ((USItype)(w1)), \ 1022 | "=r" ((USItype)(w0)) \ 1023 | : "%rI" ((USItype)(u)), \ 1024 | "r" ((USItype)(v)) \ 1025 | : "%g1", "%g2" __AND_CLOBBER_CC) 1026 | #define UMUL_TIME 39 /* 39 instructions */ 1027 | #endif 1028 | #ifndef udiv_qrnnd 1029 | #ifndef LONGLONG_STANDALONE 1030 | #define udiv_qrnnd(q, r, n1, n0, d) \ 1031 | do { USItype __r; \ 1032 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 1033 | (r) = __r; \ 1034 | } while (0) \ 1035 | extern USItype __udiv_qrnnd (); 1036 | #define UDIV_TIME 140 1037 | #endif /* LONGLONG_STANDALONE */ 1038 | #endif /* udiv_qrnnd */ 1039 | #endif /* __sparc__ */ 1040 | 1041 | #if defined (__vax__) && W_TYPE_SIZE == 32 1042 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1043 | __asm__ ("addl2 %5,%1 \ 1044 | adwc %3,%0" \ 1045 | : "=g" ((USItype)(sh)), \ 1046 | "=&g" ((USItype)(sl)) \ 1047 | : "%0" ((USItype)(ah)), \ 1048 | "g" ((USItype)(bh)), \ 1049 | "%1" ((USItype)(al)), \ 1050 | "g" ((USItype)(bl))) 1051 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1052 | __asm__ ("subl2 %5,%1 \ 1053 | sbwc %3,%0" \ 1054 | : "=g" ((USItype)(sh)), \ 1055 | "=&g" ((USItype)(sl)) \ 1056 | : "0" ((USItype)(ah)), \ 1057 | "g" ((USItype)(bh)), \ 1058 | "1" ((USItype)(al)), \ 1059 | "g" ((USItype)(bl))) 1060 | #define umul_ppmm(xh, xl, m0, m1) \ 1061 | do { \ 1062 | union {UDItype __ll; \ 1063 | struct {USItype __l, __h;} __i; \ 1064 | } __xx; \ 1065 | USItype __m0 = (m0), __m1 = (m1); \ 1066 | __asm__ ("emul %1,%2,$0,%0" \ 1067 | : "=g" (__xx.__ll) \ 1068 | : "g" (__m0), \ 1069 | "g" (__m1)); \ 1070 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1071 | (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1072 | + (((SItype) __m1 >> 31) & __m0)); \ 1073 | } while (0) 1074 | #define sdiv_qrnnd(q, r, n1, n0, d) \ 1075 | do { \ 1076 | union {DItype __ll; \ 1077 | struct {SItype __l, __h;} __i; \ 1078 | } __xx; \ 1079 | __xx.__i.__h = n1; __xx.__i.__l = n0; \ 1080 | __asm__ ("ediv %3,%2,%0,%1" \ 1081 | : "=g" (q), "=g" (r) \ 1082 | : "g" (__xx.ll), "g" (d)); \ 1083 | } while (0) 1084 | #endif /* __vax__ */ 1085 | 1086 | #if defined (__z8000__) && W_TYPE_SIZE == 16 1087 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1088 | __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1089 | : "=r" ((unsigned int)(sh)), \ 1090 | "=&r" ((unsigned int)(sl)) \ 1091 | : "%0" ((unsigned int)(ah)), \ 1092 | "r" ((unsigned int)(bh)), \ 1093 | "%1" ((unsigned int)(al)), \ 1094 | "rQR" ((unsigned int)(bl))) 1095 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1096 | __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1097 | 
: "=r" ((unsigned int)(sh)), \ 1098 | "=&r" ((unsigned int)(sl)) \ 1099 | : "0" ((unsigned int)(ah)), \ 1100 | "r" ((unsigned int)(bh)), \ 1101 | "1" ((unsigned int)(al)), \ 1102 | "rQR" ((unsigned int)(bl))) 1103 | #define umul_ppmm(xh, xl, m0, m1) \ 1104 | do { \ 1105 | union {long int __ll; \ 1106 | struct {unsigned int __h, __l;} __i; \ 1107 | } __xx; \ 1108 | unsigned int __m0 = (m0), __m1 = (m1); \ 1109 | __asm__ ("mult %S0,%H3" \ 1110 | : "=r" (__xx.__i.__h), \ 1111 | "=r" (__xx.__i.__l) \ 1112 | : "%1" (__m0), \ 1113 | "rQR" (__m1)); \ 1114 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1115 | (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1116 | + (((signed int) __m1 >> 15) & __m0)); \ 1117 | } while (0) 1118 | #define umul_ppmm_off(xh, xl, m0, m1) \ 1119 | do { \ 1120 | union {long int __ll; \ 1121 | struct {unsigned int __h, __l;} __i; \ 1122 | } __xx; \ 1123 | __asm__ ("mult %S0,%H3" \ 1124 | : "=r" (__xx.__i.__h), \ 1125 | "=r" (__xx.__i.__l) \ 1126 | : "%1" (m0), \ 1127 | "rQR" (m1)); \ 1128 | (xh) = __xx.__i.__h + ((((signed int) m0 >> 15) & m1) \ 1129 | + (((signed int) m1 >> 15) & m0)); \ 1130 | (xl) = __xx.__i.__l; \ 1131 | } while (0) 1132 | #endif /* __z8000__ */ 1133 | 1134 | #endif /* __GNUC__ */ 1135 | 1136 | 1137 | #if !defined (umul_ppmm) && defined (__umulsidi3) 1138 | #define umul_ppmm(ph, pl, m0, m1) \ 1139 | { \ 1140 | UDWtype __ll = __umulsidi3 (m0, m1); \ 1141 | ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1142 | pl = (UWtype) __ll; \ 1143 | } 1144 | #endif 1145 | 1146 | #if !defined (__umulsidi3) 1147 | #define __umulsidi3(u, v) \ 1148 | ({UWtype __hi, __lo; \ 1149 | umul_ppmm (__hi, __lo, u, v); \ 1150 | ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1151 | #endif 1152 | 1153 | /* If this machine has no inline assembler, use C macros. */ 1154 | 1155 | #if !defined (add_ssaaaa) 1156 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1157 | do { \ 1158 | UWtype __x; \ 1159 | __x = (al) + (bl); \ 1160 | (sh) = (ah) + (bh) + (__x < (al)); \ 1161 | (sl) = __x; \ 1162 | } while (0) 1163 | #endif 1164 | 1165 | #if !defined (sub_ddmmss) 1166 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1167 | do { \ 1168 | UWtype __x; \ 1169 | __x = (al) - (bl); \ 1170 | (sh) = (ah) - (bh) - (__x > (al)); \ 1171 | (sl) = __x; \ 1172 | } while (0) 1173 | #endif 1174 | 1175 | #if !defined (umul_ppmm) 1176 | #define umul_ppmm(w1, w0, u, v) \ 1177 | do { \ 1178 | UWtype __x0, __x1, __x2, __x3; \ 1179 | UHWtype __ul, __vl, __uh, __vh; \ 1180 | UWtype __u = (u), __v = (v); \ 1181 | \ 1182 | __ul = __ll_lowpart (__u); \ 1183 | __uh = __ll_highpart (__u); \ 1184 | __vl = __ll_lowpart (__v); \ 1185 | __vh = __ll_highpart (__v); \ 1186 | \ 1187 | __x0 = (UWtype) __ul * __vl; \ 1188 | __x1 = (UWtype) __ul * __vh; \ 1189 | __x2 = (UWtype) __uh * __vl; \ 1190 | __x3 = (UWtype) __uh * __vh; \ 1191 | \ 1192 | __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 1193 | __x1 += __x2; /* but this indeed can */ \ 1194 | if (__x1 < __x2) /* did we get it? */ \ 1195 | __x3 += __ll_B; /* yes, add it in the proper pos. 
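   (a carry lost from the middle sum __x1 is worth exactly __ll_B once folded into the high word __x3, which is why __ll_B is the amount added)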
*/ \ 1196 | \ 1197 | (w1) = __x3 + __ll_highpart (__x1); \ 1198 | (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\ 1199 | } while (0) 1200 | #endif 1201 | 1202 | #if !defined (umul_ppmm) 1203 | #define smul_ppmm(w1, w0, u, v) \ 1204 | do { \ 1205 | UWtype __w1; \ 1206 | USItype __m0 = (u), __m1 = (v); \ 1207 | umul_ppmm (__w1, w0, __m0, __m1); \ 1208 | (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1209 | - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1210 | } while (0) 1211 | #endif 1212 | 1213 | /* Define this unconditionally, so it can be used for debugging. */ 1214 | #define __udiv_qrnnd_c(q, r, n1, n0, d) \ 1215 | do { \ 1216 | UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ 1217 | __d1 = __ll_highpart (d); \ 1218 | __d0 = __ll_lowpart (d); \ 1219 | \ 1220 | __r1 = (n1) % __d1; \ 1221 | __q1 = (n1) / __d1; \ 1222 | __m = (UWtype) __q1 * __d0; \ 1223 | __r1 = __r1 * __ll_B | __ll_highpart (n0); \ 1224 | if (__r1 < __m) \ 1225 | { \ 1226 | __q1--, __r1 += (d); \ 1227 | if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ 1228 | if (__r1 < __m) \ 1229 | __q1--, __r1 += (d); \ 1230 | } \ 1231 | __r1 -= __m; \ 1232 | \ 1233 | __r0 = __r1 % __d1; \ 1234 | __q0 = __r1 / __d1; \ 1235 | __m = (UWtype) __q0 * __d0; \ 1236 | __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ 1237 | if (__r0 < __m) \ 1238 | { \ 1239 | __q0--, __r0 += (d); \ 1240 | if (__r0 >= (d)) \ 1241 | if (__r0 < __m) \ 1242 | __q0--, __r0 += (d); \ 1243 | } \ 1244 | __r0 -= __m; \ 1245 | \ 1246 | (q) = (UWtype) __q1 * __ll_B | __q0; \ 1247 | (r) = __r0; \ 1248 | } while (0) 1249 | 1250 | /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through 1251 | __udiv_w_sdiv (defined in libgcc or elsewhere). */ 1252 | #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) 1253 | #define udiv_qrnnd(q, r, nh, nl, d) \ 1254 | do { \ 1255 | UWtype __r; \ 1256 | (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ 1257 | (r) = __r; \ 1258 | } while (0) 1259 | #endif 1260 | 1261 | /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ 1262 | #if !defined (udiv_qrnnd) 1263 | #define UDIV_NEEDS_NORMALIZATION 1 1264 | #define udiv_qrnnd __udiv_qrnnd_c 1265 | #endif 1266 | 1267 | #if !defined (count_leading_zeros) 1268 | extern 1269 | #ifdef __STDC__ 1270 | const 1271 | #endif 1272 | unsigned char __clz_tab[]; 1273 | #define count_leading_zeros(count, x) \ 1274 | do { \ 1275 | UWtype __xr = (x); \ 1276 | UWtype __a; \ 1277 | \ 1278 | if (W_TYPE_SIZE <= 32) \ 1279 | { \ 1280 | __a = __xr < ((UWtype) 1 << 2*__BITS4) \ 1281 | ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ 1282 | : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\ 1283 | } \ 1284 | else \ 1285 | { \ 1286 | for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ 1287 | if (((__xr >> __a) & 0xff) != 0) \ 1288 | break; \ 1289 | } \ 1290 | \ 1291 | (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ 1292 | } while (0) 1293 | /* This version gives a well-defined value for zero. */ 1294 | #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1295 | #endif 1296 | 1297 | #if !defined (count_trailing_zeros) 1298 | /* Define count_trailing_zeros using count_leading_zeros. The latter might be 1299 | defined in asm, but if it is not, the C version above is good enough. 
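   The fallback below relies on the identity that x & -x isolates the least significant set bit of x; the leading-zero count of that single bit, subtracted from W_TYPE_SIZE - 1, is exactly the number of trailing zeros.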
*/ 1300 | #define count_trailing_zeros(count, x) \ 1301 | do { \ 1302 | UWtype __ctz_x = (x); \ 1303 | UWtype __ctz_c; \ 1304 | count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ 1305 | (count) = W_TYPE_SIZE - 1 - __ctz_c; \ 1306 | } while (0) 1307 | #endif 1308 | 1309 | #ifndef UDIV_NEEDS_NORMALIZATION 1310 | #define UDIV_NEEDS_NORMALIZATION 0 1311 | #endif 1312 | -------------------------------------------------------------------------------- /run_program.def: -------------------------------------------------------------------------------- 1 | /* Superoptimizer -- execute an instruction sequence in order to 2 | test its correctness. 3 | 4 | Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. 5 | 6 | This program is free software; you can redistribute it and/or modify it 7 | under the terms of the GNU General Public License as published by the 8 | Free Software Foundation; either version 2, or (at your option) any 9 | later version. 10 | 11 | This program is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; see the file COPYING. If not, write to the Free 18 | Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 | 20 | int 21 | #if HAS_NULLIFICATION 22 | run_program(insn_t *sequence, int n_insns, word *regs, int arity) 23 | #else 24 | run_program(insn_t *sequence, int n_insns, word *regs) 25 | #endif 26 | { 27 | int pc; 28 | insn_t insn; 29 | word v, r1, r2; 30 | int co, ci = -1; /* Combine co and ci into cy? */ 31 | #if HAS_NULLIFICATION 32 | int nullify_flag = 0; 33 | char reg_defined[0x100]; 34 | int i; 35 | #endif 36 | 37 | if (n_insns == 0) 38 | return ci; 39 | 40 | #if HAS_NULLIFICATION 41 | memset (reg_defined, 0, 0x100); 42 | 43 | for (i = 0; i < arity; i++) 44 | reg_defined[i] = 1; 45 | /* The immediate values should be considered `defined'. */ 46 | for (i = -1; i < BITS_PER_WORD; i++) 47 | reg_defined[0x20 + i] = 1; 48 | reg_defined[0x20 - 2] = 1; 49 | reg_defined[0x20 - 3] = 1; 50 | reg_defined[0x20 - 4] = 1; 51 | reg_defined[0x20 - 5] = 1; 52 | #endif 53 | 54 | for (pc = 0; pc < n_insns; pc++) 55 | { 56 | insn = sequence[pc]; 57 | 58 | #if HAS_NULLIFICATION 59 | if (nullify_flag) 60 | { 61 | nullify_flag = 0; 62 | continue; 63 | } 64 | 65 | /* Check if the source operands have become defined.
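   The function arguments (registers 0 .. arity-1) and the pseudo-registers holding immediate operands were marked defined above; any other register only becomes defined when an earlier instruction in the sequence writes it. A sequence that reads a still-undefined register cannot be a correct program, so it is rejected just below with the special return value -2.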
*/ 66 | if (!reg_defined[insn.s1] || !reg_defined[insn.s2]) 67 | return -2; 68 | #endif 69 | 70 | r1 = regs[insn.s1]; 71 | r2 = regs[insn.s2]; 72 | 73 | switch (insn.opcode) 74 | { 75 | default: 76 | fprintf(stderr, 77 | "internal error: undefined instruction generated\n"); 78 | abort(); 79 | 80 | case COPY: PERFORM_COPY(v, co, r1, ci); break; 81 | case EXCHANGE: 82 | regs[insn.s1] = r2; 83 | regs[insn.s2] = r1; 84 | continue; 85 | 86 | case ADD: PERFORM_ADD(v, co, r1, r2, ci); break; 87 | case ADD_CI: PERFORM_ADD_CI(v, co, r1, r2, ci); break; 88 | case ADD_CO: PERFORM_ADD_CO(v, co, r1, r2, ci); break; 89 | case ADD_CIO: PERFORM_ADD_CIO(v, co, r1, r2, ci); break; 90 | 91 | case SUB: PERFORM_SUB(v, co, r1, r2, ci); break; 92 | case SUB_CI: PERFORM_SUB_CI(v, co, r1, r2, ci); break; 93 | case SUB_CO: PERFORM_SUB_CO(v, co, r1, r2, ci); break; 94 | case SUB_CIO: PERFORM_SUB_CIO(v, co, r1, r2, ci); break; 95 | 96 | case ADC_CI: PERFORM_ADC_CI(v, co, r1, r2, ci); break; 97 | case ADC_CO: PERFORM_ADC_CO(v, co, r1, r2, ci); break; 98 | case ADC_CIO: PERFORM_ADC_CIO(v, co, r1, r2, ci); break; 99 | 100 | case ADDCMPL: PERFORM_ADDCMPL(v, co, r1, r2, ci); break; 101 | 102 | case LDA16F: PERFORM_LDA16F(v, co, r1, r2, ci); break; 103 | case LDA16B: PERFORM_LDA16B(v, co, r1, r2, ci); break; 104 | case LDAWF: PERFORM_LDAWF(v, co, r1, r2, ci); break; 105 | case LDAWB: PERFORM_LDAWB(v, co, r1, r2, ci); break; 106 | 107 | case CMP: PERFORM_CMP(v, co, r1, r2, ci); break; 108 | case CMPC: PERFORM_CMPC(v, co, r1, r2, ci); break; 109 | case CMPPAR: PERFORM_CMPPAR(v, co, r1, r2, ci); break; 110 | 111 | case AND: PERFORM_AND(v, co, r1, r2, ci); break; 112 | case IOR: PERFORM_IOR(v, co, r1, r2, ci); break; 113 | case XOR: PERFORM_XOR(v, co, r1, r2, ci); break; 114 | case ANDC: PERFORM_ANDC(v, co, r1, r2, ci); break; 115 | case IORC: PERFORM_IORC(v, co, r1, r2, ci); break; 116 | case EQV: PERFORM_EQV(v, co, r1, r2, ci); break; 117 | case NAND: PERFORM_NAND(v, co, r1, r2, ci); break; 118 | case NOR: PERFORM_NOR(v, co, r1, r2, ci); break; 119 | 120 | case AND_RC: PERFORM_AND_RC(v, co, r1, r2, ci); break; 121 | case IOR_RC: PERFORM_IOR_RC(v, co, r1, r2, ci); break; 122 | case XOR_RC: PERFORM_XOR_RC(v, co, r1, r2, ci); break; 123 | case ANDC_RC: PERFORM_ANDC_RC(v, co, r1, r2, ci); break; 124 | case IORC_RC: PERFORM_IORC_RC(v, co, r1, r2, ci); break; 125 | case EQV_RC: PERFORM_EQV_RC(v, co, r1, r2, ci); break; 126 | case NAND_RC: PERFORM_NAND_RC(v, co, r1, r2, ci); break; 127 | case NOR_RC: PERFORM_NOR_RC(v, co, r1, r2, ci); break; 128 | 129 | case AND_CC: PERFORM_AND_CC(v, co, r1, r2, ci); break; 130 | case IOR_CC: PERFORM_IOR_CC(v, co, r1, r2, ci); break; 131 | case XOR_CC: PERFORM_XOR_CC(v, co, r1, r2, ci); break; 132 | case ANDC_CC: PERFORM_ANDC_CC(v, co, r1, r2, ci); break; 133 | case IORC_CC: PERFORM_IORC_CC(v, co, r1, r2, ci); break; 134 | case EQV_CC: PERFORM_EQV_CC(v, co, r1, r2, ci); break; 135 | case NAND_CC: PERFORM_NAND_CC(v, co, r1, r2, ci); break; 136 | case NOR_CC: PERFORM_NOR_CC(v, co, r1, r2, ci); break; 137 | 138 | case LSHIFTR: PERFORM_LSHIFTR(v, co, r1, r2, ci); break; 139 | case ASHIFTR: PERFORM_ASHIFTR(v, co, r1, r2, ci); break; 140 | case SHIFTL: PERFORM_SHIFTL(v, co, r1, r2, ci); break; 141 | case ROTATEL: PERFORM_ROTATEL(v, co, r1, r2, ci); break; 142 | case LSHIFTR_CO:PERFORM_LSHIFTR_CO(v, co, r1, r2, ci); break; 143 | case ASHIFTR_CO:PERFORM_ASHIFTR_CO(v, co, r1, r2, ci); break; 144 | case SHIFTL_CO: PERFORM_SHIFTL_CO(v, co, r1, r2, ci); break; 145 | case ROTATEL_CO:PERFORM_ROTATEL_CO(v, co, 
r1, r2, ci); break;
146 | case ROTATER_CO:PERFORM_ROTATER_CO(v, co, r1, r2, ci); break;
147 | case ROTATEXL_CIO:PERFORM_ROTATEXL_CIO(v, co, r1, r2, ci); break;
148 | case ROTATEXR_CIO:PERFORM_ROTATEXR_CIO(v, co, r1, r2, ci); break;
149 | case ASHIFTR_CON:PERFORM_ASHIFTR_CON(v, co, r1, r2, ci); break;
150 | 
151 | case EXTS1: PERFORM_EXTS1(v, co, r1, r2, ci); break;
152 | case EXTS2: PERFORM_EXTS2(v, co, r1, r2, ci); break;
153 | case EXTS8: PERFORM_EXTS8(v, co, r1, r2, ci); break;
154 | case EXTS16: PERFORM_EXTS16(v, co, r1, r2, ci); break;
155 | case EXTU1: PERFORM_EXTU1(v, co, r1, r2, ci); break;
156 | case EXTU2: PERFORM_EXTU2(v, co, r1, r2, ci); break;
157 | 
158 | case CLZ: PERFORM_CLZ(v, co, r1, ci); break;
159 | case CTZ: PERFORM_CTZ(v, co, r1, ci); break;
160 | case BITREV: PERFORM_BITREV(v, co, r1, ci); break;
161 | case BYTEREV: PERFORM_BYTEREV(v, co, r1, ci); break;
162 | case FF1: PERFORM_FF1(v, co, r1, ci); break;
163 | case FF0: PERFORM_FF0(v, co, r1, ci); break;
164 | case BSF86: PERFORM_BSF86(v, co, r1, ci); break;
165 | 
166 | case ABSVAL: PERFORM_ABSVAL(v, co, r1, ci); break;
167 | case NABSVAL: PERFORM_NABSVAL(v, co, r1, ci); break;
168 | 
169 | case MKMSK: PERFORM_MKMSK(v, co, r1, ci); break;
170 | 
171 | case ZEXT: PERFORM_ZEXT(v, co, r1, r2, ci); break;
172 | case SEXT: PERFORM_SEXT(v, co, r1, r2, ci); break;
173 | 
174 | case DOZ: PERFORM_DOZ(v, co, r1, r2, ci); break;
175 | case SETCY: co = 1; break;
176 | case CLRCY: co = 0; break;
177 | case COMCY: co = ci ^ 1; break;
178 | 
179 | case CPEQ: PERFORM_CPEQ(v, co, r1, r2, ci); break;
180 | case CPGE: PERFORM_CPGE(v, co, r1, r2, ci); break;
181 | case CPGEU: PERFORM_CPGEU(v, co, r1, r2, ci); break;
182 | case CPGT: PERFORM_CPGT(v, co, r1, r2, ci); break;
183 | case CPGTU: PERFORM_CPGTU(v, co, r1, r2, ci); break;
184 | case CPLE: PERFORM_CPLE(v, co, r1, r2, ci); break;
185 | case CPLEU: PERFORM_CPLEU(v, co, r1, r2, ci); break;
186 | case CPLT: PERFORM_CPLT(v, co, r1, r2, ci); break;
187 | case CPLTU: PERFORM_CPLTU(v, co, r1, r2, ci); break;
188 | case CPNEQ: PERFORM_CPNEQ(v, co, r1, r2, ci); break;
189 | 
190 | case CMPEQ: PERFORM_CMPEQ(v, co, r1, r2, ci); break;
191 | case CMPLE: PERFORM_CMPLE(v, co, r1, r2, ci); break;
192 | case CMPLEU: PERFORM_CMPLEU(v, co, r1, r2, ci); break;
193 | case CMPLT: PERFORM_CMPLT(v, co, r1, r2, ci); break;
194 | case CMPLTU: PERFORM_CMPLTU(v, co, r1, r2, ci); break;
195 | 
196 | case CYEQ: PERFORM_CYEQ(v, co, r1, r2, ci); break;
197 | case CYGES: PERFORM_CYGES(v, co, r1, r2, ci); break;
198 | case CYGEU: PERFORM_CYGEU(v, co, r1, r2, ci); break;
199 | case CYGTS: PERFORM_CYGTS(v, co, r1, r2, ci); break;
200 | case CYGTU: PERFORM_CYGTU(v, co, r1, r2, ci); break;
201 | case CYAND: PERFORM_CYAND(v, co, r1, r2, ci); break;
202 | 
203 | case MERGE16: PERFORM_MERGE16(v, co, r1, r2, ci); break;
204 | case DECR_CYEQ: PERFORM_DECR_CYEQ(v, co, r1, r2, ci); break;
205 | 
206 | case CMOVEQ:
207 |   v = regs[insn.d];
208 |   PERFORM_CMOVEQ(v, co, r1, r2, ci);
209 |   break;
210 | case CMOVNE:
211 |   v = regs[insn.d];
212 |   PERFORM_CMOVNE(v, co, r1, r2, ci);
213 |   break;
214 | case CMOVLT:
215 |   v = regs[insn.d];
216 |   PERFORM_CMOVLT(v, co, r1, r2, ci);
217 |   break;
218 | case CMOVGE:
219 |   v = regs[insn.d];
220 |   PERFORM_CMOVGE(v, co, r1, r2, ci);
221 |   break;
222 | case CMOVLE:
223 |   v = regs[insn.d];
224 |   PERFORM_CMOVLE(v, co, r1, r2, ci);
225 |   break;
226 | case CMOVGT:
227 |   v = regs[insn.d];
228 |   PERFORM_CMOVGT(v, co, r1, r2, ci);
229 |   break;
230 | 
231 | case MUL: PERFORM_MUL(v, co, r1, r2, ci); break;
232 | case UMULWIDEN_HI: PERFORM_UMULWIDEN_HI(v, co, r1, r2, ci); break;
233 | case INVDIV: PERFORM_INVDIV(v, co, r1, ci); break;
234 | case INVMOD: PERFORM_INVMOD(v, co, r1, ci); break;
235 | 
236 | #if HAS_NULLIFICATION
237 | case ADD_SEQ:
238 |   PERFORM_ADD_SEQ(v, co, nullify_flag, r1, r2, ci);
239 |   break;
240 | case ADD_SNE:
241 |   PERFORM_ADD_SNE(v, co, nullify_flag, r1, r2, ci);
242 |   break;
243 | case ADD_SLTS:
244 |   PERFORM_ADD_SLTS(v, co, nullify_flag, r1, r2, ci);
245 |   break;
246 | case ADD_SGES:
247 |   PERFORM_ADD_SGES(v, co, nullify_flag, r1, r2, ci);
248 |   break;
249 | case ADD_SLES:
250 |   PERFORM_ADD_SLES(v, co, nullify_flag, r1, r2, ci);
251 |   break;
252 | case ADD_SGTS:
253 |   PERFORM_ADD_SGTS(v, co, nullify_flag, r1, r2, ci);
254 |   break;
255 | case ADD_SLTU:
256 |   PERFORM_ADD_SLTU(v, co, nullify_flag, r1, r2, ci);
257 |   break;
258 | case ADD_SGEU:
259 |   PERFORM_ADD_SGEU(v, co, nullify_flag, r1, r2, ci);
260 |   break;
261 | case ADD_SLEU:
262 |   PERFORM_ADD_SLEU(v, co, nullify_flag, r1, r2, ci);
263 |   break;
264 | case ADD_SGTU:
265 |   PERFORM_ADD_SGTU(v, co, nullify_flag, r1, r2, ci);
266 |   break;
267 | case ADD_SOVS:
268 |   PERFORM_ADD_SOVS(v, co, nullify_flag, r1, r2, ci);
269 |   break;
270 | case ADD_SNVS:
271 |   PERFORM_ADD_SNVS(v, co, nullify_flag, r1, r2, ci);
272 |   break;
273 | case ADD_SODD:
274 |   PERFORM_ADD_SODD(v, co, nullify_flag, r1, r2, ci);
275 |   break;
276 | case ADD_SEVN:
277 |   PERFORM_ADD_SEVN(v, co, nullify_flag, r1, r2, ci);
278 |   break;
279 | case ADD_S:
280 |   PERFORM_ADD_S(v, co, nullify_flag, r1, r2, ci);
281 |   break;
282 | case ADD_CIO_SEQ:
283 |   PERFORM_ADD_CIO_SEQ(v, co, nullify_flag, r1, r2, ci);
284 |   break;
285 | case ADD_CIO_SNE:
286 |   PERFORM_ADD_CIO_SNE(v, co, nullify_flag, r1, r2, ci);
287 |   break;
288 | case ADD_CIO_SLTU:
289 |   PERFORM_ADD_CIO_SLTU(v, co, nullify_flag, r1, r2, ci);
290 |   break;
291 | case ADD_CIO_SGEU:
292 |   PERFORM_ADD_CIO_SGEU(v, co, nullify_flag, r1, r2, ci);
293 |   break;
294 | case ADD_CIO_SLEU:
295 |   PERFORM_ADD_CIO_SLEU(v, co, nullify_flag, r1, r2, ci);
296 |   break;
297 | case ADD_CIO_SGTU:
298 |   PERFORM_ADD_CIO_SGTU(v, co, nullify_flag, r1, r2, ci);
299 |   break;
300 | case ADD_CIO_SODD:
301 |   PERFORM_ADD_CIO_SODD(v, co, nullify_flag, r1, r2, ci);
302 |   break;
303 | case ADD_CIO_SEVN:
304 |   PERFORM_ADD_CIO_SEVN(v, co, nullify_flag, r1, r2, ci);
305 |   break;
306 | case ADD_CIO_S:
307 |   PERFORM_ADD_CIO_S(v, co, nullify_flag, r1, r2, ci);
308 |   break;
309 | case ADD_CO_SEQ:
310 |   PERFORM_ADD_CO_SEQ(v, co, nullify_flag, r1, r2, ci);
311 |   break;
312 | case ADD_CO_SNE:
313 |   PERFORM_ADD_CO_SNE(v, co, nullify_flag, r1, r2, ci);
314 |   break;
315 | case ADD_CO_SLTU:
316 |   PERFORM_ADD_CO_SLTU(v, co, nullify_flag, r1, r2, ci);
317 |   break;
318 | case ADD_CO_SGEU:
319 |   PERFORM_ADD_CO_SGEU(v, co, nullify_flag, r1, r2, ci);
320 |   break;
321 | case ADD_CO_SLEU:
322 |   PERFORM_ADD_CO_SLEU(v, co, nullify_flag, r1, r2, ci);
323 |   break;
324 | case ADD_CO_SGTU:
325 |   PERFORM_ADD_CO_SGTU(v, co, nullify_flag, r1, r2, ci);
326 |   break;
327 | case ADD_CO_SODD:
328 |   PERFORM_ADD_CO_SODD(v, co, nullify_flag, r1, r2, ci);
329 |   break;
330 | case ADD_CO_SEVN:
331 |   PERFORM_ADD_CO_SEVN(v, co, nullify_flag, r1, r2, ci);
332 |   break;
333 | case ADD_CO_S:
334 |   PERFORM_ADD_CO_S(v, co, nullify_flag, r1, r2, ci);
335 |   break;
336 | case SUB_SEQ:
337 |   PERFORM_SUB_SEQ(v, co, nullify_flag, r1, r2, ci);
338 |   break;
339 | case SUB_SNE:
340 |   PERFORM_SUB_SNE(v, co, nullify_flag, r1, r2, ci);
341 |   break;
342 | case SUB_SLTS:
343 |   PERFORM_SUB_SLTS(v, co, nullify_flag, r1, r2, ci);
344 |   break;
345 | case SUB_SGES:
346 |   PERFORM_SUB_SGES(v, co, nullify_flag, r1, r2, ci);
347 |   break;
348 | case SUB_SLES:
349 |   PERFORM_SUB_SLES(v, co, nullify_flag, r1, r2, ci);
350 |   break;
351 | case SUB_SGTS:
352 |   PERFORM_SUB_SGTS(v, co, nullify_flag, r1, r2, ci);
353 |   break;
354 | case SUB_SODD:
355 |   PERFORM_SUB_SODD(v, co, nullify_flag, r1, r2, ci);
356 |   break;
357 | case SUB_SEVN:
358 |   PERFORM_SUB_SEVN(v, co, nullify_flag, r1, r2, ci);
359 |   break;
360 | case SUB_S:
361 |   PERFORM_SUB_S(v, co, nullify_flag, r1, r2, ci);
362 |   break;
363 | case ADC_CIO_SEQ:
364 |   PERFORM_ADC_CIO_SEQ(v, co, nullify_flag, r1, r2, ci);
365 |   break;
366 | case ADC_CIO_SNE:
367 |   PERFORM_ADC_CIO_SNE(v, co, nullify_flag, r1, r2, ci);
368 |   break;
369 | case ADC_CIO_SLTU:
370 |   PERFORM_ADC_CIO_SLTU(v, co, nullify_flag, r1, r2, ci);
371 |   break;
372 | case ADC_CIO_SGEU:
373 |   PERFORM_ADC_CIO_SGEU(v, co, nullify_flag, r1, r2, ci);
374 |   break;
375 | case ADC_CIO_SLEU:
376 |   PERFORM_ADC_CIO_SLEU(v, co, nullify_flag, r1, r2, ci);
377 |   break;
378 | case ADC_CIO_SGTU:
379 |   PERFORM_ADC_CIO_SGTU(v, co, nullify_flag, r1, r2, ci);
380 |   break;
381 | case ADC_CIO_SODD:
382 |   PERFORM_ADC_CIO_SODD(v, co, nullify_flag, r1, r2, ci);
383 |   break;
384 | case ADC_CIO_SEVN:
385 |   PERFORM_ADC_CIO_SEVN(v, co, nullify_flag, r1, r2, ci);
386 |   break;
387 | case ADC_CIO_S:
388 |   PERFORM_ADC_CIO_S(v, co, nullify_flag, r1, r2, ci);
389 |   break;
390 | case ADC_CO_SEQ:
391 |   PERFORM_ADC_CO_SEQ(v, co, nullify_flag, r1, r2, ci);
392 |   break;
393 | case ADC_CO_SNE:
394 |   PERFORM_ADC_CO_SNE(v, co, nullify_flag, r1, r2, ci);
395 |   break;
396 | case ADC_CO_SLTU:
397 |   PERFORM_ADC_CO_SLTU(v, co, nullify_flag, r1, r2, ci);
398 |   break;
399 | case ADC_CO_SGEU:
400 |   PERFORM_ADC_CO_SGEU(v, co, nullify_flag, r1, r2, ci);
401 |   break;
402 | case ADC_CO_SLEU:
403 |   PERFORM_ADC_CO_SLEU(v, co, nullify_flag, r1, r2, ci);
404 |   break;
405 | case ADC_CO_SGTU:
406 |   PERFORM_ADC_CO_SGTU(v, co, nullify_flag, r1, r2, ci);
407 |   break;
408 | case ADC_CO_SODD:
409 |   PERFORM_ADC_CO_SODD(v, co, nullify_flag, r1, r2, ci);
410 |   break;
411 | case ADC_CO_SEVN:
412 |   PERFORM_ADC_CO_SEVN(v, co, nullify_flag, r1, r2, ci);
413 |   break;
414 | case ADC_CO_S:
415 |   PERFORM_ADC_CO_S(v, co, nullify_flag, r1, r2, ci);
416 |   break;
417 | 
418 | case COMCLR_SEQ:
419 |   PERFORM_COMCLR_SEQ(v, co, nullify_flag, r1, r2, ci);
420 |   break;
421 | case COMCLR_SNE:
422 |   PERFORM_COMCLR_SNE(v, co, nullify_flag, r1, r2, ci);
423 |   break;
424 | case COMCLR_SLTS:
425 |   PERFORM_COMCLR_SLTS(v, co, nullify_flag, r1, r2, ci);
426 |   break;
427 | case COMCLR_SGES:
428 |   PERFORM_COMCLR_SGES(v, co, nullify_flag, r1, r2, ci);
429 |   break;
430 | case COMCLR_SLES:
431 |   PERFORM_COMCLR_SLES(v, co, nullify_flag, r1, r2, ci);
432 |   break;
433 | case COMCLR_SGTS:
434 |   PERFORM_COMCLR_SGTS(v, co, nullify_flag, r1, r2, ci);
435 |   break;
436 | case COMCLR_SLTU:
437 |   PERFORM_COMCLR_SLTU(v, co, nullify_flag, r1, r2, ci);
438 |   break;
439 | case COMCLR_SGEU:
440 |   PERFORM_COMCLR_SGEU(v, co, nullify_flag, r1, r2, ci);
441 |   break;
442 | case COMCLR_SLEU:
443 |   PERFORM_COMCLR_SLEU(v, co, nullify_flag, r1, r2, ci);
444 |   break;
445 | case COMCLR_SGTU:
446 |   PERFORM_COMCLR_SGTU(v, co, nullify_flag, r1, r2, ci);
447 |   break;
448 | case COMCLR_SODD:
449 |   PERFORM_COMCLR_SODD(v, co, nullify_flag, r1, r2, ci);
450 |   break;
451 | case COMCLR_SEVN:
452 |   PERFORM_COMCLR_SEVN(v, co, nullify_flag, r1, r2, ci);
453 |   break;
454 | 
455 | case AND_SEQ:
456 |   PERFORM_AND_SEQ(v, co, nullify_flag, r1, r2, ci);
457 |   break;
458 | case AND_SNE:
459 |   PERFORM_AND_SNE(v, co, nullify_flag, r1, r2, ci);
460 |   break;
461 | case AND_SLTS:
462 |   PERFORM_AND_SLTS(v, co, nullify_flag, r1, r2, ci);
463 |   break;
464 | case AND_SGES:
465 |   PERFORM_AND_SGES(v, co, nullify_flag, r1, r2, ci);
466 |   break;
467 | case AND_SLES:
468 |   PERFORM_AND_SLES(v, co, nullify_flag, r1, r2, ci);
469 |   break;
470 | case AND_SGTS:
471 |   PERFORM_AND_SGTS(v, co, nullify_flag, r1, r2, ci);
472 |   break;
473 | case AND_SODD:
474 |   PERFORM_AND_SODD(v, co, nullify_flag, r1, r2, ci);
475 |   break;
476 | case AND_SEVN:
477 |   PERFORM_AND_SEVN(v, co, nullify_flag, r1, r2, ci);
478 |   break;
479 | case AND_S:
480 |   PERFORM_AND_S(v, co, nullify_flag, r1, r2, ci);
481 |   break;
482 | case IOR_SEQ:
483 |   PERFORM_IOR_SEQ(v, co, nullify_flag, r1, r2, ci);
484 |   break;
485 | case IOR_SNE:
486 |   PERFORM_IOR_SNE(v, co, nullify_flag, r1, r2, ci);
487 |   break;
488 | case IOR_SLTS:
489 |   PERFORM_IOR_SLTS(v, co, nullify_flag, r1, r2, ci);
490 |   break;
491 | case IOR_SGES:
492 |   PERFORM_IOR_SGES(v, co, nullify_flag, r1, r2, ci);
493 |   break;
494 | case IOR_SLES:
495 |   PERFORM_IOR_SLES(v, co, nullify_flag, r1, r2, ci);
496 |   break;
497 | case IOR_SGTS:
498 |   PERFORM_IOR_SGTS(v, co, nullify_flag, r1, r2, ci);
499 |   break;
500 | case IOR_SODD:
501 |   PERFORM_IOR_SODD(v, co, nullify_flag, r1, r2, ci);
502 |   break;
503 | case IOR_SEVN:
504 |   PERFORM_IOR_SEVN(v, co, nullify_flag, r1, r2, ci);
505 |   break;
506 | case IOR_S:
507 |   PERFORM_IOR_S(v, co, nullify_flag, r1, r2, ci);
508 |   break;
509 | case XOR_SEQ:
510 |   PERFORM_XOR_SEQ(v, co, nullify_flag, r1, r2, ci);
511 |   break;
512 | case XOR_SNE:
513 |   PERFORM_XOR_SNE(v, co, nullify_flag, r1, r2, ci);
514 |   break;
515 | case XOR_SLTS:
516 |   PERFORM_XOR_SLTS(v, co, nullify_flag, r1, r2, ci);
517 |   break;
518 | case XOR_SGES:
519 |   PERFORM_XOR_SGES(v, co, nullify_flag, r1, r2, ci);
520 |   break;
521 | case XOR_SLES:
522 |   PERFORM_XOR_SLES(v, co, nullify_flag, r1, r2, ci);
523 |   break;
524 | case XOR_SGTS:
525 |   PERFORM_XOR_SGTS(v, co, nullify_flag, r1, r2, ci);
526 |   break;
527 | case XOR_SODD:
528 |   PERFORM_XOR_SODD(v, co, nullify_flag, r1, r2, ci);
529 |   break;
530 | case XOR_SEVN:
531 |   PERFORM_XOR_SEVN(v, co, nullify_flag, r1, r2, ci);
532 |   break;
533 | case XOR_S:
534 |   PERFORM_XOR_S(v, co, nullify_flag, r1, r2, ci);
535 |   break;
536 | case ANDC_SEQ:
537 |   PERFORM_ANDC_SEQ(v, co, nullify_flag, r1, r2, ci);
538 |   break;
539 | case ANDC_SNE:
540 |   PERFORM_ANDC_SNE(v, co, nullify_flag, r1, r2, ci);
541 |   break;
542 | case ANDC_SLTS:
543 |   PERFORM_ANDC_SLTS(v, co, nullify_flag, r1, r2, ci);
544 |   break;
545 | case ANDC_SGES:
546 |   PERFORM_ANDC_SGES(v, co, nullify_flag, r1, r2, ci);
547 |   break;
548 | case ANDC_SLES:
549 |   PERFORM_ANDC_SLES(v, co, nullify_flag, r1, r2, ci);
550 |   break;
551 | case ANDC_SGTS:
552 |   PERFORM_ANDC_SGTS(v, co, nullify_flag, r1, r2, ci);
553 |   break;
554 | case ANDC_SODD:
555 |   PERFORM_ANDC_SODD(v, co, nullify_flag, r1, r2, ci);
556 |   break;
557 | case ANDC_SEVN:
558 |   PERFORM_ANDC_SEVN(v, co, nullify_flag, r1, r2, ci);
559 |   break;
560 | case ANDC_S:
561 |   PERFORM_ANDC_S(v, co, nullify_flag, r1, r2, ci);
562 |   break;
563 | case LSHIFTR_S:
564 |   PERFORM_LSHIFTR_S(v, co, nullify_flag, r1, r2, ci);
565 |   break;
566 | case ASHIFTR_S:
567 |   PERFORM_ASHIFTR_S(v, co, nullify_flag, r1, r2, ci);
568 |   break;
569 | case SHIFTL_S:
570 |   PERFORM_SHIFTL_S(v, co, nullify_flag, r1, r2, ci);
571 |   break;
572 | case ROTATEL_S:
573 |   PERFORM_ROTATEL_S(v, co, nullify_flag, r1, r2, ci);
574 |   break;
575 | case EXTS1_S:
576 |   PERFORM_EXTS1_S(v, co, nullify_flag, r1, r2, ci);
577 |   break;
578 | case EXTS2_S:
579 |   PERFORM_EXTS2_S(v, co, nullify_flag, r1, r2, ci);
580 |   break;
581 | case EXTS8_S:
582 |   PERFORM_EXTS8_S(v, co, nullify_flag, r1, r2, ci);
583 |   break;
584 | case EXTS16_S:
585 |   PERFORM_EXTS16_S(v, co, nullify_flag, r1, r2, ci);
586 |   break;
587 | case EXTU1_S:
588 |   PERFORM_EXTU1_S(v, co, nullify_flag, r1, r2, ci);
589 |   break;
590 | case EXTU2_S:
591 |   PERFORM_EXTU2_S(v, co, nullify_flag, r1, r2, ci);
592 |   break;
593 | case COPY_S:
594 |   PERFORM_COPY_S(v, co, nullify_flag, r1, ci);
595 |   break;
596 | 
597 | #endif /* HAS_NULLIFICATION */
598 | 
599 | case ADDC_960: PERFORM_ADDC_960(v, co, r1, r2, ci); break;
600 | case SUBC_960: PERFORM_SUBC_960(v, co, r1, r2, ci); break;
601 | case SEL_NO_960: PERFORM_SEL_NO_960(v, co, r1, r2, ci); break;
602 | case SEL_G_960: PERFORM_SEL_G_960(v, co, r1, r2, ci); break;
603 | case SEL_E_960: PERFORM_SEL_E_960(v, co, r1, r2, ci); break;
604 | case SEL_GE_960: PERFORM_SEL_GE_960(v, co, r1, r2, ci); break;
605 | case SEL_L_960: PERFORM_SEL_L_960(v, co, r1, r2, ci); break;
606 | case SEL_NE_960: PERFORM_SEL_NE_960(v, co, r1, r2, ci); break;
607 | case SEL_LE_960: PERFORM_SEL_LE_960(v, co, r1, r2, ci); break;
608 | case SEL_O_960: PERFORM_SEL_O_960(v, co, r1, r2, ci); break;
609 | case CONCMPO_960: PERFORM_CONCMPO_960(v, co, r1, r2, ci); break;
610 | case CONCMPI_960: PERFORM_CONCMPI_960(v, co, r1, r2, ci); break;
611 | case CMPO_960: PERFORM_CMPO_960(v, co, r1, r2, ci); break;
612 | case CMPI_960: PERFORM_CMPI_960(v, co, r1, r2, ci); break;
613 | case SHIFTL_NT: PERFORM_SHIFTL_NT(v, co, r1, r2, ci); break;
614 | case LSHIFTR_NT: PERFORM_LSHIFTR_NT(v, co, r1, r2, ci); break;
615 | case ASHIFTR_NT: PERFORM_ASHIFTR_NT(v, co, r1, r2, ci); break;
616 | case ADDO_NO_960:
617 |   v = regs[insn.d];
618 |   PERFORM_ADDO_NO_960(v, co, r1, r2, ci);
619 |   break;
620 | case ADDO_G_960:
621 |   v = regs[insn.d];
622 |   PERFORM_ADDO_G_960(v, co, r1, r2, ci);
623 |   break;
624 | case ADDO_E_960:
625 |   v = regs[insn.d];
626 |   PERFORM_ADDO_E_960(v, co, r1, r2, ci);
627 |   break;
628 | case ADDO_GE_960:
629 |   v = regs[insn.d];
630 |   PERFORM_ADDO_GE_960(v, co, r1, r2, ci);
631 |   break;
632 | case ADDO_L_960:
633 |   v = regs[insn.d];
634 |   PERFORM_ADDO_L_960(v, co, r1, r2, ci);
635 |   break;
636 | case ADDO_NE_960:
637 |   v = regs[insn.d];
638 |   PERFORM_ADDO_NE_960(v, co, r1, r2, ci);
639 |   break;
640 | case ADDO_LE_960:
641 |   v = regs[insn.d];
642 |   PERFORM_ADDO_LE_960(v, co, r1, r2, ci);
643 |   break;
644 | case ADDO_O_960:
645 |   v = regs[insn.d];
646 |   PERFORM_ADDO_O_960(v, co, r1, r2, ci);
647 |   break;
648 | case SUBO_NO_960:
649 |   v = regs[insn.d];
650 |   PERFORM_SUBO_NO_960(v, co, r1, r2, ci);
651 |   break;
652 | case SUBO_G_960:
653 |   v = regs[insn.d];
654 |   PERFORM_SUBO_G_960(v, co, r1, r2, ci);
655 |   break;
656 | case SUBO_E_960:
657 |   v = regs[insn.d];
658 |   PERFORM_SUBO_E_960(v, co, r1, r2, ci);
659 |   break;
660 | case SUBO_GE_960:
661 |   v = regs[insn.d];
662 |   PERFORM_SUBO_GE_960(v, co, r1, r2, ci);
663 |   break;
664 | case SUBO_L_960:
665 |   v = regs[insn.d];
666 |   PERFORM_SUBO_L_960(v, co, r1, r2, ci);
667 |   break;
668 | case SUBO_NE_960:
669 |   v = regs[insn.d];
670 |   PERFORM_SUBO_NE_960(v, co, r1, r2, ci);
671 |   break;
672 | case SUBO_LE_960:
673 |   v = regs[insn.d];
674 |   PERFORM_SUBO_LE_960(v, co, r1, r2, ci);
675 |   break;
676 | case SUBO_O_960:
677 |   v = regs[insn.d];
678 |   PERFORM_SUBO_O_960(v, co, r1, r2, ci);
679 |   break;
680 | 
681 | case ALTERBIT: PERFORM_ALTERBIT(v, co, r1, r2, ci); break;
682 | case SETBIT: PERFORM_SETBIT(v, co, r1, r2, ci); break;
683 | case CLRBIT: PERFORM_CLRBIT(v, co, r1, r2, ci); break;
684 | case CHKBIT: PERFORM_CHKBIT(v, co, r1, r2, ci); break;
685 | case NOTBIT: PERFORM_NOTBIT(v, co, r1, r2, ci); break;
686 | 
687 | #ifdef UDIV_WITH_SDIV
688 | case SDIV: PERFORM_SDIV(v, co, r1, r2, ci); break;
689 | #endif
690 | }
691 | 
692 | #if HAS_NULLIFICATION
693 | reg_defined[insn.d] = 1;
694 | #endif
695 | /* Store result. */
696 | regs[insn.d] = v;
697 | ci = co;
698 | }
699 | 
700 | #if HAS_NULLIFICATION
701 | /* Check if the destination has become defined for the current arguments. */
702 | if (!reg_defined[insn.d])
703 |   return -2;
704 | #endif
705 | 
706 | return ci;
707 | }
708 | 
--------------------------------------------------------------------------------
/version.h:
--------------------------------------------------------------------------------
1 | char *version_string = "2.5";
2 | 
--------------------------------------------------------------------------------