├── COPYING ├── ChangeLog ├── Makefile ├── README ├── README.xcore ├── TODO ├── goal.def ├── hashtable.c ├── insn.def ├── longlong.h ├── run_program.def ├── superopt.c ├── superopt.h ├── synth.def └── version.h /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 675 Mass Ave, Cambridge, MA 02139, USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. 
This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | Appendix: How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 19yy 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) 19yy name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Library General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2014-09-18 James Pallister 2 | Added the swap AVR instruction. 3 | 4 | * superopt.c (output_assembly): Added assembly output for the above 5 | * synth.def: Added calls to the above instructions. 6 | 7 | 2014-09-10 James Pallister 8 | Added the clc and sec AVR instructions. 9 | 10 | * superopt.c (output_assembly): Added assembly output for the above 11 | * synth.def: Added calls to the above instructions. 12 | 13 | 2014-09-08 James Pallister 14 | Added the ror, rol, asr and lsr AVR instructions 15 | 16 | * superopt.c (output_assembly): Added assembly output for the above 17 | * synth.def: Added calls to the above instructions. 
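The AVR entries above add 8-bit shift, rotate, and nibble-swap instructions (lsr, asr, ror, rol, swap). As a rough illustration of what those operations compute on an 8-bit register value, here is a hypothetical set of C helper macros; they are illustrative only, not the PERFORM_* macros that superopt.h actually defines, and the carry-flag updates are omitted:

/* Effect of the AVR instructions named above on an 8-bit value r,
   with carry-in cy (0 or 1).  Illustrative sketch only. */
#define LSR8(r)      (((r) & 0xff) >> 1)                         /* logical shift right        */
#define ASR8(r)      ((((r) & 0xff) >> 1) | ((r) & 0x80))        /* arithmetic shift right     */
#define ROR8(r, cy)  ((((r) & 0xff) >> 1) | ((cy) << 7))         /* rotate right through carry */
#define ROL8(r, cy)  ((((r) << 1) | (cy)) & 0xff)                /* rotate left through carry  */
#define SWAP8(r)     ((((r) << 4) | (((r) & 0xff) >> 4)) & 0xff) /* swap the two nibbles       */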
18 | 19 | 2014-09-08 James Pallister 20 | Made the hashtable usage conditional on DO_HASHTABLE 21 | 22 | * superopt.c: Added the DO_HASHTABLE preprocessor define, to decide 23 | whether to use the hashtable speed-up or not 24 | 25 | 2014-09-05 James Pallister 26 | Added more instructions to the AVR port: dec, sbc, clr, ser, cpc 27 | 28 | * superopt.c (output_assembly): Added assembly output for the above 29 | instructions. 30 | * synth.def: Synth the above instructions. 31 | 32 | 2014-09-05 James Pallister 33 | Added the compare with carry instruction definition (cmpc), and updated 34 | cmp to work with other bit widths. 35 | 36 | * insn.def: Added CMPC instruction. 37 | * run_program.def: Added a case to execute the CMPC instruction. 38 | * superopt.h: Added CMPC and modified CMP to compute the carry for all bit 39 | widths. 40 | 41 | 2014-08-29 James Pallister 42 | Small bug fix in the pruning method. 43 | 44 | * superopt.c (recurse): Use the accepted heuristic counter instead of 45 | the total success count, since we are only considering the possible 46 | correctness of the one set of input values, not all. 47 | 48 | 2014-08-29 James Pallister 49 | 50 | Added a new pruning method. This method records the state of the registers 51 | and carry if no solutions are found after recursing. Before subsequent 52 | calls to synth this hash table is checked. If the current registers and 53 | carry are in the table, and the allowed cost is lower than or equal to the 54 | stored value, then the current sequence cannot be optimal, so skip it. 55 | 56 | * superopt.c: Include hashtable.c and add a hash_skipped global to count 57 | the amount of pruning. 58 | (recurse): Before calling synth, look up the registers and carry in the 59 | table. Don't do this if allowed_cost is small, since it is quicker 60 | just to test in this case. If no more results are found, add the registers 61 | and carry to the hashtable. 62 | * hashtable.c: New file. Implementation of a hashtable. 63 | 64 | 65 | 2014-08-29 James Pallister 66 | 67 | * superopt.h: 8 bit word should be selected if the bits per word is 8, 68 | not just if AVR is selected. 69 | 70 | 2014-08-19 James Pallister 71 | 72 | * superopt.c: Changed the test counters and limits to 64 bit to 73 | avoid overflow. 74 | 75 | 2014-08-18 James Pallister 76 | 77 | * superopt.c: Add a -test-limit option to specify the maximum 78 | number of tests to evaluate. 79 | 80 | 2014-08-18 James Pallister 81 | 82 | * goal.def: Changed p24 to work with smaller bit widths. 83 | 84 | 2014-08-18 James Pallister 85 | 86 | * superopt.c (output_assembly): Added adc instruction for AVR. 87 | * synth.def (synth): Added adc instruction for AVR. 88 | 89 | 2014-08-18 James Pallister 90 | 91 | * superopt.c: Added a test count, for the number of full sequences 92 | that have been evaluated. 93 | 94 | 2014-08-18 James Pallister 95 | 96 | * goal.def: To keep compatibility, don't generate any constants 97 | larger than 32 bits. 98 | 99 | 2014-08-17 James Pallister 100 | 101 | * goal.def: Some of the goals use the 0x80000000 constant. These are 102 | changed to be 0x80 when the bit width of the target is 8 bits, 103 | for example. 104 | 105 | 2014-08-17 James Pallister 106 | 107 | * All files: Add AVR support. Support for the add, sub, inc, cmp, xor, 108 | or, and, mov instructions. 109 | 110 | 2014-08-17 James Pallister 111 | 112 | * All files: Fix white space.
All tabs converted to whitespace 113 | 114 | 2010-18-07 Richard Osborne 115 | 116 | * superopt.h: Fix incorrect use of unsigned_word type in PERFORM_SEXT macro 117 | 118 | 2009-28-08 Richard Osborne 119 | 120 | * superopt.c: Fix printing of ldaw / lda16. 121 | * superopt.h: Fix error in the PERFORM macros for ldaw / lda16. 122 | 123 | 2009-07-08 Richard Osborne 124 | 125 | * All files: Add XCore support 126 | 127 | Sat Jun 3 01:28:17 1995 Torbjorn Granlund 128 | 129 | * superopt.c (random_word): Delete unused variable tot_bits. 130 | 131 | Thu Jun 1 04:05:25 1995 Torbjorn Granlund 132 | 133 | * superopt.c (init_random_word): Make state1 have char type. 134 | Use random() on alpha, since srand48 doesn't work there. 135 | 136 | Wed May 31 17:08:12 1995 Torbjorn Granlund 137 | 138 | * superopt.c (test_operands): Add -3, -2, 3, 30, 31, 32, 63, 64. 139 | (random_word): Return small numbers with high probability. 140 | 141 | Sat May 27 18:32:03 1995 Torbjorn Granlund 142 | 143 | * superopt.c (N_RANDOM_TEST_OPERANDS): Set to 25000. 144 | (random_word): Rewrite. 145 | (RANDOM): New macro, internal to random_word. 146 | 147 | * goal.def (DBL_SHIFT_LO, DBL_SHIFT_LO_TRUNC): Arity is 2. 148 | 149 | Wed May 24 07:40:49 1995 Torbjorn Granlund 150 | 151 | * superopt.c (N_RANDOM_TEST_OPERANDS): New macro. 152 | (init_test_sets): Use N_RANDOM_TEST_OPERANDS. 153 | Also, zero n_words when in declarator. 154 | (random_word): Conditionally complement x before loop. 155 | 156 | * synth.def (synth_skip): Loop over dr also for unary operations. 157 | * superopt.c (recurse): Delete inline declaration. 158 | 159 | Tue May 23 01:35:16 1995 Torbjorn Granlund 160 | 161 | * goal.def: Add *_SEL goals. 162 | 163 | Mon May 22 23:00:31 1995 Torbjorn Granlund 164 | 165 | * Makefile (*.res rules): Use "./" when running superoptimizers. 166 | Delete spurious tab after rule. 167 | (ALL_MACHINES): Put hppa last. 168 | 169 | Thu May 18 22:36:58 1995 Torbjorn Granlund 170 | 171 | * synth.def (I960 synth): Break out conditional add and subtract 172 | instructions into separate loops, and get the pruning conditions 173 | right. 174 | 175 | * synth.def (ALPHA synth): Try CMOVcc with (1) as 2:nd operand. 176 | * synth.def (ALPHA synth): Read v every time before executing 177 | CMOVcc. Also, don't do CMOVcc with any immediate 1:st operands. 178 | (I960 synth): Likewise, but for ADDO_cc_960 and SUBO_cc_960. 179 | * run_program.def: Read v every time before executing ADDO_cc_960 180 | and SUBO_cc_960. 181 | 182 | * superopt.c (ALPHA output_assembly): Fix typo in CMPLEU and CMPLTU. 183 | 184 | * synth.def (I960 synth): Pass CY_0 in insn that sets cc to 100b. 185 | 186 | Wed May 17 09:19:13 1995 Torbjorn Granlund 187 | 188 | * synth.def (I960 synth): Use CRECURSE_2OP for CHKBIT. 189 | * superopt.c (I960 output_assembly): Output two operands for CHKBIT. 190 | 191 | * superopt.c (output_assembly): Use new PERFORM_CONCMPx_960 192 | name here too. 193 | 194 | Tue May 16 00:07:12 1995 Torbjorn Granlund 195 | 196 | * superopt.c (main_synth): Print C code for goal sequence before 197 | outputtting the sequences. 198 | (all functions): Print to stdout, not stderr, unless an error occured. 199 | 200 | * superopt.h (PERFORM_CONCMPx_960): New name for 201 | PERFORM_CONCMPx_NO_960. Rewrite, they were completely wrong. 202 | * synth.def, run_program.def, insn.def: 203 | Use new name PERFORM_CONCMPx_960. 204 | 205 | * synth.def (I960 synth): Delete I960_1_1 condition on CONCMPcc. 206 | 207 | * Makefile: For i960 build, pass -DI960_1_1. 
208 | 209 | * superopt.c (recurse): Change test of HPPA to HAS_NULLIFICATION 210 | in search for goal_value in values array. Also compare v to 211 | goal_value, since it is not yet stored in values array. 212 | 213 | * superopt.h, synth.def: Handle i960 1.1 instructions specifically. 214 | (POWER): Fix typo testing for POWERPC. 215 | 216 | Mon May 15 23:49:56 1995 Torbjorn Granlund 217 | 218 | * synth.def (I960 synth): Fix typos for CONCMPcc instructions. 219 | Try SELcc, CONCMPcc, ADDOcc, SUBOcc with immediate arguments. 220 | Try LSHUFTR_NT with op1 being 1. 221 | 222 | Mon May 15 19:10:36 1995 Torbjorn Granlund 223 | 224 | * synth.def (I960 synth): Pass correct prune hint for SELcc. 225 | Try SELcc with immediate 0 and 1. 226 | 227 | Mon May 15 10:28:20 1995 Torbjorn Granlund 228 | 229 | * superopt.c (init_random_word): New function. 230 | (main_synth): Call init_random_word. 231 | 232 | * Update copyright headers. 233 | 234 | * superopt.c: Use "assembly", not "assembler" consistently. 235 | (main): Default maxmax_cost to 4 (was 5). 236 | 237 | Sun May 14 12:24:44 1995 Torbjorn Granlund 238 | 239 | * insn.def: Add `<' and `=' as instruction classes. 240 | (test_sequence): Use new sequences for suppresion of destination 241 | register printing. 242 | 243 | * Fold in i960 port. All files affected. 244 | 245 | * superopt.h (PERFORM_LSHIFT*): Cast r1 to unsigned_word, 246 | not signed_word. 247 | 248 | * superopt.c (recurse): In loop to find goal value when the last insn 249 | is nullified, loop from 0, not from goal_function_arity. 250 | 251 | Sat May 13 12:00:46 1995 Torbjorn Granlund 252 | 253 | * synth.def: New file, move all synth functions here. 254 | 255 | * superopt.c: Include synth.def twice, once for generating non-leaf 256 | synth functions, once for generating leaf synth functions, with 257 | different definitions of the various RECURSE macros. 258 | (SYNTH): New macro. 259 | (recurse): Use SYNTH. 260 | (main_synth): Likewise. 261 | (recurse_last): New function, called by the leaf synth variants. 262 | 263 | Sun May 7 11:46:50 1995 Torbjorn Granlund 264 | 265 | * superopt.c (HPPA synth): Split into several smaller functions. 266 | (synth_nonskip, synth_condskip, synth_skip): New functions. 267 | 268 | Sat May 6 10:35:25 1995 Torbjorn Granlund 269 | 270 | * superopt.c (ALL synth): Split extract-of-1 and extract-of-2 into 271 | separate loops; make latter loops terminate at BITS_PER_WORD-2. 272 | (ALL synth): Delete SHIFTS and EXTRACTS macros; use run-time 273 | conditionals instead. 274 | (flag_shifts, flag_extracts): New variables. 275 | (main): Set new variables. 276 | * superopt.h (SHIFTS): Delete. 277 | 278 | Wed May 3 15:35:23 1995 Torbjorn Granlund 279 | 280 | * superopt.c (HPPA synth): Do comiclr with -1 and 1, not just 0. 281 | (output_assembler, PYR): Handle rsubw in ADC_CO case. 282 | 283 | Tue May 2 21:09:37 1995 Torbjorn Granlund 284 | 285 | * superopt.h (EXTRA_SEQUENCE_TESTS, SH): Only detect non-zero immediate 286 | values. Also allow sequences that twice (or more) demand the *same* 287 | variable to be allocated to r0. 288 | 289 | Tue May 2 10:39:22 1995 Torbjorn Granlund 290 | 291 | * superopt.c (PA_RECURSE): Don't increment N_VALUES unconditionally; 292 | make it depend in if D equals N_VALUES. 293 | 294 | Mon May 1 23:04:17 1995 Torbjorn Granlund 295 | 296 | * superopt.h (word typedefs): 297 | Use long long also when _LONGLONG is defined. 
298 | 299 | Mon May 1 17:59:11 1995 Torbjorn Granlund 300 | 301 | * superopt.c (output_assembler, POWER): For SUB, use INS_SUBF 302 | when not immediate operand. 303 | (INS_SUBF): New #define. 304 | 305 | Mon May 1 09:17:25 1995 Torbjorn Granlund 306 | 307 | * superopt.c (main): Use exit instead of return consistently. 308 | (output_assembler, HPPA): Handle EXT[SU][12]_S and ROTATEL_S. 309 | 310 | Sun Apr 30 00:14:14 1995 Torbjorn Granlund 311 | 312 | * superopt.c (output_assembler, ALPHA, case ADD): Cast immediate 313 | value to int. 314 | 315 | * superopt.c (RISC synth): Try COPY of registers for ALPHA. 316 | 317 | * superopt.c (HPPA synth): Also COPY 0 (it might be nullified). 318 | (HPPA synth): Correct several typos for COPY_S variants. 319 | 320 | * superopt.h (PSTR): Define as appropriate. 321 | * superopt.c (print_operand): New function. Use PSTR. 322 | (test_sequence): Move operand printing code to print_operand. 323 | 324 | * superopt.c (RISC synth): Try COPY of immediates also for ALPHA. 325 | (output_assembler, ALPHA): Handle COPY. 326 | (RISC synth): Don't do cmpltu(r,0) or cmpleu(0,r) or cmplt(r,0). 327 | 328 | * superopt.h (TRUNC_CNT): Use % instead of & for portability. 329 | (inline): Define to empty also if DEBUG. 330 | 331 | * superopt.c (synth): Add TIMING stuff to all variants of synth. 332 | Also, make type of time_start agree with type of cputime(). 333 | 334 | Sat Apr 29 09:32:58 1995 Torbjorn Granlund 335 | 336 | * longlong.h (C umul_ppmm): Use UWtype, not USItype for temps. 337 | (udiv_qrnnd): For cases implemented with call to __udiv_qrnnd, 338 | protect with new symbol LONGLONG_STANDALONE. 339 | 340 | * goal.def (CLEAR_LSB): Fix typo. 341 | 342 | * longlong.h: Replace with version from GNU MP. 343 | * superopt.h: Set up #defines for new longlong.h. 344 | 345 | Sat Apr 29 01:43:25 1995 Torbjorn Granlund 346 | 347 | * superopt.h (FF1_CHECK): Delete. 348 | (PERFORM_FF1): Don't use FF1_CHECK. 349 | (PERFORM_CLZ): Rewrite to handle 64 bit words. 350 | (PERFORM_FFS): Use BITS_PER_WORD, not the constant 32. 351 | 352 | * superopt.c (main): When printing list of goals, terminate with \n. 353 | 354 | * superopt.c (RISC synth): Try ADD with immediate -1. 355 | (output_assembler, ALPHA): Handle ADD with negative s2. 356 | 357 | * goal.def: Delete redundant goal divide_by_minus_2e31. 358 | 359 | Fri Apr 28 11:39:51 1995 Torbjorn Granlund 360 | 361 | * superopt.c (HPPA synth): In code protected by #if EXTRACTS, 362 | loop to 30, not 31. 363 | (RISC synth): Likewise. 364 | (output_assembler, HPPA): Handle EXT[SU][12]. 365 | 366 | Thu Apr 27 10:46:24 1995 Torbjorn Granlund 367 | 368 | * run_program.def (run_program): Use memset, not bzero. 369 | 370 | * superopt.c (output_assembler, HPPA): Handle COPY_S. 371 | Rearrange code for plain COPY. Enable unconditionally nullifying 372 | shift/rotate/extract. 373 | (HPPA synth): Delete spurious duplicate code within #if SHIFTS 374 | in the conditional-nullify block. Delete spurious 0-ary and copy code 375 | in the same block. Add systematically missing _S to 376 | shift/rotate/extract instruction names in unconditional-nullify block; 377 | Correct and enable 0-ary instructions and variants of COPY_S. 378 | 379 | * superopt.h (PERFORM_ROTATEL): Check TRUNC_CNT(r2), not plain r2. 380 | (PERFORM_ROTATEL_S): Likewise. 381 | 382 | * superopt.h (PERFORM_COPY_S): Define. 383 | (PERFORM_*SHIFT*_S, PERFORM_ROTATEL_S, PERFORM_EXT*_S): Define 384 | * insn.def: Corresponding changes. 385 | * run_program.def: Corresponding changes. 
386 | 387 | Tue Apr 25 18:58:26 1995 Torbjorn Granlund 388 | 389 | * Makefile (superopt): Depend on HDRS. 390 | (HDRS): Add many missing included files. 391 | (superopt-*): Depend on HDRS. 392 | (superopt.o): Delete rule. 393 | (superopt): Depend on SRCS, not OBJS. Corresponding change to rule. 394 | 395 | Sat Apr 22 18:51:59 1995 Torbjorn Granlund 396 | 397 | * Makefile (FILES): Add ChangeLog (again). 398 | 399 | * superopt.c (CISC synth): Handle all immediate counts for MC68020 400 | (if -DSHIFTS), not just 31. 401 | 402 | Wed Mar 15 09:20:46 1995 Michael Meissner 403 | 404 | * Makefile (CFLAGS): Add new macro MACHINE to override the machine 405 | desired. 406 | (superopt): New alternate name for gso. 407 | (install): New rule. 408 | (all, install-all): New rule to build superopt for all of the 409 | supported machines. 410 | 411 | * superopt.h (POWEPC): Define if _ARCH_PPC is defined. 412 | (I386): Also check __i386__. 413 | 414 | * superopt.c (random_word): Eliminate inline, since it was 415 | previously declared without it. 416 | (output_assembler): For PowerPC, use PowerPC instruction names, 417 | rather than Power. Abort if Power only instructions used. 418 | (test_sequence): Print newline between patterns if -nl. 419 | (main): Recognize -nl switch. If unknown switch, print a usage 420 | message, including all supported goal functions. 421 | 422 | Sun Nov 13 22:59:42 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 423 | 424 | * superopt.c (CISC synth): Use SHIFTS macro here too, just like in 425 | RISC synth. 426 | * superopt.h (SHIFTS): Make sure it is defined to 1 or 0. 427 | 428 | Tue Nov 8 01:33:40 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 429 | 430 | * superopt.c (synth): Add missing SH conditional for "subc rx,ry"... 431 | (synth): Generate "add rx,ry" and "sub rx,ry" for SH... 432 | 433 | superopt.c (synth): Fix several SH-specific typos with prune hint 434 | setting. 435 | 436 | * superopt.h (EXTRA_SEQUENCE_TESTS): New macro; define for SH. 437 | * superopt.c (test_sequence): Use EXTRA_SEQUENCE_TESTS. 438 | 439 | * superopt.c (CISC synth): Generate ext[su].[bw], dt, swap.w, xtrct, 440 | and tst rx,ry for SH. 441 | (output_assembler): Print them. 442 | superopt.h: Handle immediates 0xff and 0xffff. 443 | (init_immediates): Initialize `values' with new immediates. 444 | 445 | superopt.c (synth): Shifts with count > 1 doesn't set T on the SH. 446 | (output_assembler, SH): Add support for carry-free shifts. 447 | 448 | * All files (EXTS8, EXTS16, CYAND, DECR_CYEQ, MERGE16): New opcodes. 449 | 450 | Sat Nov 5 13:21:11 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 451 | 452 | * superopt.c (synth): Use ASHIFTR_CON for POWER consistently, 453 | never use ASHIFTR. 454 | 455 | superopt.c (synth): Delete ROTATEXL_CIO with count BITS_PER_WORD-1. 456 | (synth): Try ROTATEXR_CIO with count 1. 457 | superopt.h (PERFORM_ROTATEXR_CIO): New definition. 458 | (PERFORM_ROTATEXL_CIO): Rewrite. 459 | (PERFORM_ROTATEL_CIO): Make sure we use logical shifts. 460 | (PERFORM_ROTATER_CIO): New definition. 461 | insn.def, run_program.def: Add new instructions. 462 | (output_assembler, M68000): Handle rotate right instructions. 463 | 464 | * superopt.c (random_word): Use mrand48 for __svr4__. 465 | (operand_names): Merge I386 and PYR. 466 | (output_assembler, I386, ADD): Output "decl" here... 467 | (output_assembler, I386, SUB): ...not here. 468 | 469 | (synth): Bump shift cost for I386 from 2 to 1. 470 | (synth): Merge I386 shift patterns that now became identical to 471 | other patterns. 
472 | 473 | (CISC synth): Don't try ROTATEXL_CIO with count 1; ADD_CIO performs 474 | the very same operation. 475 | 476 | * insn.def (CPEQ): Use 'c' for commutative. 477 | (CPNEQ, CMPEQ): Likewise. 478 | (COMCY): This is not a binary operation; use class 'x'. 479 | 480 | * superopt.c (main): Print target information for -v. 481 | * superopt.h (TARGET_STRING): New #define. 482 | 483 | * Ported to Hitatchi SH. Most files affected. 484 | 485 | Sun Jul 17 04:34:49 1994 Torbjorn Granlund (tege@tiny.cygnus.com) 486 | 487 | * superopt.c (HPPA synth): Fix typos for shifting conditionalized on 488 | SHIFTS. 489 | 490 | * Makefile (FILES): Include ChangeLog. 491 | 492 | Thu Jun 16 19:41:10 1994 Torbjorn Granlund (tege@adder.cygnus.com) 493 | 494 | * superopt.c (output_assembler, I386, case AND_RC): 495 | Fix typo in condition for andb. 496 | (case IOR_RC): Likewise. 497 | (case XOR_RC): Likewise. 498 | (CISC synth): Try "movl $0,d" for I386. 499 | 500 | Sun Jan 30 22:35:02 1994 Torbjorn Granlund (tege@adder.cygnus.com) 501 | 502 | * superopt.c (ffs_internal): Initialize ci to silent compiler 503 | warning. 504 | * superopt.c (header): Declare random_word. 505 | 506 | Sat Oct 16 21:21:51 1993 Torbjorn Granlund (tege@adder.cygnus.com) 507 | 508 | * superopt.c (CISC synth): Fix typo, MC68020 was M68020. 509 | 510 | Mon Jul 12 20:37:12 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 511 | 512 | * superopt.c (CISC synth): Try BSF86 for I386. 513 | * superopt.h, run_program.def, insn.def: Add defs for BSF86. 514 | 515 | Fri May 28 11:59:43 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 516 | 517 | * superopt.c (RISC synth): Try subf on POWERPC. 518 | (output_assembler)[POWER]: Handle subf. 519 | 520 | Mon May 24 09:46:56 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 521 | 522 | * All files: Conditionalize on POWER, not RS6000. 523 | * superopt.h: Add handling of POWERPC. 524 | * superopt.c: Exclude ABS, NABS, DOZ if POWERPC. 525 | 526 | Sun Feb 21 14:21:20 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 527 | 528 | * superopt.h (struct insn_t): Make opcode field wider at expense of 529 | other fields. 530 | 531 | * superopt.c (HPPA synth): Make pruning less agressive after 532 | nullifying insn. 533 | * run_program.def: Special case for N_INSNS == 0. 534 | 535 | * superopt.c (ALPHA synth, CMOV*): Move pruning test inside DR loop. 536 | Add condition (DR != LAST_DEST). 537 | 538 | * superopt.c (outside of functions): Declare malloc and realloc. 539 | * Ported to HP-PA. Most files affected. 540 | 541 | Thu Feb 18 21:23:17 1993 Torbjorn Granlund (tege@pde.nada.kth.se) 542 | 543 | * superopt.h (VALUE_MIN_SIGNED): Make it work for non 32-bit 544 | computers. 545 | (VALUE_MAX_SIGNED): Likewise. 546 | (__immediate_val): Use VALUE_MIN_SIGNED and VALUE_MAX_SIGNED. 547 | 548 | Mon Feb 15 11:41:16 1993 Torbjorn Granlund (tege@cyklop.nada.kth.se) 549 | 550 | * version.h: Now 2.2. 551 | 552 | * superopt.c (operand_names): Define constants up to 63 for ALPHA. 553 | (operand_names): Remove ...0,0,0... as array filler. 554 | 555 | * superopt.h (BITS_PER_WORD): Conditionalize on ALPHA. 556 | (unsigned_word, signed_word): Define depending on BITS_PER_WORD and 557 | compiler. 558 | 559 | Sun Feb 14 20:25:05 1993 Torbjorn Granlund (tege@cyklop.nada.kth.se) 560 | 561 | * superopt.c (output_assembler): Define Alpha asm syntax. 562 | * run_program.def: Special code for CMOVxx. 563 | 564 | Sat Feb 13 01:10:06 1993 Torbjorn Granlund (tege@cyklop.nada.kth.se) 565 | 566 | * Ported to Alpha. Most files affected. 
567 | 568 | Sat Jan 2 15:50:40 1993 Torbjorn Granlund (tege@sics.se) 569 | 570 | * superopt.h (PERFORM_FFS): New macro. 571 | * superopt.c (ffs_internal): New function. 572 | * goal.def (FFS): Use ffs_internal. 573 | 574 | * superopt.c (output_assembler)[I386]: Fix typo, %s -> %d, two 575 | places. 576 | 577 | Thu Dec 17 13:58:33 1992 Torbjorn Granlund (tege@sics.se) 578 | 579 | * superopt.c (output_assembler)[RS6000,AM29K]: Handle CLZ. 580 | 581 | Sat Dec 12 15:40:05 1992 Torbjorn Granlund (tege@sics.se) 582 | 583 | * version.h: Now 2.1. 584 | 585 | * Makefile (dist): Make sed command more robust. 586 | 587 | * superopt.h: Handle MC68000 and MC68020, not M68000. 588 | * superopt.h: Define M68000 #if MC68000 || MC68020. 589 | * superopt.h (SHIFT_COST): Define. For MC68000 it's depends on 590 | count. 591 | * superopt.c (CISC synth): Use SHIFT_COST for all shifting on 592 | MC68000. 593 | * superopt.c (CISC synth): Try logical operation with #1. 594 | * superopt.c (output_assembler)[M68000 AND,IOR,XOR]: Output 595 | operation with small immediates using word (w) suffix. 596 | * superopt.c (output_assembler)[I386 AND,IOR,XOR]: Likewise. 597 | 598 | * superopt.c (output_assembler)[M88000 ADC_CI]: Output subu.ci, not 599 | subu.co. 600 | 601 | * superopt.c (RISC synth)[0-ary instructions]: Major enhancements. 602 | 603 | Next 3 from Paul Eggert: 604 | * goal.def (DIVIDE_BY_MINUS_2e31): New name flo DIVIDE_BY_2e31, 605 | better describing the operation. 606 | * goal.def (DEF_GOAL for GRAY, GRAY2, DIVIDE_BY_MINUS_2e31): Avoid 607 | overflow by shifting -1 instead of 1. 608 | * superopt.h (__immediate_val): Avoid overflow by casting 1 to word. 609 | overflow.) 610 | 611 | * superopt.c (timings): New name for 'time'. (Clashed with defs in 612 | ). 613 | 614 | * superopt.c (recurse): Assign sequence[n_insn] using constructor 615 | #if __GNUC__. (Gives better code.) 616 | 617 | * superopt.c: Update comments. 618 | 619 | * superopt.c (ARITH_BITS #define): Remove. (Not used any more.) 620 | 621 | * superopt.c (cputime): #if USG, use clock() instead of getrusage. 622 | 623 | * superopt.c (init_test_sets): Remove #ifdef UDIV_WITH_SDIV code. 624 | (Obsolete.) 625 | 626 | Wed Dec 9 14:58:17 1992 Torbjorn Granlund (tege@sics.se) 627 | 628 | * superopt.c (init_test_sets): Remove unused label 'next'. 629 | 630 | * superopt.c (CISC synth, I386, PYR)[COPY 0 => v]: Cost is 1. 631 | Pass n_values for s1,s2,d operands. (Bug fix.) 632 | 633 | * superopt.c (RISC synth, SPARC)[r1 + 1 - cy]: Try this, with and 634 | without generating carry out. (Bug fix.) 635 | 636 | * superopt.h: Define __CLOBBER_CC and __AND_CLOBBER_CC. 637 | (sparc asm PERFORM macros): Use __CLOBBER_CC. (Bug fixes.) 638 | 639 | Sat Nov 28 13:50:09 1992 Torbjorn Granlund (tege@sics.se) 640 | 641 | * version.h: Now 2.0. 642 | 643 | * superopt.c (init_test_sets): Remove code inside UDIV_WITH_SDIV 644 | conditional. 645 | 646 | * superopt.c (output_assembler): Output pyramid assembler. 647 | Output MUL for all CPUs that have it. 648 | * superopt.c (CISC synth): Fix many pyramid-related errors. 649 | * superopt.c (random_word): #ifdef hpux, use mrand48. 650 | 651 | * superopt.h, superopt.c (synth), insn.def, run_program.def: 652 | UMULWIDEN_LO => MUL, PERFORM_UMULWIDEN_LO => PERFORM_MUL. 653 | * superopt.h (PERFORM_MUL): Simply use (r1 * r2), don't call 654 | umul_ppmm. 655 | * superopt.h: Define all PERFORM_* macros unconditionally. 656 | * insn.def: Remove #ifdef DM conditionals. 657 | * goal.def (UMULH): New goal. 
658 | * goal.def: Remove #ifdef DM conditionals. Include some goals only 659 | with GCC. Switch off division goals due to domain problems. 660 | * run_program.def: Remove #ifdef DM conditionals. 661 | 662 | * superopt.c (synth): Try shifts by 16, #if SHIFT16. 663 | 664 | * superopt.h: Include longlong.h unconditionally. 665 | 666 | * Makefile (FILES): Add longlong.h. 667 | (dist): Rewrite to have tar file creating a directory. 668 | 669 | Tue Jul 28 15:05:09 1992 Torbjorn Granlund (tege@sics.se) 670 | 671 | * goel.def: Add new goals for signed division. 672 | 673 | * Makefile: Create superopt-VERSION.tar.Z. 674 | 675 | * version.h: New file. 676 | * superopt.c: Include "version.h". 677 | * superopt.c (main): Handle `-version' option. 678 | 679 | * superopt.c (main): Better error messages. 680 | 681 | * superopt.c (main): Move initialization of goal_function to handle 682 | empty command lines. 683 | 684 | * superopt.c: Add timing per recursion level #ifdef TIMING. 685 | 686 | Thu Jun 25 20:10:23 1992 Torbjorn Granlund (tege@sics.se) 687 | 688 | * superopt.c (test_sequence): Remove STATISTICS. 689 | (recurse): Put it here. 690 | 691 | * superopt.c (random_word): Back to random. 692 | 693 | * superopt.c (recurse): Make it static. 694 | 695 | * superopt.c (RISC synth): Don't copy 0 on sparc and 88k. 696 | 697 | Fri Jun 12 17:16:54 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 698 | 699 | * superopt.c (test_sequence): Make n_test_operands const to avoid 700 | re-computation. 701 | 702 | Thu Jun 11 23:23:26 1992 Torbjorn Granlund (tege@sics.se) 703 | 704 | * Version 1.91. 705 | 706 | * superopt.c (main): Check argc before calling atoi. 707 | 708 | Thu Jun 11 20:38:20 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 709 | 710 | * goal.def: New file. 711 | * superopt.[ch]: Use goal.def. 712 | 713 | Thu Jun 11 19:19:37 1992 Torbjorn Granlund (tege@sics.se) 714 | 715 | * Version 1.90. 716 | 717 | Thu Jun 11 10:38:20 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 718 | 719 | * insn.def: New file. 720 | * superopt.[ch]: Use insn.def. 721 | 722 | * superopt.c (output_assembler, m88k COPY): Specify use of r0 and 723 | print small constants right. 724 | 725 | * superopt.c (synth): Correct indentation. 726 | 727 | * superopt.c (main): Add -all option to run thorough all know goal 728 | functions. This can be done quickly with -max-cost 2. 729 | 730 | * superopt.h (IMMEDIATE_VAL): Evaluate sparse values properly. 731 | 732 | * superopt.h (PERFORM_ADD_CIO, PERFORM_ADC_CIO): Operand 0 is 733 | written before the inputs are read. 734 | 735 | * superopt.h (PERFORM_CMPPAR): Add m88110 bits and don't check 736 | with the native cmp instruction. 737 | 738 | Thu Jun 11 02:37:01 1992 Torbjorn Granlund (tege@sics.se) 739 | 740 | * Version 1.16. 741 | 742 | * superopt.h: Hack PERFORM_{CLZ,FF1} to be faster. 743 | * superopt.c (clz_tab): Corresponding changes. 744 | * superopt.c (ff1_tab): New table for PERFORM_FF1. 745 | 746 | * superopt.c (RISC synth): Try ADD_CI(x,x). 747 | 748 | * superopt.c (main_synth): Always pass NO_PRUNE to synth. 749 | 750 | * superopt.c (output_assembler): Generalize "cmp" output. 751 | 752 | * superopt.c (synth): Rename cy_in to ci, and cy_out to co. 753 | 754 | * superopt.h (sparc asm): Clobber "cc". 755 | * superopt.h: Include asm iff USE_ASM is defined. Off by default. 756 | 757 | Wed Jun 10 15:40:45 1992 Tom Wood (wood@gen-rtx.rtp.dg.com) 758 | 759 | * superopt.c (output_assembler): Fix typos in 88k assembler. 760 | * superopt.c (operand_names): Make the constant names agree with 761 | the new values. 
762 | 763 | * run_program.def (run_program): Have this return the value of the 764 | carry flag or -1 if the flag was never set. 765 | * superopt.h: run_program now returns an int. 766 | * superopt.c (main_synth): Allow the specification of an initial 767 | sequence of instructions and provide an example sequence. 768 | 769 | * superopt.h (PERFORM_FF1, PERFORM_CMPPAR): When running native, 770 | compare the native instruction's output to the generic 771 | computation. 772 | 773 | Thu Jun 10 02:00:22 1992 Torbjorn Granlund (tege@sics.se) 774 | 775 | * Version 1.15. 776 | 777 | * Add for CLZ, CTZ, and 88k's FF0, FF1, EXT*, and CMPPAR. 778 | * superopt.[ch]: Rewrite handling of immediates to allow arbitrary 779 | shifts. 780 | 781 | Sat Jun 6 20:04:03 1992 Torbjorn Granlund (tege@sics.se) 782 | 783 | * Version 1.14. 784 | 785 | * superopt.c: 88k subu.c* and addu.c* insn were incorrectly asumed 786 | to accept immediate values. 787 | 788 | Wed Apr 1 22:03:04 1992 Torbjorn Granlund (tege@sics.se) 789 | 790 | * superopt.[ch], run_program.def: ROTATEXL_CIO, new name for 791 | ROTATEXL_CO. 792 | * superopt.c: Only use ROTATEXL_CIO when carry is defined. 793 | 794 | * superopt.c (test_sequence): Add some values to test_operands 795 | vector. 796 | * superopt.c (RISC synth): Try add of immediate 1. 797 | * superopt.[ch]: Add new goals for GS paper. 798 | 799 | Thu Mar 5 05:56:12 1992 Torbjorn Granlund (tege@sics.se) 800 | 801 | * superopt.[ch], run_program.def: Ported to pyramid. Added 802 | _CC internal insns. 803 | 804 | Fri Feb 14 23:19:11 1992 Torbjorn Granlund (tege@sics.se) 805 | 806 | * superopt.c (output_assembler M88100): Handle negative values for 807 | ADD_CIO, by outputting subu. 808 | * superopt.c (RISC synth): Fix comment add ADD_CIO (..., -1). 809 | 810 | Thu Dec 12 21:54:14 1991 Torbjorn Granlund (tege@sics.se) 811 | 812 | * superopt.c (test_sequence): Add comments. 813 | 814 | Tue Dec 10 21:37:01 1991 Torbjorn Granlund (tege@sics.se) 815 | 816 | * superopt.c (main_synth): Make nested for loops have different 817 | induction variables... 818 | 819 | Thu Dec 5 19:33:40 1991 Torbjorn Granlund (tege@sics.se) 820 | 821 | * superopt.c (output_assembler several places): Cast IMMEDIATE_VAL 822 | to signed_word when comparing to zero. 823 | 824 | Wed Nov 13 21:20:11 1991 Torbjorn Granlund (tege@sics.se) 825 | 826 | * superopt.c (main_synth): Hack to generate the initial random 827 | arguments such that the goal function take a value != 0. 828 | 829 | Mon Nov 11 11:41:50 1991 Torbjorn Granlund (tege@sics.se) 830 | 831 | * superopt.c (output_assembler RS6000 and M88000): Handle (op1 & 1). 832 | * superopt.c (RISC synth): Try (op1 & 1) on all machines. 833 | 834 | Sat Nov 2 16:24:35 1991 Torbjorn Granlund (tege@sics.se) 835 | 836 | * superopt.c (RISC synth): Try 29k CPXX insns with immediate 0. 837 | 838 | * superopt.c (test_sequence): Output small negative operands in 839 | decimal. 840 | 841 | Fri Oct 25 01:01:54 1991 Torbjorn Granlund (tege@sics.se) 842 | 843 | * superopt.c (test_sequence): Test with VALUE_MAX_SIGNED too. 844 | 845 | * superopt.c (CISC synth): Try CMP operation for VALUE_MAX_SIGNED 846 | and VALUE_MIN_SIGNED on '386. 847 | 848 | * superopt.c, superopt.h, run_program.def: Add new operation 849 | ROTATEXL_CO. Try it for CISCs. 850 | * ROTATEL new name for ROTATE. 851 | 852 | * superopt.c (output_assembler I386): Fix syntax for if statement. 853 | 854 | * superopt.h (PERFORM_*SHIFT*, PERFORM_ROTATE*): Truncate shift 855 | counts with BITS_PER_WORD. 
856 | * superopt.h (TRUNC_CNT): New macro for shift count truncation. 857 | 858 | * superopt.c (output_assembler M68000 COPY): Output "moveq" for 859 | negative numbers as intended, by casting operands to signed_word. 860 | 861 | * superopt.c (RISC synth): Try SUB for two regs even on M88000. 862 | 863 | * superopt.c (RISC synth, CISC synth): Try rotate and shift 864 | instruction with count 1. 865 | * superopt.c (output_assembler): Output rotate instructions. 866 | 867 | * superopt.c, superopt.h, run_program.def: Add new operation 868 | "ASHIFTR_CON" for RS/6000 arithmetic right shifts. (The 869 | arithmetic shift insns used to be incorrectly described as not 870 | affecting carry.) 871 | 872 | Local Variables: 873 | mode: indented-text 874 | left-margin: 8 875 | fill-column: 76 876 | version-control: never 877 | End: 878 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for GNU superoptimizer 2 | 3 | MAXCOST = 3 4 | EXTRA = -shifts -extracts 5 | 6 | CC = gcc 7 | DBG = -g 8 | OPT = -O 9 | CFLAGS = $(CPU) $(OPT) $(DBG) 10 | ALL_MACHINES = superopt-sparc \ 11 | superopt-power \ 12 | superopt-powerpc \ 13 | superopt-m88000 \ 14 | superopt-am29k \ 15 | superopt-mc68000 \ 16 | superopt-mc68020 \ 17 | superopt-i386 \ 18 | superopt-i960a \ 19 | superopt-i960b \ 20 | superopt-pyr \ 21 | superopt-alpha \ 22 | superopt-sh \ 23 | superopt-hppa \ 24 | superopt-xcore \ 25 | superopt-avr 26 | 27 | OBJS = superopt.o 28 | SRCS = superopt.c 29 | HDRS = run_program.def insn.def goal.def synth.def superopt.h version.h \ 30 | longlong.h 31 | BINDIR = /usr/local/bin 32 | INSTALL = install -c 33 | FILES = README COPYING Makefile TODO ChangeLog superopt.c synth.def $(HDRS) 34 | 35 | VERSION = `sed 's,char \*version_string = "\([0-9.]*\)";,\1,' < version.h` 36 | 37 | superopt: $(SRCS) $(HDRS) 38 | $(CC) $(CFLAGS) -o superopt $(SRCS) 39 | 40 | clean: 41 | rm -f $(OBJS) superopt $(ALL_MACHINES) *.tmp 42 | 43 | install: superopt 44 | $(INSTALL) superopt $(BINDIR)/superopt 45 | 46 | all: $(ALL_MACHINES) 47 | 48 | install-all: all 49 | for x in $(ALL_MACHINES); do $(INSTALL) $$x $(BINDIR)/$$x; done 50 | 51 | run-all: sparc.res power.res powerpc.res m88000.res am29k.res mc68000.res \ 52 | mc68020.res i386.res i960a.res i960b.res pyr.res alpha.res sh.res \ 53 | hppa.res 54 | @echo "Done!" 
55 | 56 | superopt-sparc: $(SRCS) $(HDRS) 57 | $(CC) $(CFLAGS) -DSPARC -o superopt-sparc $(SRCS) 58 | sparc.res: superopt-sparc 59 | ./superopt-sparc -all $(EXTRA) -max $(MAXCOST) -as >sparc.tmp 2>&1 60 | mv sparc.tmp sparc.res 61 | 62 | superopt-power: $(SRCS) $(HDRS) 63 | $(CC) $(CFLAGS) -DPOWER -o superopt-power $(SRCS) 64 | power.res: superopt-power 65 | ./superopt-power -all $(EXTRA) -max $(MAXCOST) -as >power.tmp 2>&1 66 | mv power.tmp power.res 67 | 68 | superopt-powerpc: $(SRCS) $(HDRS) 69 | $(CC) $(CFLAGS) -DPOWERPC -o superopt-powerpc $(SRCS) 70 | powerpc.res: superopt-powerpc 71 | ./superopt-powerpc -all $(EXTRA) -max $(MAXCOST) -as >powerpc.tmp 2>&1 72 | mv powerpc.tmp powerpc.res 73 | 74 | superopt-m88000: $(SRCS) $(HDRS) 75 | $(CC) $(CFLAGS) -DM88000 -o superopt-m88000 $(SRCS) 76 | m88000.res: superopt-m88000 77 | ./superopt-m88000 -all $(EXTRA) -max $(MAXCOST) -as >m88000.tmp 2>&1 78 | mv m88000.tmp m88000.res 79 | 80 | superopt-am29k: $(SRCS) $(HDRS) 81 | $(CC) $(CFLAGS) -DAM29K -o superopt-am29k $(SRCS) 82 | am29k.res: superopt-am29k 83 | ./superopt-am29k -all $(EXTRA) -max $(MAXCOST) -as >am29k.tmp 2>&1 84 | mv am29k.tmp am29k.res 85 | 86 | superopt-mc68000: $(SRCS) $(HDRS) 87 | $(CC) $(CFLAGS) -DMC68000 -o superopt-mc68000 $(SRCS) 88 | mc68000.res: superopt-mc68000 89 | ./superopt-mc68000 -all $(EXTRA) -max $(MAXCOST) -as >mc68000.tmp 2>&1 90 | mv mc68000.tmp mc68000.res 91 | 92 | superopt-mc68020: $(SRCS) $(HDRS) 93 | $(CC) $(CFLAGS) -DMC68020 -o superopt-mc68020 $(SRCS) 94 | mc68020.res: superopt-mc68020 95 | ./superopt-mc68020 -all $(EXTRA) -max $(MAXCOST) -as >mc68020.tmp 2>&1 96 | mv mc68020.tmp mc68020.res 97 | 98 | superopt-i386: $(SRCS) $(HDRS) 99 | $(CC) $(CFLAGS) -DI386 -o superopt-i386 $(SRCS) 100 | i386.res: superopt-i386 101 | ./superopt-i386 -all $(EXTRA) -max $(MAXCOST) -as >i386.tmp 2>&1 102 | mv i386.tmp i386.res 103 | 104 | superopt-i960a: $(SRCS) $(HDRS) 105 | $(CC) $(CFLAGS) -DI960 -o superopt-i960a $(SRCS) 106 | i960a.res: superopt-i960a 107 | ./superopt-i960a -all $(EXTRA) -max $(MAXCOST) -as >i960a.tmp 2>&1 108 | mv i960a.tmp i960a.res 109 | 110 | superopt-i960b: $(SRCS) $(HDRS) 111 | $(CC) $(CFLAGS) -DI960B -o superopt-i960b $(SRCS) 112 | i960b.res: superopt-i960b 113 | ./superopt-i960b -all $(EXTRA) -max $(MAXCOST) -as >i960b.tmp 2>&1 114 | mv i960b.tmp i960b.res 115 | 116 | superopt-pyr: $(SRCS) $(HDRS) 117 | $(CC) $(CFLAGS) -DPYR -o superopt-pyr $(SRCS) 118 | pyr.res: superopt-pyr 119 | ./superopt-pyr -all $(EXTRA) -max $(MAXCOST) -as >pyr.tmp 2>&1 120 | mv pyr.tmp pyr.res 121 | 122 | superopt-alpha: $(SRCS) $(HDRS) 123 | $(CC) $(CFLAGS) -DALPHA -o superopt-alpha $(SRCS) 124 | alpha.res: superopt-alpha 125 | ./superopt-alpha -all $(EXTRA) -max $(MAXCOST) -as >alpha.tmp 2>&1 126 | mv alpha.tmp alpha.res 127 | 128 | superopt-sh: $(SRCS) $(HDRS) 129 | $(CC) $(CFLAGS) -DSH -o superopt-sh $(SRCS) 130 | sh.res: superopt-sh 131 | ./superopt-sh -all $(EXTRA) -max $(MAXCOST) -as >sh.tmp 2>&1 132 | mv sh.tmp sh.res 133 | 134 | superopt-hppa: $(SRCS) $(HDRS) 135 | $(CC) $(CFLAGS) -DHPPA -o superopt-hppa $(SRCS) 136 | hppa.res: superopt-hppa 137 | ./superopt-hppa -all $(EXTRA) -max $(MAXCOST) -as >hppa.tmp 2>&1 138 | mv hppa.tmp hppa.res 139 | 140 | superopt-xcore: $(SRCS) $(HDRS) 141 | $(CC) $(CFLAGS) -DXCORE -o superopt-xcore $(SRCS) 142 | xcore.res: superopt-xcore 143 | ./superopt-xcore -all $(EXTRA) -max $(MAXCOST) -as >xcore.tmp 2>&1 144 | mv xcore.tmp xcore.res 145 | 146 | superopt-avr: $(SRCS) $(HDRS) 147 | $(CC) $(CFLAGS) -DAVR -o superopt-avr 
$(SRCS)
148 | avr.res: superopt-avr
149 | 	./superopt-avr -all $(EXTRA) -max $(MAXCOST) -as >avr.tmp 2>&1
150 | 	mv avr.tmp avr.res
151 | 
152 | dist:
153 | 	mkdir superopt-$(VERSION)
154 | 	ln $(FILES) superopt-$(VERSION)
155 | 	tar cf - superopt-$(VERSION) | gzip --best > superopt-$(VERSION).tar.gz
156 | 	rm -rf superopt-$(VERSION)
157 | 
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | 
2 | 			    GNU SUPEROPTIMIZER
3 | 
4 | The superoptimizer is a function sequence generator that uses an exhaustive
5 | generate-and-test approach to finding the shortest instruction sequence for
6 | a given function.  You have to tell the superoptimizer which function and
7 | which CPU you want to generate code for, and how many instructions you can
8 | accept.
9 | 
10 | The superoptimizer can't generate very long sequences, unless you have a
11 | very fast computer or very much spare time.  The time complexity of the
12 | algorithm used is approximately
13 | 
14 | 	O(m * n^(2n))
15 | 
16 | 
17 | where m is the number of available instructions on the architecture and n is
18 | the shortest sequence for the goal function.  The practical sequence length
19 | limit depends on the target architecture and goal function arity; in most
20 | cases it is about 5, but for a rich instruction set such as the HPPA it is
21 | just 4.  The longest sequence ever generated was for the MC68020 and was 7
22 | instructions long.  It took several weeks to generate it...
23 | 
24 | The superoptimizer can't guarantee that it finds the best possible
25 | instruction sequences for all possible goal functions.  For example, it
26 | doesn't even try to include immediate constants (other than -1, 0, +1, and
27 | the smallest negative and biggest positive numbers) in the sequences.
28 | 
29 | Other reasons why optimal sequences might not be found are that not all
30 | instructions are included, not even in their register-only form.  Also, some
31 | instructions included might not be correctly simulated.  If you encounter
32 | any of these problems, please report them to the address below.
33 | 
34 | WARNING!  The generated sequences might be incorrect with a very small
35 | probability.  Always make sure a sequence is correct before using it.  So
36 | far, I have never encountered any incorrect sequences.  If you find one,
37 | please let me know about it!
38 | 
39 | Having said this, note that the superoptimizer practically always finds
40 | optimal and correct sequences for functions that depend on registers only.
41 | 
42 | 
43 | USAGE INSTRUCTIONS
44 | 
45 | The superoptimizer supports these CPUs: SPARC v7, Motorola 68000, 68020, and
46 | 88000, IBM POWER and PowerPC, AMD 29000, Intel x86 and 960 1.0 and 1.1,
47 | Pyramid, DEC Alpha, HP PA-RISC, and Hitachi SH.  SGI Mips is not supported,
48 | since it doesn't have instructions whose use is non-obvious.  Some new
49 | instructions, like the Intel P6 and Sparc v9 conditional moves, are not
50 | supported.
51 | 
52 | You need an ANSI C compiler, for example GCC, to compile the superoptimizer.
53 | Type
54 | 
55 | 	make CPU=-D<cpu> superopt
56 | 
57 | where <cpu> is one of SPARC, MC68000, MC68020, M88000, POWER, POWERPC,
58 | AM29K, I386, I960 (for i960 1.0), I960B (for i960 1.1), PYR, ALPHA, HPPA,
59 | or SH.  The compilation might take a long time and use up a lot of memory,
60 | especially for HPPA.
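
For example, to build a superoptimizer for the SPARC with the default
compiler flags, you would type

	make CPU=-DSPARC superopt

The per-CPU targets in the Makefile, such as `make superopt-sparc', run the
same compilation but name the resulting binary after the CPU.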
61 | 
62 | You can also build all superoptimizers by typing:
63 | 
64 | 	make all
65 | 
66 | This will create superopt-sparc, superopt-power, etc.
67 | 
68 | There are also install targets; use `make install' to install a single
69 | superoptimizer and `make install-all' to install all of them.
70 | 
71 | To run the superoptimizer, type
72 | 
73 | 	superopt -f<function> | -all [-assembly] [-max-cost n]
74 | 		 [-shifts] [-extracts] [-no-carry-insns] [-extra-cost n]
75 | 
76 | and wait until the found instruction sequences are printed.  For example,
77 | 
78 | 	superopt -flts0 -as
79 | 
80 | will print all sequences computing the statement
81 | 
82 | 	{ r = (signed_word) v0 < 0; }.
83 | 
84 | See below for some examples of possible goal functions.
85 | 
86 | By default, the superoptimizer doesn't try all immediate shift counts.  To
87 | enable all shift counts, pass -shifts as a command line option.  To enable
88 | all bit field extracts, use -extracts.
89 | 
90 | OPTIONS
91 | 
92 | The `-f' option always has to be given, to tell the superoptimizer for
93 | which function it should try to find an instruction sequence.  See below
94 | for possible function names.
95 | 
96 | Option names may be abbreviated.
97 | 
98 | -assembly
99 | 	Output assembly suitable to feed the assembler instead of pseudo-
100 | 	code suitable for humans.
101 | 
102 | -max-cost n
103 | 	Limit the `cost' of the instruction sequence to n.  May be used to
104 | 	stop the search if no instruction sequence of that cost or lower
105 | 	is found.  By default this is 4.
106 | 
107 | -extra-cost n
108 | 	Search for sequences n more expensive than the cheapest found
109 | 	sequence.  Default is 0, meaning that only the cheapest sequence(s)
110 | 	are printed.
111 | 
112 | -no-carry-insns
113 | 	Don't use instructions that use the carry flag.  This might be
114 | 	desirable on RISCs to simplify instruction scheduling.
115 | 
116 | -shifts
117 | 	Include all shift counts supported by the target architecture in
118 | 	the search.  This slows down the search considerably.
119 | 
120 | -extracts
121 | 	Include all bit field extracts supported by the target architecture
122 | 	in the search.  This slows down the search considerably.
123 | 
124 | -f<function>
125 | 
126 | 	where <function> is one of eq, ne, les, ges, lts, gts,
127 | 	leu, geu, ltu, gtu, eq0, ne0, les0, ges0, lts0, gts0, neq, nne,
128 | 	nles, nges, nlts, ngts, nleu, ngeu, nltu, ngtu, neq0, nne0, nles0,
129 | 	nges0, nlts0, ngts0, maxs, mins, maxu, minu, sgn, abs, nabs, gray,
130 | 	or gray2, etc, etc.
131 | 
132 | 	eq, ne, les, etc, compute the C expressions "a == b", "a != b", "a
133 | 	<= b", etc, where the operation codes ending in `s' indicate
134 | 	signed comparison; `u' indicates unsigned comparison.
135 | 
136 | 	eq0, etc, compute "a == 0", etc.
137 | 
138 | 	The `n' before the names means that the corresponding function
139 | 	value is negated, e.g. nlts is the C expression "-(a < b)".
140 | 
141 | 	maxs, mins, maxu, minu are binary (i.e. two-argument) signed and
142 | 	unsigned max and min, respectively.
143 | 
144 | 	sgn is the unary sign function; -1 for negative, 0 for zero, and +1
145 | 	for positive arguments.
146 | 
147 | 	abs and nabs are absolute value and negative absolute value,
148 | 	respectively.
149 | 
150 | 	For a complete list of goal functions and their definitions, look in
151 | 	the file goal.def.  You can easily add your own goal functions to
152 | 	that file.  After having added a new function, you have to recompile
153 | 	the superoptimizer.
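
	As an illustration of the format (the goal name here is made up,
	not one of the predefined goals), a one-operand goal that rounds
	its argument up to a multiple of 4 could be added to goal.def as

		DEF_GOAL (ROUND_UP_4, 1, "round_up_4", { r = (v0 + 3) & -4; })

	where the fields are the internal name, the number of input
	operands, the name used with the -f option, and the C code that
	computes the result r from the inputs v0, v1, ....  After
	recompiling, `superopt -fround_up_4 -as' would search for it.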
154 | 
155 | 
156 | READING SUPEROPTIMIZER OUTPUT
157 | 
158 | By default, the superoptimizer outputs sequences in a syntax resembling a
159 | high-level language.  For example, this is the output for M88000/abs:
160 | 
161 | 1:	r1:=arith_shift_right(r0,0x1f)
162 | 	r2:=add_co(r1,r0)
163 | 	r3:=xor(r2,r1)
164 | 2:	r1:=arith_shift_right(r0,0x1f)
165 | 	r2:=add(r1,r0)
166 | 	r3:=xor(r2,r1)
167 | 3:	r1:=arith_shift_right(r0,0x1f)
168 | 	r2:=xor(r1,r0)
169 | 	r3:=adc_co(r2,r1)
170 | 
171 | r1:=arith_shift_right(r0,0x1f) means "shift r0 right 31 steps
172 | arithmetically and put the result in r1".  add_co is "add and set carry".
173 | adc_co is the subtraction instruction found on most RISCs, i.e. "add with
174 | complement and set carry".  This may seem dumb, but there is an important
175 | difference in the way carry is set after an addition-with-complement and a
176 | subtraction.  The suffixes "_ci" and "_cio" mean, respectively, that carry
177 | is input but not affected, and that carry is both input and generated.
178 | 
179 | The interesting value is always the value computed by the last instruction.
180 | 
181 | 
182 | *********************************
183 | 
184 | Please send comments, improvements and new ports to tege@gnu.ai.mit.edu.
185 | 
186 | The GNU superoptimizer was written by Torbjorn Granlund (currently with
187 | Cygnus Support).  Tom Wood (at the time with Data General, now at Motorola)
188 | made major improvements, like the clean way to describe goal functions and
189 | internal instructions.  The original superoptimizer idea is due to Henry
190 | Massalin.
191 | 
192 | The GNU superoptimizer and its application to tuning GCC are described in
193 | the proceedings of the ACM SIGPLAN conference on Programming Language
194 | Design and Implementation (PLDI), 1992.
195 | 
--------------------------------------------------------------------------------
/README.xcore:
--------------------------------------------------------------------------------
1 | Build using:
2 | 
3 | 	make superopt-xcore
4 | 
5 | To start searching for an optimal sequence for a particular goal function
6 | (in this example abs) use:
7 | 
8 | 	superopt-xcore -fabs -assembly -max-cost 3
9 | 
10 | This should produce the following output:
11 | 
12 | Searching for { r = (signed_word) v0 < 0 ? -v0 : v0; }
13 | Superoptimizing at cost 1 2 3
14 | 1:	ashr r1,r0,32
15 | 	add r2,r1,r0
16 | 	xor r3,r2,r1
17 | 2:	ashr r1,r0,32
18 | 	xor r2,r1,r0
19 | 	sub r3,r2,r1
20 | [2 sequences found]
21 | 
22 | To add new goal functions you will need to edit goal.def and recompile; the
23 | syntax should be clear from the existing examples.
24 | 
--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | * For the 960, we often see sequences "chkbit;concmp" where the chkbit is
2 |   just used to unconditionally clear bit 2 of Ac.cc.  We might want to prune
3 |   the variants of chkbit to decrease the number of printed sequences.
4 | 
5 | * For the 960, we pass the prune hints based on bit 1 of the Ac.cc.  This
6 |   might lead to undesirable pruning.
7 | 
8 | * For the 960 and Alpha, we let conditionally executed instructions like
9 |   ADDO_cc_960, SUBO_cc_960, and CMOVEcc overwrite an existing register.
10 |   This is OK, but we should also write to a new register, since several
11 |   conditional adds and subtracts with different conditions and the same
12 |   destination register might make the result well-defined anyway.
13 | 
14 | * When we have conditional execution (HPPA, i960, Alpha, Sparc9, Coldfire,
15 |   etc) we have to make sure we prune instruction 3 carefully in this
16 |   situation:
17 | 
18 | 	insn1
19 | 	insn2 is conditionally executed
20 | 	insn3
21 | 
22 |   Depending on the flag settings of insn1 and insn2, the correct set of
23 |   insn3 to try is tricky.  Now, we might prune too much.
24 | 
25 | * Later 29k models have additional logical ops.  Add them!
26 | 
27 | * add_co(a,a) and shiftl_co(a,1) are identical.  Affects m68k, x86, pyr, etc.
28 | 
29 | * For goal functions with the same arity, the exact same computations are
30 |   made in synth.  This suggests that we could search for many goal functions
31 |   in parallel instead of serially.  We could maintain an array of goal
32 |   values, one for each goal.  Instead of simply comparing the last generated
33 |   value to goal_value, we would loop through a goal_values[] array, and call
34 |   test_sequence for each goal that matches.  This would speed up searches by
35 |   as much as a factor of 10.
36 | 
37 | * Adding the bsfl instruction revealed a deficiency: We can't deal with
38 |   instructions that give an undefined result for some inputs.  This is so
39 |   because the sequences might fail to work only when the undefined result
40 |   happens to become a certain value.  To cope with this, we have to make
41 |   test_sequence try lots of values, but it can only do that if it knows
42 |   about these instructions.
43 | 
44 |   A cleaner way would be to add a valid bit to each computed value.
45 | 
46 | * Now we require equality between a computed goal value and a computed
47 |   result.  Permit fuzzier functions, like "something negative".  E.g., a
48 |   fuzzy sgn function might be useful.
49 | 
50 | * Most importantly: Generalize the class of possible goal functions.  Allow
51 |   them to be any mapping from a vector of words to another vector of words,
52 |   each of arbitrary length.
53 | 
54 |   To make it fast, record after each instruction if it generates a value
55 |   that is in (the vector) goal_value, and prune a sequence if it has not
56 |   produced N-M requested values when M more instructions are allowed [N the
57 |   number of words in goal_value].
58 | 
59 |   We should split `synth'.  The leaf search `synth' function could be
60 |   written as it is currently, but with the leaf-test "if (allowed_cost > 0)"
61 |   removed.  The non-leaf `synth' needs to loop and look for the generated
62 |   value in goal_value.  To avoid massive code replication, we have to put
63 |   the synth function in a separate file, and play with cpp and #include.
64 | 
65 |   Make sure to handle the case where you find all values before the last
66 |   instruction.  This might be non-trivial!  We know that we have to use the
67 |   value from the ultimate instruction, otherwise we would have found this
68 |   sequence before.  Problem is, we will either have to loop and look for
69 |   the value in goal_value, or, probably much better, just accept the
70 |   sequence.
71 | 
72 | * Add -test-on-cpu option triggering a mechanism for testing the generated
73 |   sequences on the real hardware.  That would help debug the simulation
74 |   code.
75 | 
76 | * I'd like to have a means to define that a goal function is not defined
77 |   for all possible input values.  An extra parameter, ALLOWED_ARGUMENTS, to
78 |   DEF_GOAL could take care of that.
79 | 
80 |   Also I'd like the user to have the possibility to add a list of immediate
81 |   values to try for each goal function.  For example, 31 and 32 could be
82 |   useful for ffs.
83 | 
84 | * Make it possible to handle more immediate values, for example by putting
85 |   them in the immediate_val array.
86 | 
87 | * Interpret goal functions so the user doesn't need to recompile.
88 |   Interpretation would make goal function evaluation slower than it is now,
89 |   but goal function evaluation is not critical.
90 | 
91 | * Add code to algebraically prove that generated sequences are correct.
92 | 
93 | * Add bsrl/bsfl and bfffo to CISC synth.
94 | 
95 | * Check that PERFORM_CLZ works like RS/6000's cntlz and 29k's clz.  Is it
96 |   ok for input == 0?
97 | 
98 | * A major speed improvement would be to make independent insns have a
99 |   canonical order.  Consider `gts' on the SPARC.  This is probably not very
100 |   hard, if insns are enumerated in some clever way and loop variables are
101 |   passed down.  A very simple but potentially quite powerful mechanism:  If
102 |   the putative instruction doesn't depend on the last instruction, compare
103 |   the putative instruction's opcode with the last instruction's opcode, and
104 |   proceed iff, say, the < relation holds.
105 | 
106 |   After an instruction that sets carry (and there is another instruction
107 |   with the same effect apart from that it doesn't affect carry), the
108 |   generated carry has to be used.  [Fix this with a reservation vector
109 |   --allow both making and deleting a reservation.  Make a reservation when
110 |   carry is generated and delete it when it is used.]  The leaf instructions
111 |   have to input carry if an unused carry is pending.
112 | 
113 |   Make sure all computed values are used by subsequent instructions.  For
114 |   example, if we have just two more values to compute and three yet unused
115 |   values, the last two instructions have to restrict their input operands.
116 | 
117 | * Efficient pruning of sequences not using generated resources:
118 | 
119 |   Each generated instruction should record its computed 'resources' in a
120 |   list of unused resources.  (A written register is such a resource, and the
121 |   carry flag is such a resource.)  When a resource is used by an
122 |   instruction, it is removed from the database.
123 | 
124 |   At each recursion, we check that the unused resources can be consumed
125 |   with the allowed number of instructions.  If not, we back-track.
126 | 
127 |   Beware: A resource is not 'consumed' when it has been used.  I have seen
128 |   optimal sequences that use a generated carry more than once.
129 | 
130 | * Shift 32 steps on 68k is well-defined.  LSHIFTR_CO can be used to zero a
131 |   word and simultaneously move the sign bit to the X flag, and ASHIFTR_CO can
132 |   be used to propagate the sign bit to the whole word and to the X flag.
133 |   Useful?
134 | 
135 | * Model the exact timing, i.e., instruction overlap, superscalar issue,
136 |   etc.  Requires modelling the CPU internal function units.
137 | 
138 | * `386: bt, clc, cmc, cdq[0->1], lea, shld, shrd, stc.
139 | 
140 | * Make the instruction description cleaner.  Something of this kind would
141 |   be great:
142 | 
143 |   88k:
144 |   {ADD,    "addu %d{r},%1{r,0},%2{r,[0-FFFF]}"},
145 |   {ADD_CI, "addu.ci %d{r},%1{r,0},%2{r,[0-FFFF]}"},
146 |   ...
147 | 
148 |   sparc:
149 |   {ADD,    "add %1{r,0},%2{r,[-1000,+FFF]},%d{r}"},
150 |   {ADD_CI, "addx %1{r,0},%2{r,[-1000,+FFF]},%d{r}"},
151 |   ...
152 | 
153 |   We would need a tool to extract the information and generate a 'synth'
154 |   function.  (That instruction description format would be useful to
155 |   assemblers, disassemblers, and simulators too.)
156 | 157 | * Include a 'synth' function for several targets in one gso binary. Have a 158 | command line option -t select which one to use. 159 | 160 | * The following XCore instructions are missing: crc, crc8, ladd, lmul, lsub, 161 | maccs, maccu. In addition not all possible immediate values for the add, eq, 162 | ldaw, ldc, mkmsk, sext, sub and zext instructions are used. 163 | -------------------------------------------------------------------------------- /goal.def: -------------------------------------------------------------------------------- 1 | /* This file contains the definitions and documentation for the goal functions 2 | used in the Superoptimizer. 3 | 4 | Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. 5 | 6 | This program is free software; you can redistribute it and/or modify it 7 | under the terms of the GNU General Public License as published by the 8 | Free Software Foundation; either version 2, or (at your option) any 9 | later version. 10 | 11 | This program is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; see the file COPYING. If not, write to the Free 18 | Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 | 20 | /* The fields in the cpp macro call "DEF_GOAL()" are: 21 | 22 | 1. The internal name of the goal function. 23 | 24 | 2. The number of input operands. 25 | 26 | 3. The printed name of the instruction. 27 | 28 | 4. C code that implements the function. 29 | 30 | */ 31 | 32 | DEF_GOAL (P1, 1, "p1", { r = (v0-1) & v0; }) 33 | DEF_GOAL (P2, 1, "p2", { r = (v0+1) & v0; }) 34 | DEF_GOAL (P3, 1, "p3", { r = (-v0) & v0; }) 35 | DEF_GOAL (P4, 1, "p4", { r = (v0-1) ^ v0; }) 36 | DEF_GOAL (P5, 1, "p5", { r = (v0-1) | v0; }) 37 | DEF_GOAL (P6, 1, "p6", { r = (v0+1) | v0; }) 38 | DEF_GOAL (P7, 1, "p7", { r = (v0+1) & ~v0; }) 39 | DEF_GOAL (P8, 1, "p8", { r = (v0-1) & ~v0; }) 40 | DEF_GOAL (P9, 1, "p9", { r = ((signed_word) v0 < 0)?-v0:v0; }) 41 | DEF_GOAL (P10, 2, "p10", { r = (v0^v1) <= (v0&v1); }) 42 | DEF_GOAL (P11, 2, "p11", { r = (v0&~v1) > v1 ; }) 43 | DEF_GOAL (P12, 2, "p12", { r = (v0&~v1) <= v1 ; }) 44 | DEF_GOAL (P13, 1, "p13", { r = (signed_word) v0 > 0 ? 1 : ((signed_word) v0 < 0 ? -1 : 0); }) 45 | DEF_GOAL (P14, 2, "p14", { r = (v0&v1) + ((v0^v1)>>1) ; }) 46 | DEF_GOAL (P15, 2, "p15", { r = (v0|v1) - ((v0^v1)>>1) ; }) 47 | DEF_GOAL (P16, 2, "p16", { r = (v0 > v1)?v0:v1 ; }) 48 | DEF_GOAL (P17, 1, "p17", { r = (((v0-1) | v0) + 1) & v0; }) 49 | DEF_GOAL (P18, 1, "p18", { r = !((v0-1) & v0) && v0; }) 50 | DEF_GOAL (P19, 3, "p19", { unsigned_word r__o3 = ((v0 >> v2) ^ v0) & v1; r = ((r__o3 << 1) ^ r__o3) ^ v0; }) 51 | DEF_GOAL (P20, 3, "p20", { unsigned_word r__o2 = v0 & -v0; \ 52 | unsigned_word r__o3 = r__o2 + v0; \ 53 | unsigned_word r__o3a = ((v0 ^ r__o2) >> 2); \ 54 | r = (r__o2?r__o3a / r__o2:0) | r__o3; }) 55 | DEF_GOAL (P21, 4, "p21", { r = (v0 == v1)? v2 : ((v0 == v2)?v3:v1); }) 56 | DEF_GOAL (P22, 1, "p22", { r = __builtin_popcountl(v0)&1 ; }) 57 | DEF_GOAL (P23, 1, "p23", { r = __builtin_popcountl(v0) ; }) 58 | DEF_GOAL (P24, 1, "p24", { r = (v0 <= 2)? 
v0 : (1 << (sizeof(v0)*8 - __builtin_clz(v0 - 1))) ; }) 59 | DEF_GOAL (P25, 2, "p25", ({ word t1; word t2; umul_ppmm (t1, t2, v0, v1); r = t2; })) 60 | 61 | DEF_GOAL (EQ, 2, "eq", { r = v0 == v1; }) 62 | DEF_GOAL (NE, 2, "ne", { r = v0 != v1; }) 63 | DEF_GOAL (LES, 2, "les", { r = (signed_word) v0 <= (signed_word) v1; }) 64 | DEF_GOAL (GES, 2, "ges", { r = (signed_word) v0 >= (signed_word) v1; }) 65 | DEF_GOAL (LTS, 2, "lts", { r = (signed_word) v0 < (signed_word) v1; }) 66 | DEF_GOAL (GTS, 2, "gts", { r = (signed_word) v0 > (signed_word) v1; }) 67 | DEF_GOAL (LEU, 2, "leu", { r = (unsigned_word) v0 <= (unsigned_word) v1; }) 68 | DEF_GOAL (GEU, 2, "geu", { r = (unsigned_word) v0 >= (unsigned_word) v1; }) 69 | DEF_GOAL (LTU, 2, "ltu", { r = (unsigned_word) v0 < (unsigned_word) v1; }) 70 | DEF_GOAL (GTU, 2, "gtu", { r = (unsigned_word) v0 > (unsigned_word) v1; }) 71 | DEF_GOAL (LESU, 2, "lesu", { r = (signed_word) v0 < 0 || (unsigned_word) v0 <= (unsigned_word) v1; }) 72 | DEF_GOAL (GESU, 2, "gesu", { r = (signed_word) v0 >= 0 && (unsigned_word) v0 >= (unsigned_word) v1; }) 73 | DEF_GOAL (LTSU, 2, "ltsu", { r = (signed_word) v0 < 0 || (unsigned_word) v0 < (unsigned_word) v1; }) 74 | DEF_GOAL (GTSU, 2, "gtsu", { r = (signed_word) v0 >= 0 && (unsigned_word) v0 > (unsigned_word) v1; }) 75 | 76 | DEF_GOAL (EQ0, 1, "eq0", { r = v0 == 0; }) 77 | DEF_GOAL (NE0, 1, "ne0", { r = v0 != 0; }) 78 | DEF_GOAL (LES0, 1, "les0", { r = (signed_word) v0 <= 0; }) 79 | DEF_GOAL (GES0, 1, "ges0", { r = (signed_word) v0 >= 0; }) 80 | DEF_GOAL (LTS0, 1, "lts0", { r = (signed_word) v0 < 0; }) 81 | DEF_GOAL (GTS0, 1, "gts0", { r = (signed_word) v0 > 0; }) 82 | 83 | DEF_GOAL (NEQ, 2, "neq", { r = -(v0 == v1); }) 84 | DEF_GOAL (NNE, 2, "nne", { r = -(v0 != v1); }) 85 | DEF_GOAL (NLES, 2, "nles", { r = -((signed_word) v0 <= (signed_word) v1); }) 86 | DEF_GOAL (NGES, 2, "nges", { r = -((signed_word) v0 >= (signed_word) v1); }) 87 | DEF_GOAL (NLTS, 2, "nlts", { r = -((signed_word) v0 < (signed_word) v1); }) 88 | DEF_GOAL (NGTS, 2, "ngts", { r = -((signed_word) v0 > (signed_word) v1); }) 89 | DEF_GOAL (NLEU, 2, "nleu", { r = -((unsigned_word) v0 <= (unsigned_word) v1); }) 90 | DEF_GOAL (NGEU, 2, "ngeu", { r = -((unsigned_word) v0 >= (unsigned_word) v1); }) 91 | DEF_GOAL (NLTU, 2, "nltu", { r = -((unsigned_word) v0 < (unsigned_word) v1); }) 92 | DEF_GOAL (NGTU, 2, "ngtu", { r = -((unsigned_word) v0 > (unsigned_word) v1); }) 93 | 94 | DEF_GOAL (NEQ0, 1, "neq0", { r = -(v0 == 0); }) 95 | DEF_GOAL (NNE0, 1, "nne0", { r = -(v0 != 0); }) 96 | DEF_GOAL (NLES0, 1, "nles0", { r = -((signed_word) v0 <= 0); }) 97 | DEF_GOAL (NGES0, 1, "nges0", { r = -((signed_word) v0 >= 0); }) 98 | DEF_GOAL (NLTS0, 1, "nlts0", { r = -((signed_word) v0 < 0); }) 99 | DEF_GOAL (NGTS0, 1, "ngts0", { r = -((signed_word) v0 > 0); }) 100 | 101 | /* Maximum of 32 for compatability */ 102 | #define N_BITS ((sizeof(unsigned_word)*8) > 32 ? 
32 : (sizeof(unsigned_word)*8)) 103 | /* Negative high one */ 104 | #define HIGHONE_N ((unsigned_word)1<<(N_BITS-1)) 105 | /* Positive high one */ 106 | #define HIGHONE_P ((signed_word)1<<(N_BITS-2)) 107 | DEF_GOAL (EQ80000000, 2, "eq80000000", { r = HIGHONE_N & -(v0 == v1); }) 108 | DEF_GOAL (NE80000000, 2, "ne80000000", { r = HIGHONE_N & -(v0 != v1); }) 109 | DEF_GOAL (LES80000000, 2, "les80000000", { r = HIGHONE_N & -((signed_word) v0 <= (signed_word) v1); }) 110 | DEF_GOAL (GES80000000, 2, "ges80000000", { r = HIGHONE_N & -((signed_word) v0 >= (signed_word) v1); }) 111 | DEF_GOAL (LTS80000000, 2, "lts80000000", { r = HIGHONE_N & -((signed_word) v0 < (signed_word) v1); }) 112 | DEF_GOAL (GTS80000000, 2, "gts80000000", { r = HIGHONE_N & -((signed_word) v0 > (signed_word) v1); }) 113 | DEF_GOAL (LEU80000000, 2, "leu80000000", { r = HIGHONE_N & -((unsigned_word) v0 <= (unsigned_word) v1); }) 114 | DEF_GOAL (GEU80000000, 2, "geu80000000", { r = HIGHONE_N & -((unsigned_word) v0 >= (unsigned_word) v1); }) 115 | DEF_GOAL (LTU80000000, 2, "ltu80000000", { r = HIGHONE_N & -((unsigned_word) v0 < (unsigned_word) v1); }) 116 | DEF_GOAL (GTU80000000, 2, "gtu80000000", { r = HIGHONE_N & -((unsigned_word) v0 > (unsigned_word) v1); }) 117 | 118 | DEF_GOAL (EQ080000000, 1, "eq080000000", { r = HIGHONE_N & -(v0 == 0); }) 119 | DEF_GOAL (NE080000000, 1, "ne080000000", { r = HIGHONE_N & -(v0 != 0); }) 120 | DEF_GOAL (LES080000000, 1, "les080000000", { r = HIGHONE_N & -((signed_word) v0 <= 0); }) 121 | DEF_GOAL (GES080000000, 1, "ges080000000", { r = HIGHONE_N & -((signed_word) v0 >= 0); }) 122 | DEF_GOAL (LTS080000000, 1, "lts080000000", { r = HIGHONE_N & -((signed_word) v0 < 0); }) 123 | DEF_GOAL (GTS080000000, 1, "gts080000000", { r = HIGHONE_N & -((signed_word) v0 > 0); }) 124 | 125 | DEF_GOAL (MAXS, 2, "maxs", { r = (signed_word) v0 > (signed_word) v1 ? v0 : v1; }) 126 | DEF_GOAL (MINS, 2, "mins", { r = (signed_word) v0 < (signed_word) v1 ? v0 : v1; }) 127 | DEF_GOAL (MAXU, 2, "maxu", { r = (unsigned_word) v0 > (unsigned_word) v1 ? v0 : v1; }) 128 | DEF_GOAL (MINU, 2, "minu", { r = (unsigned_word) v0 < (unsigned_word) v1 ? v0 : v1; }) 129 | DEF_GOAL (CMPS, 2, "cmps", { r = (signed_word) v0 > (signed_word) v1? 1 : ((signed_word) v0 < (signed_word) v1 ? -1 : 0); }) 130 | DEF_GOAL (CMPU, 2, "cmpu", { r = (unsigned_word) v0 > (unsigned_word) v1? 1 : ((unsigned_word) v0 < (unsigned_word) v1 ? -1 : 0); }) 131 | DEF_GOAL (SGN, 1, "sgn", { r = (signed_word) v0 > 0 ? 1 : ((signed_word) v0 < 0 ? -1 : 0); }) 132 | DEF_GOAL (ABS, 1, "abs", { r = (signed_word) v0 < 0 ? -v0 : v0; }) 133 | DEF_GOAL (NABS, 1, "nabs", { r = (signed_word) v0 > 0 ? 
-v0 : v0; }) 134 | DEF_GOAL (GRAY, 1, "gray", { r = ((~(v0 ^ (v0 << 1)) & (-1 << (N_BITS-1))) | ((v0 << 1) & ~(-1 << (N_BITS-1))) | (v0 >> (N_BITS-1))); }) 135 | DEF_GOAL (GRAY2, 1, "gray2", { v0 = ((~(v0 ^ (v0 << 1)) & (-1 << (N_BITS-1))) | ((v0 << 1) & ~(-1 << (N_BITS-1))) | (v0 >> (N_BITS-1))); r = ((~(v0 ^ (v0 << 1)) & (-1 << (N_BITS-1))) | ((v0 << 1) & ~(-1 << (N_BITS-1))) | (v0 >> 31)); }) 136 | DEF_GOAL (DIVIDE_BY_2, 1, "divide_by_2", { r = (signed_word) v0 / 2; }) 137 | DEF_GOAL (DIVIDE_BY_4, 1, "divide_by_4", { r = (signed_word) v0 / 4; }) 138 | DEF_GOAL (DIVIDE_BY_2e30, 1, "divide_by_2e30", { r = (signed_word) v0 / HIGHONE_P; }) 139 | DEF_GOAL (MOD_BY_2, 1, "mod_by_2", { r = (signed_word) v0 % 2; }) 140 | DEF_GOAL (MOD_BY_4, 1, "mod_by_4", { r = (signed_word) v0 % 4; }) 141 | DEF_GOAL (MOD_BY_2e30, 1, "mod_by_2e30", { r = (signed_word) v0 % HIGHONE_P; }) 142 | 143 | DEF_GOAL (EQ_PLUS, 3, "eq+", { r = (v0 == v1) + v2; }) 144 | DEF_GOAL (NE_PLUS, 3, "ne+", { r = (v0 != v1) + v2; }) 145 | DEF_GOAL (LES_PLUS, 3, "les+", { r = ((signed_word) v0 <= (signed_word) v1) + v2; }) 146 | DEF_GOAL (GES_PLUS, 3, "ges+", { r = ((signed_word) v0 >= (signed_word) v1) + v2; }) 147 | DEF_GOAL (LTS_PLUS, 3, "lts+", { r = ((signed_word) v0 < (signed_word) v1) + v2; }) 148 | DEF_GOAL (GTS_PLUS, 3, "gts+", { r = ((signed_word) v0 > (signed_word) v1) + v2; }) 149 | DEF_GOAL (LEU_PLUS, 3, "leu+", { r = ((unsigned_word) v0 <= (unsigned_word) v1) + v2; }) 150 | DEF_GOAL (GEU_PLUS, 3, "geu+", { r = ((unsigned_word) v0 >= (unsigned_word) v1) + v2; }) 151 | DEF_GOAL (LTU_PLUS, 3, "ltu+", { r = ((unsigned_word) v0 < (unsigned_word) v1) + v2; }) 152 | DEF_GOAL (GTU_PLUS, 3, "gtu+", { r = ((unsigned_word) v0 > (unsigned_word) v1) + v2; }) 153 | DEF_GOAL (LESU_PLUS, 3, "lesu+", { r = ((signed_word) v0 < 0 || (unsigned_word) v0 <= (unsigned_word) v1) + v2; }) 154 | DEF_GOAL (GESU_PLUS, 3, "gesu+", { r = ((signed_word) v0 >= 0 && (unsigned_word) v0 >= (unsigned_word) v1) + v2; }) 155 | DEF_GOAL (LTSU_PLUS, 3, "ltsu+", { r = ((signed_word) v0 < 0 || (unsigned_word) v0 < (unsigned_word) v1) + v2; }) 156 | DEF_GOAL (GTSU_PLUS, 3, "gtsu+", { r = ((signed_word) v0 >= 0 && (unsigned_word) v0 > (unsigned_word) v1) + v2; }) 157 | DEF_GOAL (EQ0_PLUS, 2, "eq0+", { r = (v0 == 0) + v1; }) 158 | DEF_GOAL (NE0_PLUS, 2, "ne0+", { r = (v0 != 0) + v1; }) 159 | DEF_GOAL (LES0_PLUS, 2, "les0+", { r = ((signed_word) v0 <= 0) + v1; }) 160 | DEF_GOAL (GES0_PLUS, 2, "ges0+", { r = ((signed_word) v0 >= 0) + v1; }) 161 | DEF_GOAL (LTS0_PLUS, 2, "lts0+", { r = ((signed_word) v0 < 0) + v1; }) 162 | DEF_GOAL (GTS0_PLUS, 2, "gts0+", { r = ((signed_word) v0 > 0) + v1; }) 163 | 164 | DEF_SYNONYM (EQ_PLUS, "peq") 165 | DEF_SYNONYM (NE_PLUS, "pne") 166 | DEF_SYNONYM (LES_PLUS, "ples") 167 | DEF_SYNONYM (GES_PLUS, "pges") 168 | DEF_SYNONYM (LTS_PLUS, "plts") 169 | DEF_SYNONYM (GTS_PLUS, "pgts") 170 | DEF_SYNONYM (LEU_PLUS, "pleu") 171 | DEF_SYNONYM (GEU_PLUS, "pgeu") 172 | DEF_SYNONYM (LTU_PLUS, "pltu") 173 | DEF_SYNONYM (GTU_PLUS, "pgtu") 174 | DEF_SYNONYM (LESU_PLUS, "plesu") 175 | DEF_SYNONYM (GESU_PLUS, "pgesu") 176 | DEF_SYNONYM (LTSU_PLUS, "pltsu") 177 | DEF_SYNONYM (GTSU_PLUS, "pgtsu") 178 | DEF_SYNONYM (EQ0_PLUS, "peq0") 179 | DEF_SYNONYM (NE0_PLUS, "pne0") 180 | DEF_SYNONYM (LES0_PLUS, "ples0") 181 | DEF_SYNONYM (GES0_PLUS, "pges0") 182 | DEF_SYNONYM (LTS0_PLUS, "plts0") 183 | DEF_SYNONYM (GTS0_PLUS, "pgts0") 184 | 185 | DEF_GOAL (EQ_MINUS, 3, "eq-", { r = v2 - (v0 == v1); }) 186 | DEF_GOAL (NE_MINUS, 3, "ne-", { r = v2 - (v0 != v1); }) 187 
| DEF_GOAL (LES_MINUS, 3, "les-", { r = v2 - ((signed_word) v0 <= (signed_word) v1); }) 188 | DEF_GOAL (GES_MINUS, 3, "ges-", { r = v2 - ((signed_word) v0 >= (signed_word) v1); }) 189 | DEF_GOAL (LTS_MINUS, 3, "lts-", { r = v2 - ((signed_word) v0 < (signed_word) v1); }) 190 | DEF_GOAL (GTS_MINUS, 3, "gts-", { r = v2 - ((signed_word) v0 > (signed_word) v1); }) 191 | DEF_GOAL (LEU_MINUS, 3, "leu-", { r = v2 - ((unsigned_word) v0 <= (unsigned_word) v1); }) 192 | DEF_GOAL (GEU_MINUS, 3, "geu-", { r = v2 - ((unsigned_word) v0 >= (unsigned_word) v1); }) 193 | DEF_GOAL (LTU_MINUS, 3, "ltu-", { r = v2 - ((unsigned_word) v0 < (unsigned_word) v1); }) 194 | DEF_GOAL (GTU_MINUS, 3, "gtu-", { r = v2 - ((unsigned_word) v0 > (unsigned_word) v1); }) 195 | DEF_GOAL (LESU_MINUS, 3, "lesu-", { r = v2 - ((signed_word) v0 < 0 || (unsigned_word) v0 <= (unsigned_word) v1); }) 196 | DEF_GOAL (GESU_MINUS, 3, "gesu-", { r = v2 - ((signed_word) v0 >= 0 && (unsigned_word) v0 >= (unsigned_word) v1); }) 197 | DEF_GOAL (LTSU_MINUS, 3, "ltsu-", { r = v2 - ((signed_word) v0 < 0 || (unsigned_word) v0 < (unsigned_word) v1); }) 198 | DEF_GOAL (GTSU_MINUS, 3, "gtsu-", { r = v2 - ((signed_word) v0 >= 0 && (unsigned_word) v0 > (unsigned_word) v1); }) 199 | 200 | DEF_GOAL (EQ0_MINUS, 2, "eq0-", { r = v1 - (v0 == 0); }) 201 | DEF_GOAL (NE0_MINUS, 2, "ne0-", { r = v1 - (v0 != 0); }) 202 | DEF_GOAL (LES0_MINUS, 2, "les0-", { r = v1 - ((signed_word) v0 <= 0); }) 203 | DEF_GOAL (GES0_MINUS, 2, "ges0-", { r = v1 - ((signed_word) v0 >= 0); }) 204 | DEF_GOAL (LTS0_MINUS, 2, "lts0-", { r = v1 - ((signed_word) v0 < 0); }) 205 | DEF_GOAL (GTS0_MINUS, 2, "gts0-", { r = v1 - ((signed_word) v0 > 0); }) 206 | 207 | DEF_GOAL (NEQ_AND, 3, "naeq", { r = -(v0 == v1) & v2; }) 208 | DEF_GOAL (NNE_AND, 3, "nane", { r = -(v0 != v1) & v2; }) 209 | DEF_GOAL (NLES_AND, 3, "nales", { r = -((signed_word) v0 <= (signed_word) v1) & v2; }) 210 | DEF_GOAL (NGES_AND, 3, "nages", { r = -((signed_word) v0 >= (signed_word) v1) & v2; }) 211 | DEF_GOAL (NLTS_AND, 3, "nalts", { r = -((signed_word) v0 < (signed_word) v1) & v2; }) 212 | DEF_GOAL (NGTS_AND, 3, "nagts", { r = -((signed_word) v0 > (signed_word) v1) & v2; }) 213 | DEF_GOAL (NLEU_AND, 3, "naleu", { r = -((unsigned_word) v0 <= (unsigned_word) v1) & v2; }) 214 | DEF_GOAL (NGEU_AND, 3, "nageu", { r = -((unsigned_word) v0 >= (unsigned_word) v1) & v2; }) 215 | DEF_GOAL (NLTU_AND, 3, "naltu", { r = -((unsigned_word) v0 < (unsigned_word) v1) & v2; }) 216 | DEF_GOAL (NGTU_AND, 3, "nagtu", { r = -((unsigned_word) v0 > (unsigned_word) v1) & v2; }) 217 | 218 | DEF_GOAL (NEQ0_AND, 2, "naeq0", { r = -(v0 == 0) & v1; }) 219 | DEF_GOAL (NNE0_AND, 2, "nane0", { r = -(v0 != 0) & v1; }) 220 | DEF_GOAL (NLES0_AND, 2, "nales0", { r = -((signed_word) v0 <= 0) & v1; }) 221 | DEF_GOAL (NGES0_AND, 2, "nages0", { r = -((signed_word) v0 >= 0) & v1; }) 222 | DEF_GOAL (NLTS0_AND, 2, "nalts0", { r = -((signed_word) v0 < 0) & v1; }) 223 | DEF_GOAL (NGTS0_AND, 2, "nagts0", { r = -((signed_word) v0 > 0) & v1; }) 224 | 225 | /* Don't do all variants here, since it is trivial to deduce the rest. */ 226 | DEF_GOAL (EQ_SEL, 4, "eq-sel", { r = (v0 == v1) ? v2 : v3; }) 227 | DEF_GOAL (LES_SEL, 4, "les-sel", { r = ((signed_word) v0 <= (signed_word) v1) ? v2 : v3; }) 228 | DEF_GOAL (LTS_SEL, 4, "lts-sel", { r = ((signed_word) v0 < (signed_word) v1) ? v2 : v3; }) 229 | DEF_GOAL (LEU_SEL, 4, "leu-sel", { r = ((unsigned_word) v0 <= (unsigned_word) v1) ? 
v2 : v3; }) 230 | DEF_GOAL (LTU_SEL, 4, "ltu-sel", { r = ((unsigned_word) v0 < (unsigned_word) v1) ? v2 : v3; }) 231 | 232 | DEF_GOAL (EQ0_SEL, 3, "eq0-sel", { r = (v0 == 0) ? v1 : v2; }) 233 | DEF_GOAL (LES0_SEL, 3, "les0-sel", { r = ((signed_word) v0 <= 0) ? v1 : v2; }) 234 | DEF_GOAL (LTS0_SEL, 3, "lts0-sel", { r = ((signed_word) v0 < 0) ? v1 : v2; }) 235 | 236 | DEF_GOAL (FFS, 1, "ffs", { r = ffs_internal (v0); }) 237 | DEF_GOAL (CLOG2, 1, "ceil_log2", { r = ceil_log2 (v0); }) 238 | DEF_GOAL (FLOG2, 1, "floor_log2", { r = floor_log2 (v0); }) 239 | DEF_GOAL (MULTADJ, 3, "multadj", { r = v1 + ((signed_word) v0 < 0 ? v2 : 0); }) 240 | 241 | DEF_GOAL (SHIFTL_1, 1, "sll1", { r = v0 << 1; }) 242 | DEF_GOAL (SHIFTL_2, 1, "sll2", { r = v0 << 2; }) 243 | DEF_GOAL (SHIFTL_3, 1, "sll3", { r = v0 << 3; }) 244 | DEF_GOAL (SHIFTL_4, 1, "sll4", { r = v0 << 4; }) 245 | DEF_GOAL (SHIFTL_5, 1, "sll5", { r = v0 << 5; }) 246 | DEF_GOAL (SHIFTL_6, 1, "sll6", { r = v0 << 6; }) 247 | DEF_GOAL (SHIFTL_7, 1, "sll7", { r = v0 << 7; }) 248 | DEF_GOAL (SHIFTL_8, 1, "sll8", { r = v0 << 8; }) 249 | DEF_GOAL (SHIFTL_9, 1, "sll9", { r = v0 << 9; }) 250 | DEF_GOAL (SHIFTL_10, 1, "sll10", { r = v0 << 10; }) 251 | DEF_GOAL (SHIFTL_11, 1, "sll11", { r = v0 << 11; }) 252 | DEF_GOAL (SHIFTL_12, 1, "sll12", { r = v0 << 12; }) 253 | DEF_GOAL (SHIFTL_13, 1, "sll13", { r = v0 << 13; }) 254 | DEF_GOAL (SHIFTL_14, 1, "sll14", { r = v0 << 14; }) 255 | DEF_GOAL (SHIFTL_15, 1, "sll15", { r = v0 << 15; }) 256 | DEF_GOAL (SHIFTL_16, 1, "sll16", { r = v0 << 16; }) 257 | DEF_GOAL (SHIFTL_17, 1, "sll17", { r = v0 << 17; }) 258 | DEF_GOAL (SHIFTL_18, 1, "sll18", { r = v0 << 18; }) 259 | DEF_GOAL (SHIFTL_19, 1, "sll19", { r = v0 << 19; }) 260 | DEF_GOAL (SHIFTL_20, 1, "sll20", { r = v0 << 20; }) 261 | DEF_GOAL (SHIFTL_21, 1, "sll21", { r = v0 << 21; }) 262 | DEF_GOAL (SHIFTL_22, 1, "sll22", { r = v0 << 22; }) 263 | DEF_GOAL (SHIFTL_23, 1, "sll23", { r = v0 << 23; }) 264 | DEF_GOAL (SHIFTL_24, 1, "sll24", { r = v0 << 24; }) 265 | DEF_GOAL (SHIFTL_25, 1, "sll25", { r = v0 << 25; }) 266 | DEF_GOAL (SHIFTL_26, 1, "sll26", { r = v0 << 26; }) 267 | DEF_GOAL (SHIFTL_27, 1, "sll27", { r = v0 << 27; }) 268 | DEF_GOAL (SHIFTL_28, 1, "sll28", { r = v0 << 28; }) 269 | DEF_GOAL (SHIFTL_29, 1, "sll29", { r = v0 << 29; }) 270 | DEF_GOAL (SHIFTL_30, 1, "sll30", { r = v0 << 30; }) 271 | DEF_GOAL (SHIFTL_31, 1, "sll31", { r = v0 << 31; }) 272 | 273 | DEF_GOAL (LSHIFTR_1, 1, "srl1", { r = v0 >> 1; }) 274 | DEF_GOAL (LSHIFTR_2, 1, "srl2", { r = v0 >> 2; }) 275 | DEF_GOAL (LSHIFTR_3, 1, "srl3", { r = v0 >> 3; }) 276 | DEF_GOAL (LSHIFTR_4, 1, "srl4", { r = v0 >> 4; }) 277 | DEF_GOAL (LSHIFTR_5, 1, "srl5", { r = v0 >> 5; }) 278 | DEF_GOAL (LSHIFTR_6, 1, "srl6", { r = v0 >> 6; }) 279 | DEF_GOAL (LSHIFTR_7, 1, "srl7", { r = v0 >> 7; }) 280 | DEF_GOAL (LSHIFTR_8, 1, "srl8", { r = v0 >> 8; }) 281 | DEF_GOAL (LSHIFTR_9, 1, "srl9", { r = v0 >> 9; }) 282 | DEF_GOAL (LSHIFTR_10, 1, "srl10", { r = v0 >> 10; }) 283 | DEF_GOAL (LSHIFTR_11, 1, "srl11", { r = v0 >> 11; }) 284 | DEF_GOAL (LSHIFTR_12, 1, "srl12", { r = v0 >> 12; }) 285 | DEF_GOAL (LSHIFTR_13, 1, "srl13", { r = v0 >> 13; }) 286 | DEF_GOAL (LSHIFTR_14, 1, "srl14", { r = v0 >> 14; }) 287 | DEF_GOAL (LSHIFTR_15, 1, "srl15", { r = v0 >> 15; }) 288 | DEF_GOAL (LSHIFTR_16, 1, "srl16", { r = v0 >> 16; }) 289 | DEF_GOAL (LSHIFTR_17, 1, "srl17", { r = v0 >> 17; }) 290 | DEF_GOAL (LSHIFTR_18, 1, "srl18", { r = v0 >> 18; }) 291 | DEF_GOAL (LSHIFTR_19, 1, "srl19", { r = v0 >> 19; }) 292 | DEF_GOAL (LSHIFTR_20, 1, "srl20", 
{ r = v0 >> 20; }) 293 | DEF_GOAL (LSHIFTR_21, 1, "srl21", { r = v0 >> 21; }) 294 | DEF_GOAL (LSHIFTR_22, 1, "srl22", { r = v0 >> 22; }) 295 | DEF_GOAL (LSHIFTR_23, 1, "srl23", { r = v0 >> 23; }) 296 | DEF_GOAL (LSHIFTR_24, 1, "srl24", { r = v0 >> 24; }) 297 | DEF_GOAL (LSHIFTR_25, 1, "srl25", { r = v0 >> 25; }) 298 | DEF_GOAL (LSHIFTR_26, 1, "srl26", { r = v0 >> 26; }) 299 | DEF_GOAL (LSHIFTR_27, 1, "srl27", { r = v0 >> 27; }) 300 | DEF_GOAL (LSHIFTR_28, 1, "srl28", { r = v0 >> 28; }) 301 | DEF_GOAL (LSHIFTR_29, 1, "srl29", { r = v0 >> 29; }) 302 | DEF_GOAL (LSHIFTR_30, 1, "srl30", { r = v0 >> 30; }) 303 | DEF_GOAL (LSHIFTR_31, 1, "srl31", { r = v0 >> 31; }) 304 | 305 | DEF_GOAL (ASHIFTR_1, 1, "sra1", { r = (signed_word) v0 >> 1; }) 306 | DEF_GOAL (ASHIFTR_2, 1, "sra2", { r = (signed_word) v0 >> 2; }) 307 | DEF_GOAL (ASHIFTR_3, 1, "sra3", { r = (signed_word) v0 >> 3; }) 308 | DEF_GOAL (ASHIFTR_4, 1, "sra4", { r = (signed_word) v0 >> 4; }) 309 | DEF_GOAL (ASHIFTR_5, 1, "sra5", { r = (signed_word) v0 >> 5; }) 310 | DEF_GOAL (ASHIFTR_6, 1, "sra6", { r = (signed_word) v0 >> 6; }) 311 | DEF_GOAL (ASHIFTR_7, 1, "sra7", { r = (signed_word) v0 >> 7; }) 312 | DEF_GOAL (ASHIFTR_8, 1, "sra8", { r = (signed_word) v0 >> 8; }) 313 | DEF_GOAL (ASHIFTR_9, 1, "sra9", { r = (signed_word) v0 >> 9; }) 314 | DEF_GOAL (ASHIFTR_10, 1, "sra10", { r = (signed_word) v0 >> 10; }) 315 | DEF_GOAL (ASHIFTR_11, 1, "sra11", { r = (signed_word) v0 >> 11; }) 316 | DEF_GOAL (ASHIFTR_12, 1, "sra12", { r = (signed_word) v0 >> 12; }) 317 | DEF_GOAL (ASHIFTR_13, 1, "sra13", { r = (signed_word) v0 >> 13; }) 318 | DEF_GOAL (ASHIFTR_14, 1, "sra14", { r = (signed_word) v0 >> 14; }) 319 | DEF_GOAL (ASHIFTR_15, 1, "sra15", { r = (signed_word) v0 >> 15; }) 320 | DEF_GOAL (ASHIFTR_16, 1, "sra16", { r = (signed_word) v0 >> 16; }) 321 | DEF_GOAL (ASHIFTR_17, 1, "sra17", { r = (signed_word) v0 >> 17; }) 322 | DEF_GOAL (ASHIFTR_18, 1, "sra18", { r = (signed_word) v0 >> 18; }) 323 | DEF_GOAL (ASHIFTR_19, 1, "sra19", { r = (signed_word) v0 >> 19; }) 324 | DEF_GOAL (ASHIFTR_20, 1, "sra20", { r = (signed_word) v0 >> 20; }) 325 | DEF_GOAL (ASHIFTR_21, 1, "sra21", { r = (signed_word) v0 >> 21; }) 326 | DEF_GOAL (ASHIFTR_22, 1, "sra22", { r = (signed_word) v0 >> 22; }) 327 | DEF_GOAL (ASHIFTR_23, 1, "sra23", { r = (signed_word) v0 >> 23; }) 328 | DEF_GOAL (ASHIFTR_24, 1, "sra24", { r = (signed_word) v0 >> 24; }) 329 | DEF_GOAL (ASHIFTR_25, 1, "sra25", { r = (signed_word) v0 >> 25; }) 330 | DEF_GOAL (ASHIFTR_26, 1, "sra26", { r = (signed_word) v0 >> 26; }) 331 | DEF_GOAL (ASHIFTR_27, 1, "sra27", { r = (signed_word) v0 >> 27; }) 332 | DEF_GOAL (ASHIFTR_28, 1, "sra28", { r = (signed_word) v0 >> 28; }) 333 | DEF_GOAL (ASHIFTR_29, 1, "sra29", { r = (signed_word) v0 >> 29; }) 334 | DEF_GOAL (ASHIFTR_30, 1, "sra30", { r = (signed_word) v0 >> 30; }) 335 | DEF_GOAL (ASHIFTR_31, 1, "sra31", { r = (signed_word) v0 >> 31; }) 336 | 337 | 338 | DEF_GOAL (CMPBYTES, 2, "cmpbytes", 339 | { 340 | union { word w; char b[4]; } __r1; 341 | union { word w; char b[4]; } __r2; 342 | __r1.w = (v0); __r2.w = (v1); 343 | r = ((__r1.b[0] != __r2.b[0]) && (__r1.b[1] != __r2.b[1]) 344 | && (__r1.b[2] != __r2.b[2]) && (__r1.b[3] != __r2.b[3])); 345 | }) 346 | 347 | #ifdef __GNUC__ 348 | #if 0 349 | DEF_GOAL (UDIV_QRNND, 3, "udiv_qrnnd", ({ word t1, t2; udiv_qrnnd (t1, t2, v0, v1, v2); r = t1; })) 350 | DEF_GOAL (UMOD_QRNND, 3, "umod_qrnnd", ({ word t1, t2; udiv_qrnnd (t1, t2, v0, v1, v2); r = t2; })) 351 | #endif 352 | DEF_GOAL (UMULH, 2, "umulh", ({ word t1; word t2; 
umul_ppmm (t1, t2, v0, v1); r = t1; })) 353 | DEF_GOAL (UMULH3, 1, "umulh3", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 3); r = t1; })) 354 | DEF_GOAL (UMULH5, 1, "umulh5", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 5); r = t1; })) 355 | DEF_GOAL (UMULH6, 1, "umulh6", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 6); r = t1; })) 356 | DEF_GOAL (UMULH7, 1, "umulh7", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 7); r = t1; })) 357 | DEF_GOAL (UMULH9, 1, "umulh9", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 9); r = t1; })) 358 | DEF_GOAL (UMULH10, 1, "umulh10", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 10); r = t1; })) 359 | DEF_GOAL (UMULH11, 1, "umulh11", ({ word t1; word t2; umul_ppmm (t1, t2, v0, 11); r = t1; })) 360 | #endif /* __GNUC__ */ 361 | 362 | #ifdef __GNUC__ 363 | DEF_GOAL (MORE_EVEN, 2, "more_even", ({ r = (v0 & -v0) > (v1 & -v1) ; })) 364 | #endif /* __GNUC__ */ 365 | DEF_GOAL (CLEAR_LSB, 1, "clear_lsb", { r = v0 & ~(v0 & -v0); }) 366 | DEF_GOAL (CLEAR_LSB2, 2, "clear_lsb2", { r = v1 & ~(v0 & -v0); }) 367 | 368 | #if 0 369 | DEF_GOAL (UDIV, 2, "udiv", { r = v0 / v1; }) 370 | #endif 371 | 372 | /* After "v0 = v1 + v2", compute if we got signed overflow. */ 373 | DEF_GOAL (OVERFLOW_AFTER_ADD, 3, "add_ovfl", { r = (signed_word) (~(v1 ^ v2) & (v0 ^ v1)) < 0; }) 374 | 375 | /* The most and least significant bits of a double word left shift. */ 376 | DEF_GOAL (DBL_SHIFT_HI, 3, "dbl_shift_hi", 377 | { r = (v2 >= BITS_PER_WORD 378 | ? v0 << (v2 - BITS_PER_WORD) 379 | : (v1 << v2) | (v0 >> (BITS_PER_WORD - v2))); }) 380 | DEF_GOAL (DBL_SHIFT_LO, 2, "dbl_shift_lo", 381 | { r = (v1 >= BITS_PER_WORD 382 | ? 0 383 | : v0 << v1); }) 384 | DEF_GOAL (DBL_SHIFT_HI_TRUNC, 3, "dbl_shift_hi_trunc", 385 | { r = ((v2 & 2*BITS_PER_WORD-1) >= BITS_PER_WORD 386 | ? v0 << ((v2 & 2*BITS_PER_WORD-1) - BITS_PER_WORD) 387 | : (v1 << (v2 & 2*BITS_PER_WORD-1)) | (v0 >> (BITS_PER_WORD - (v2 & 2*BITS_PER_WORD-1)))); }) 388 | DEF_GOAL (DBL_SHIFT_LO_TRUNC, 2, "dbl_shift_lo_trunc", 389 | { r = ((v1 & 2*BITS_PER_WORD-1) >= BITS_PER_WORD 390 | ? 
0 391 | : v0 << (v1 & 2*BITS_PER_WORD-1)); }) 392 | 393 | /* 394 | Local variables: 395 | mode:c 396 | version-control: t 397 | End: 398 | */ 399 | -------------------------------------------------------------------------------- /hashtable.c: -------------------------------------------------------------------------------- 1 | 2 | #define HASHTABLE_BUCKETS (1024*8) 3 | 4 | 5 | struct hashtable_entry_t { 6 | word *values; 7 | int n_values; 8 | int carry; 9 | int cost; 10 | struct hashtable_entry_t *next; 11 | }; 12 | 13 | typedef struct hashtable_entry_t hashtable_entry; 14 | 15 | hashtable_entry hashtable[HASHTABLE_BUCKETS]; 16 | 17 | void hashtable_init(); 18 | void hashtable_insert(word *values, int n_values, int carry, int cost); 19 | int hashtable_find(word *values, int n_values, int carry); 20 | void hashtable_free(); 21 | 22 | void hashtable_init() 23 | { 24 | int i; 25 | 26 | for (i = 0; i < HASHTABLE_BUCKETS; ++i) 27 | { 28 | hashtable[i].values = NULL; 29 | hashtable[i].n_values = -1; 30 | hashtable[i].carry = 0; 31 | hashtable[i].next = NULL; 32 | } 33 | } 34 | 35 | unsigned hash(word *values, int n_values, int carry) 36 | { 37 | unsigned v = 0; 38 | int i; 39 | 40 | for (i = 0; i < n_values; ++i) 41 | { 42 | v += values[i] * (i+1) * carry; 43 | } 44 | 45 | return v % HASHTABLE_BUCKETS; 46 | } 47 | 48 | int compare(word *values_1, int n_values_1, int carry_1, word *values_2, int n_values_2, int carry_2) 49 | { 50 | int i; 51 | 52 | if (n_values_1 != n_values_2 || carry_1 != carry_2) 53 | return 0; 54 | 55 | return memcmp(values_1, values_2, n_values_1*sizeof(word))==0; 56 | } 57 | 58 | void hashtable_insert(word *values, int n_values, int carry, int cost) 59 | { 60 | unsigned h = hash(values, n_values, carry); 61 | hashtable_entry *e = &hashtable[h]; 62 | int exists = 0; 63 | 64 | while(1) 65 | { 66 | if (compare(values, n_values, carry, e->values, e->n_values, e->carry)) 67 | { 68 | if (cost > e->cost) 69 | e->cost = cost; 70 | return; 71 | } 72 | 73 | if(!e->next) 74 | { 75 | e->next = malloc(sizeof(hashtable_entry)); 76 | e = e->next; 77 | e->cost = cost; 78 | e->n_values = n_values; 79 | e->values = malloc(sizeof(word)*n_values); 80 | e->carry = carry; 81 | memcpy(e->values, values, sizeof(word)*n_values); 82 | e->next = NULL; 83 | return; 84 | } 85 | 86 | e = e->next; 87 | } 88 | } 89 | 90 | int hashtable_find(word *values, int n_values, int carry) 91 | { 92 | unsigned h = hash(values, n_values, carry); 93 | hashtable_entry *e = &hashtable[h]; 94 | int exists = 0; 95 | 96 | while(1) 97 | { 98 | if (compare(values, n_values, carry, e->values, e->n_values, e->carry)) 99 | { 100 | return e->cost; 101 | } 102 | 103 | if(!e->next) 104 | { 105 | return 0; 106 | } 107 | 108 | e = e->next; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /insn.def: -------------------------------------------------------------------------------- 1 | /* This file contains the definitions and documentation for the instructions 2 | used in the Superoptimizer. 3 | 4 | Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. 5 | 6 | This program is free software; you can redistribute it and/or modify it 7 | under the terms of the GNU General Public License as published by the 8 | Free Software Foundation; either version 2, or (at your option) any 9 | later version. 
10 | 11 | This program is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; see the file COPYING. If not, write to the Free 18 | Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 | 20 | /* The fields in the cpp macro call "DEF_INSN()" are: 21 | 22 | 1. The internal name of the instruction. 23 | 24 | 2. The class of the instruction. These are stored in the insn_class 25 | array and are accessed via the GET_INSN_CLASS macro. They are defined 26 | as follows: 27 | 28 | "1" an rtx code for a unary arithmetic expression (e.g, ABSVAL, CLZ) 29 | "c" an rtx code for a commutative binary operation (e.g, ADD, XOR) 30 | "2" an rtx code for a non-commutative binary operation (e.g., SUB, CMP) 31 | "<" an rtx code for a non-commutative binary instruction that only 32 | writes to the condition code 33 | "=" an rtx code for a commutative binary instruction that only writes 34 | to the condition code 35 | "x" everything else 36 | 37 | 3. The printed name of the instruction. 38 | 39 | */ 40 | 41 | /* Arithmetic insns. d = r1 + r2 [+ cy] */ 42 | DEF_INSN (ADD, 'b', "add") 43 | DEF_INSN (ADD_CI, 'b', "add_ci") 44 | DEF_INSN (ADD_CO, 'b', "add_co") 45 | DEF_INSN (ADD_CIO, 'b', "add_cio") 46 | /* Arithmetic insns. d = r1 - r2 [- cy] */ 47 | DEF_INSN (SUB, '2', "sub") 48 | DEF_INSN (SUB_CI, '2', "sub_ci") 49 | DEF_INSN (SUB_CO, '2', "sub_co") 50 | DEF_INSN (SUB_CIO, '2', "sub_cio") 51 | /* Arithmetic insns. d = r1 + ~r2 + cy/1 */ 52 | DEF_INSN (ADC_CI, '2', "adc_ci") 53 | DEF_INSN (ADC_CO, '2', "adc_co") 54 | DEF_INSN (ADC_CIO, '2', "adc_cio") 55 | /* Arithmetic insns. d = r1 + ~r2 */ 56 | DEF_INSN (ADDCMPL, '2', "addcmpl") 57 | /* XCore address arithmetic. */ 58 | DEF_INSN (LDA16F, '2', "lda16f") 59 | DEF_INSN (LDA16B, '2', "lda16b") 60 | DEF_INSN (LDAWF, '2', "ldawf") 61 | DEF_INSN (LDAWB, '2', "ldawb") 62 | 63 | /* Logical insns that don't affect the carry flag. */ 64 | DEF_INSN (AND, 'b', "and") 65 | DEF_INSN (IOR, 'b', "ior") 66 | DEF_INSN (XOR, 'b', "xor") 67 | DEF_INSN (ANDC, '2', "andc") 68 | DEF_INSN (IORC, '2', "iorc") 69 | DEF_INSN (EQV, '2', "eqv") 70 | DEF_INSN (NAND, 'b', "nand") 71 | DEF_INSN (NOR, 'b', "nor") 72 | 73 | /* Logical insns that reset the carry flag. */ 74 | DEF_INSN (AND_RC, 'b', "and_rc") 75 | DEF_INSN (IOR_RC, 'b', "ior_rc") 76 | DEF_INSN (XOR_RC, 'b', "xor_rc") 77 | DEF_INSN (ANDC_RC, '2', "andc_rc") 78 | DEF_INSN (IORC_RC, '2', "iorc_rc") 79 | DEF_INSN (EQV_RC, '2', "eqv_rc") 80 | DEF_INSN (NAND_RC, 'b', "nand_rc") 81 | DEF_INSN (NOR_RC, 'b', "nor_rc") 82 | 83 | /* Logical insns that clobber the carry flag. */ 84 | DEF_INSN (AND_CC, 'b', "and_cc") 85 | DEF_INSN (IOR_CC, 'b', "ior_cc") 86 | DEF_INSN (XOR_CC, 'b', "xor_cc") 87 | DEF_INSN (ANDC_CC, '2', "andc_cc") 88 | DEF_INSN (IORC_CC, '2', "iorc_cc") 89 | DEF_INSN (EQV_CC, '2', "eqv_cc") 90 | DEF_INSN (NAND_CC, 'b', "nand_cc") 91 | DEF_INSN (NOR_CC, 'b', "nor_cc") 92 | 93 | /* Bit shift and count. 
*/ 94 | DEF_INSN (LSHIFTR, '2', "log_shift_right") 95 | DEF_INSN (ASHIFTR, '2', "arith_shift_right") 96 | DEF_INSN (SHIFTL, '2', "shift_left") 97 | DEF_INSN (LSHIFTR_CO, '2', "log_shift_right_co") 98 | DEF_INSN (ASHIFTR_CO, '2', "arith_shift_right_co") 99 | DEF_INSN (SHIFTL_CO, '2', "shift_left_co") 100 | DEF_INSN (ROTATEL, '2', "rotate_left") 101 | DEF_INSN (ROTATEL_CO, '2', "rotate_left_co") 102 | DEF_INSN (ROTATER_CO, '2', "rotate_right_co") 103 | DEF_INSN (SHIFTL_NT, '2', "shiftl_nt") 104 | DEF_INSN (LSHIFTR_NT, '2', "lshiftr_nt") 105 | DEF_INSN (ASHIFTR_NT, '2', "ashiftr_nt") 106 | 107 | /* Rotate thru carry. */ 108 | DEF_INSN (ROTATEXL_CIO, '2', "rotate_thru_carry_left_co") 109 | DEF_INSN (ROTATEXR_CIO, '2', "rotate_thru_carry_right_co") 110 | 111 | /* Shift arithmetic right instruction and set carry iff the shifted 112 | operand is negative and any bit shifted out is 1. */ 113 | DEF_INSN (ASHIFTR_CON, '2', "arith_shift_right_set_carry_if_neg_and_nzbit_lost") 114 | 115 | /* Extract. */ 116 | DEF_INSN (EXTS1, '2', "exts1") 117 | DEF_INSN (EXTS2, '2', "exts2") 118 | DEF_INSN (EXTS8, '2', "exts8") 119 | DEF_INSN (EXTS16, '2', "exts16") 120 | DEF_INSN (EXTU1, '2', "extu1") 121 | DEF_INSN (EXTU2, '2', "extu2") 122 | 123 | /* XCore extend instructions. */ 124 | DEF_INSN (ZEXT, '2', "zext") 125 | DEF_INSN (SEXT, '2', "sext") 126 | 127 | /* Count leading/trailing zero instructions. */ 128 | DEF_INSN (CLZ, '1', "clz") 129 | DEF_INSN (CTZ, '1', "ctz") 130 | DEF_INSN (FF1, '1', "ff1") 131 | DEF_INSN (FF0, '1', "ff0") 132 | DEF_INSN (BSF86, '1', "bsf86") 133 | 134 | DEF_INSN (ABSVAL, '1', "abs") 135 | DEF_INSN (NABSVAL, '1', "nabs") 136 | DEF_INSN (CMP, '<', "cmp") 137 | DEF_INSN (CMPC, '<', "cmpc") 138 | 139 | /* XCore bit/byte reverse instructions. */ 140 | DEF_INSN (BITREV, '1', "bitrev") 141 | DEF_INSN (BYTEREV, '1', "byterev") 142 | 143 | /* 29k CMP instructions. */ 144 | DEF_INSN (CPEQ, 'c', "cpeq") 145 | DEF_INSN (CPGE, '2', "cpge") 146 | DEF_INSN (CPGEU, '2', "cpgeu") 147 | DEF_INSN (CPGT, '2', "cpgt") 148 | DEF_INSN (CPGTU, '2', "cpgtu") 149 | DEF_INSN (CPLE, '2', "cple") 150 | DEF_INSN (CPLEU, '2', "cpleu") 151 | DEF_INSN (CPLT, '2', "cplt") 152 | DEF_INSN (CPLTU, '2', "cpltu") 153 | DEF_INSN (CPNEQ, 'c', "cpneq") 154 | 155 | /* Alpha/XCore CMP instructions. */ 156 | DEF_INSN (CMPEQ, 'c', "cmpeq") 157 | DEF_INSN (CMPLE, '2', "cmple") 158 | DEF_INSN (CMPLEU, '2', "cmpleu") 159 | DEF_INSN (CMPLT, '2', "cmplt") 160 | DEF_INSN (CMPLTU, '2', "cmpltu") 161 | 162 | /* 88100 CMP instruction. */ 163 | DEF_INSN (CMPPAR, '2', "cmppar") 164 | 165 | /* SH CMP instructions. */ 166 | DEF_INSN (CYEQ, '=', "cyeq") 167 | DEF_INSN (CYGTU, '<', "cygtu") 168 | DEF_INSN (CYGEU, '<', "cygeu") 169 | DEF_INSN (CYGTS, '<', "cygts") 170 | DEF_INSN (CYGES, '<', "cyges") 171 | DEF_INSN (CYAND, '=', "cyand") 172 | 173 | /* SH specific instruction. */ 174 | DEF_INSN (MERGE16, '2', "merge16") 175 | DEF_INSN (DECR_CYEQ, '2', "decr_cyeq") 176 | 177 | /* XCore specific instruction. 
*/ 178 | DEF_INSN (MKMSK, '1', "mkmsk") 179 | 180 | /* Difference-or-zero (rs6000) */ 181 | DEF_INSN (DOZ, '2', "difference_or_zero") 182 | 183 | DEF_INSN (COPY, '1', "copy") 184 | DEF_INSN (EXCHANGE, '2', "exchange") 185 | 186 | /* Set, clear, complement carry */ 187 | DEF_INSN (SETCY, 'x', "set_cy") 188 | DEF_INSN (CLRCY, 'x', "clear_cy") 189 | DEF_INSN (COMCY, 'x', "complement_cy") 190 | 191 | /* Alpha conditional move */ 192 | DEF_INSN (CMOVEQ, '2', "cmoveq") 193 | DEF_INSN (CMOVNE, '2', "cmovne") 194 | DEF_INSN (CMOVLT, '2', "cmovlt") 195 | DEF_INSN (CMOVGE, '2', "cmovge") 196 | DEF_INSN (CMOVLE, '2', "cmovle") 197 | DEF_INSN (CMOVGT, '2', "cmovgt") 198 | 199 | DEF_INSN (INVDIV, '2', "invdiv") 200 | DEF_INSN (INVMOD, '2', "invmod") 201 | DEF_INSN (UMULWIDEN_HI, '2', "umulwiden_hi") 202 | DEF_INSN (MUL, '2', "mul") 203 | 204 | #if UDIV_WITH_SDIV 205 | DEF_INSN (SDIV, '2', "sdiv") 206 | #endif 207 | 208 | DEF_INSN (ADD_SEQ, '2', "add_seq") 209 | DEF_INSN (ADD_SNE, '2', "add_sne") 210 | DEF_INSN (ADD_SLTS, '2', "add_slts") 211 | DEF_INSN (ADD_SGES, '2', "add_sges") 212 | DEF_INSN (ADD_SLES, '2', "add_sles") 213 | DEF_INSN (ADD_SGTS, '2', "add_sgts") 214 | DEF_INSN (ADD_SLTU, '2', "add_sltu") 215 | DEF_INSN (ADD_SGEU, '2', "add_sgeu") 216 | DEF_INSN (ADD_SLEU, '2', "add_sleu") 217 | DEF_INSN (ADD_SGTU, '2', "add_sgtu") 218 | DEF_INSN (ADD_SOVS, '2', "add_sovs") 219 | DEF_INSN (ADD_SNVS, '2', "add_snvs") 220 | DEF_INSN (ADD_SODD, '2', "add_sodd") 221 | DEF_INSN (ADD_SEVN, '2', "add_sevn") 222 | DEF_INSN (ADD_S, '2', "add_s") 223 | DEF_INSN (ADD_CIO_SEQ, '2', "add_cio_seq") 224 | DEF_INSN (ADD_CIO_SNE, '2', "add_cio_sne") 225 | DEF_INSN (ADD_CIO_SLTU, '2', "add_cio_sltu") 226 | DEF_INSN (ADD_CIO_SGEU, '2', "add_cio_sgeu") 227 | DEF_INSN (ADD_CIO_SLEU, '2', "add_cio_sleu") 228 | DEF_INSN (ADD_CIO_SGTU, '2', "add_cio_sgtu") 229 | DEF_INSN (ADD_CIO_SODD, '2', "add_cio_sodd") 230 | DEF_INSN (ADD_CIO_SEVN, '2', "add_cio_sevn") 231 | DEF_INSN (ADD_CIO_S, '2', "add_cio_s") 232 | DEF_INSN (ADD_CO_SEQ, '2', "add_co_seq") 233 | DEF_INSN (ADD_CO_SNE, '2', "add_co_sne") 234 | DEF_INSN (ADD_CO_SLTU, '2', "add_co_sltu") 235 | DEF_INSN (ADD_CO_SGEU, '2', "add_co_sgeu") 236 | DEF_INSN (ADD_CO_SLEU, '2', "add_co_sleu") 237 | DEF_INSN (ADD_CO_SGTU, '2', "add_co_sgtu") 238 | DEF_INSN (ADD_CO_SODD, '2', "add_co_sodd") 239 | DEF_INSN (ADD_CO_SEVN, '2', "add_co_sevn") 240 | DEF_INSN (ADD_CO_S, '2', "add_co_s") 241 | 242 | DEF_INSN (SUB_SEQ, '2', "sub_seq") 243 | DEF_INSN (SUB_SNE, '2', "sub_sne") 244 | DEF_INSN (SUB_SLTS, '2', "sub_slts") 245 | DEF_INSN (SUB_SGES, '2', "sub_sges") 246 | DEF_INSN (SUB_SLES, '2', "sub_sles") 247 | DEF_INSN (SUB_SGTS, '2', "sub_sgts") 248 | DEF_INSN (SUB_SODD, '2', "sub_sodd") 249 | DEF_INSN (SUB_SEVN, '2', "sub_sevn") 250 | DEF_INSN (SUB_S, '2', "sub_s") 251 | 252 | DEF_INSN (ADC_CIO_SEQ, '2', "adc_cio_seq") 253 | DEF_INSN (ADC_CIO_SNE, '2', "adc_cio_sne") 254 | DEF_INSN (ADC_CIO_SLTU, '2', "adc_cio_sltu") 255 | DEF_INSN (ADC_CIO_SGEU, '2', "adc_cio_sgeu") 256 | DEF_INSN (ADC_CIO_SLEU, '2', "adc_cio_sleu") 257 | DEF_INSN (ADC_CIO_SGTU, '2', "adc_cio_sgtu") 258 | DEF_INSN (ADC_CIO_SODD, '2', "adc_cio_sodd") 259 | DEF_INSN (ADC_CIO_SEVN, '2', "adc_cio_sevn") 260 | DEF_INSN (ADC_CIO_S, '2', "adc_cio_s") 261 | DEF_INSN (ADC_CO_SEQ, '2', "adc_co_seq") 262 | DEF_INSN (ADC_CO_SNE, '2', "adc_co_sne") 263 | DEF_INSN (ADC_CO_SLTU, '2', "adc_co_sltu") 264 | DEF_INSN (ADC_CO_SGEU, '2', "adc_co_sgeu") 265 | DEF_INSN (ADC_CO_SLEU, '2', "adc_co_sleu") 266 | DEF_INSN (ADC_CO_SGTU, '2', 
"adc_co_sgtu") 267 | DEF_INSN (ADC_CO_SODD, '2', "adc_co_sodd") 268 | DEF_INSN (ADC_CO_SEVN, '2', "adc_co_sevn") 269 | DEF_INSN (ADC_CO_S, '2', "adc_co_s") 270 | 271 | DEF_INSN (COMCLR_SEQ, '2', "comclr_seq") 272 | DEF_INSN (COMCLR_SNE, '2', "comclr_sne") 273 | DEF_INSN (COMCLR_SLTS, '2', "comclr_slts") 274 | DEF_INSN (COMCLR_SGES, '2', "comclr_sges") 275 | DEF_INSN (COMCLR_SLES, '2', "comclr_sles") 276 | DEF_INSN (COMCLR_SGTS, '2', "comclr_sgts") 277 | DEF_INSN (COMCLR_SLTU, '2', "comclr_sltu") 278 | DEF_INSN (COMCLR_SGEU, '2', "comclr_sgeu") 279 | DEF_INSN (COMCLR_SLEU, '2', "comclr_sleu") 280 | DEF_INSN (COMCLR_SGTU, '2', "comclr_sgtu") 281 | DEF_INSN (COMCLR_SODD, '2', "comclr_sodd") 282 | DEF_INSN (COMCLR_SEVN, '2', "comclr_sevn") 283 | /* DEF_INSN (COMCLR_S, '2', "comclr_s") */ 284 | 285 | DEF_INSN (AND_SEQ, '2', "and_seq") 286 | DEF_INSN (AND_SNE, '2', "and_sne") 287 | DEF_INSN (AND_SLTS, '2', "and_slts") 288 | DEF_INSN (AND_SGES, '2', "and_sges") 289 | DEF_INSN (AND_SLES, '2', "and_sles") 290 | DEF_INSN (AND_SGTS, '2', "and_sgts") 291 | DEF_INSN (AND_SODD, '2', "and_sodd") 292 | DEF_INSN (AND_SEVN, '2', "and_sevn") 293 | DEF_INSN (AND_S, '2', "and_s") 294 | DEF_INSN (IOR_SEQ, '2', "ior_seq") 295 | DEF_INSN (IOR_SNE, '2', "ior_sne") 296 | DEF_INSN (IOR_SLTS, '2', "ior_slts") 297 | DEF_INSN (IOR_SGES, '2', "ior_sges") 298 | DEF_INSN (IOR_SLES, '2', "ior_sles") 299 | DEF_INSN (IOR_SGTS, '2', "ior_sgts") 300 | DEF_INSN (IOR_SODD, '2', "ior_sodd") 301 | DEF_INSN (IOR_SEVN, '2', "ior_sevn") 302 | DEF_INSN (IOR_S, '2', "ior_s") 303 | DEF_INSN (XOR_SEQ, '2', "xor_seq") 304 | DEF_INSN (XOR_SNE, '2', "xor_sne") 305 | DEF_INSN (XOR_SLTS, '2', "xor_slts") 306 | DEF_INSN (XOR_SGES, '2', "xor_sges") 307 | DEF_INSN (XOR_SLES, '2', "xor_sles") 308 | DEF_INSN (XOR_SGTS, '2', "xor_sgts") 309 | DEF_INSN (XOR_SODD, '2', "xor_sodd") 310 | DEF_INSN (XOR_SEVN, '2', "xor_sevn") 311 | DEF_INSN (XOR_S, '2', "xor_s") 312 | DEF_INSN (ANDC_SEQ, '2', "andc_seq") 313 | DEF_INSN (ANDC_SNE, '2', "andc_sne") 314 | DEF_INSN (ANDC_SLTS, '2', "andc_slts") 315 | DEF_INSN (ANDC_SGES, '2', "andc_sges") 316 | DEF_INSN (ANDC_SLES, '2', "andc_sles") 317 | DEF_INSN (ANDC_SGTS, '2', "andc_sgts") 318 | DEF_INSN (ANDC_SODD, '2', "andc_sodd") 319 | DEF_INSN (ANDC_SEVN, '2', "andc_sevn") 320 | DEF_INSN (ANDC_S, '2', "andc_s") 321 | 322 | /* Bit shift and count. */ 323 | DEF_INSN (LSHIFTR_S, '2', "log_shift_right_s") 324 | DEF_INSN (ASHIFTR_S, '2', "arith_shift_right_s") 325 | DEF_INSN (SHIFTL_S, '2', "shift_left_s") 326 | DEF_INSN (ROTATEL_S, '2', "rotate_left_s") 327 | 328 | /* Extract. */ 329 | DEF_INSN (EXTS1_S, '2', "exts1_s") 330 | DEF_INSN (EXTS2_S, '2', "exts2_s") 331 | DEF_INSN (EXTS8_S, '2', "exts8_s") 332 | DEF_INSN (EXTS16_S, '2', "exts16_s") 333 | DEF_INSN (EXTU1_S, '2', "extu1_s") 334 | DEF_INSN (EXTU2_S, '2', "extu2_s") 335 | 336 | DEF_INSN (COPY_S, '1', "copy_s") 337 | 338 | 339 | /* Inte 960 specific instructions. 
*/ 340 | DEF_INSN (ADDC_960, '2', "addc_960") 341 | DEF_INSN (SUBC_960, '2', "subc_960") 342 | 343 | DEF_INSN (SEL_NO_960, '2', "sel_no_960") 344 | DEF_INSN (SEL_G_960, '2', "sel_g_960") 345 | DEF_INSN (SEL_E_960, '2', "sel_e_960") 346 | DEF_INSN (SEL_GE_960, '2', "sel_ge_960") 347 | DEF_INSN (SEL_L_960, '2', "sel_l_960") 348 | DEF_INSN (SEL_NE_960, '2', "sel_ne_960") 349 | DEF_INSN (SEL_LE_960, '2', "sel_le_960") 350 | DEF_INSN (SEL_O_960, '2', "sel_o_960") 351 | 352 | DEF_INSN (CONCMPO_960, '<', "concmpo_960") 353 | DEF_INSN (CONCMPI_960, '<', "concmpi_960") 354 | DEF_INSN (CMPO_960, '<', "cmpo_960") 355 | DEF_INSN (CMPI_960, '<', "cmpi_960") 356 | DEF_INSN (ADDO_NO_960, '2', "addo_no_960") 357 | DEF_INSN (ADDO_G_960, '2', "addo_g_960") 358 | DEF_INSN (ADDO_E_960, '2', "addo_e_960") 359 | DEF_INSN (ADDO_GE_960, '2', "addo_ge_960") 360 | DEF_INSN (ADDO_L_960, '2', "addo_l_960") 361 | DEF_INSN (ADDO_NE_960, '2', "addo_ne_960") 362 | DEF_INSN (ADDO_LE_960, '2', "addo_le_960") 363 | DEF_INSN (ADDO_O_960, '2', "addo_o_960") 364 | DEF_INSN (SUBO_NO_960, '2', "subo_no_960") 365 | DEF_INSN (SUBO_G_960, '2', "subo_g_960") 366 | DEF_INSN (SUBO_E_960, '2', "subo_e_960") 367 | DEF_INSN (SUBO_GE_960, '2', "subo_ge_960") 368 | DEF_INSN (SUBO_L_960, '2', "subo_l_960") 369 | DEF_INSN (SUBO_NE_960, '2', "subo_ne_960") 370 | DEF_INSN (SUBO_LE_960, '2', "subo_le_960") 371 | DEF_INSN (SUBO_O_960, '2', "subo_o_960") 372 | 373 | DEF_INSN (ALTERBIT, '2', "alterbit") 374 | DEF_INSN (SETBIT, '2', "setbit") 375 | DEF_INSN (CLRBIT, '2', "clrbit") 376 | DEF_INSN (CHKBIT, '<', "chkbit") 377 | DEF_INSN (NOTBIT, '2', "notbit") 378 | 379 | /* 380 | Local variables: 381 | mode:c 382 | version-control: t 383 | End: 384 | */ 385 | -------------------------------------------------------------------------------- /longlong.h: -------------------------------------------------------------------------------- 1 | /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. 2 | 3 | Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. 4 | 5 | This file is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU Library General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or (at your 8 | option) any later version. 9 | 10 | This file is distributed in the hope that it will be useful, but 11 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 12 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 13 | License for more details. 14 | 15 | You should have received a copy of the GNU Library General Public License 16 | along with this file; see the file COPYING.LIB. If not, write to 17 | the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ 18 | 19 | /* You have to define the following before including this file: 20 | 21 | UWtype -- An unsigned type, default type for operations (typically a "word") 22 | UHWtype -- An unsigned type, at least half the size of UWtype. 23 | UDWtype -- An unsigned type, at least twice as large a UWtype 24 | W_TYPE_SIZE -- size in bits of UWtype 25 | 26 | SItype, USItype -- Signed and unsigned 32 bit types. 27 | DItype, UDItype -- Signed and unsigned 64 bit types. 28 | 29 | On a 32 bit machine UWtype should typically be USItype; 30 | on a 64 bit machine, UWtype should typically be UDItype. 
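   As a purely illustrative sketch (these typedefs and values describe an assumed 32-bit configuration and are not supplied by this file), an including file might set up:

     typedef int SItype;
     typedef unsigned int USItype;
     typedef long long DItype;
     typedef unsigned long long UDItype;
     #define UWtype USItype
     #define UHWtype unsigned short int
     #define UDWtype UDItype
     #define W_TYPE_SIZE 32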
31 | */ 32 | 33 | #define __BITS4 (W_TYPE_SIZE / 4) 34 | #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 35 | #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 36 | #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 37 | 38 | /* Define auxiliary asm macros. 39 | 40 | 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two 41 | UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype 42 | word product in HIGH_PROD and LOW_PROD. 43 | 44 | 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 45 | UDWtype product. This is just a variant of umul_ppmm. 46 | 47 | 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 48 | denominator) divides a UDWtype, composed by the UWtype integers 49 | HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 50 | in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 51 | than DENOMINATOR for correct operation. If the macro in addition 52 | requires the most significant bit of DENOMINATOR to be 1, the pre-processor 53 | symbol UDIV_NEEDS_NORMALIZATION is defined to 1. 54 | 55 | 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 56 | denominator). Like udiv_qrnnd but the numbers are signed. The quotient 57 | is rounded towards 0. 58 | 59 | 5) count_leading_zeros(count, x) counts the number of zero-bits from the 60 | msb to the first non-zero bit in the UWtype X. This is the number of 61 | steps X needs to be shifted left to set the msb. Undefined for X == 0, 62 | unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 63 | 64 | 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 65 | from the least significant end. 66 | 67 | 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 68 | high_addend_2, low_addend_2) adds two UWtype integers, composed by 69 | HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 70 | respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 71 | (i.e. carry out) is not stored anywhere, and is lost. 72 | 73 | 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 74 | high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 75 | composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 76 | LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 77 | and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 78 | and is lost. 79 | 80 | If any of these macros are left undefined for a particular CPU, 81 | C macros are used. */ 82 | 83 | /* The CPUs come in alphabetical order below. 84 | 85 | Please add support for more CPUs here, or improve the current support 86 | for the CPUs below! */ 87 | 88 | #if defined (__GNUC__) && !defined (NO_ASM) 89 | 90 | /* We sometimes need to clobber "cc" with gcc2, but that would not be 91 | understood by gcc1. Use cpp to avoid major code duplication.
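   For gcc 2 and later the two macros below expand to the "cc" clobber, complete with the leading ':' or ',' that introduces it; for gcc 1 they expand to nothing, so the asm statements further down can simply append them in either case.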
*/ 92 | #if __GNUC__ < 2 93 | #define __CLOBBER_CC 94 | #define __AND_CLOBBER_CC 95 | #else /* __GNUC__ >= 2 */ 96 | #define __CLOBBER_CC : "cc" 97 | #define __AND_CLOBBER_CC , "cc" 98 | #endif /* __GNUC__ < 2 */ 99 | 100 | #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 101 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 102 | __asm__ ("add %1,%4,%5 \ 103 | addc %0,%2,%3" \ 104 | : "=r" ((USItype)(sh)), \ 105 | "=&r" ((USItype)(sl)) \ 106 | : "%r" ((USItype)(ah)), \ 107 | "rI" ((USItype)(bh)), \ 108 | "%r" ((USItype)(al)), \ 109 | "rI" ((USItype)(bl))) 110 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 111 | __asm__ ("sub %1,%4,%5 \ 112 | subc %0,%2,%3" \ 113 | : "=r" ((USItype)(sh)), \ 114 | "=&r" ((USItype)(sl)) \ 115 | : "r" ((USItype)(ah)), \ 116 | "rI" ((USItype)(bh)), \ 117 | "r" ((USItype)(al)), \ 118 | "rI" ((USItype)(bl))) 119 | #define umul_ppmm(xh, xl, m0, m1) \ 120 | do { \ 121 | USItype __m0 = (m0), __m1 = (m1); \ 122 | __asm__ ("multiplu %0,%1,%2" \ 123 | : "=r" ((USItype)(xl)) \ 124 | : "r" (__m0), \ 125 | "r" (__m1)); \ 126 | __asm__ ("multmu %0,%1,%2" \ 127 | : "=r" ((USItype)(xh)) \ 128 | : "r" (__m0), \ 129 | "r" (__m1)); \ 130 | } while (0) 131 | #define udiv_qrnnd(q, r, n1, n0, d) \ 132 | __asm__ ("dividu %0,%3,%4" \ 133 | : "=r" ((USItype)(q)), \ 134 | "=q" ((USItype)(r)) \ 135 | : "1" ((USItype)(n1)), \ 136 | "r" ((USItype)(n0)), \ 137 | "r" ((USItype)(d))) 138 | #define count_leading_zeros(count, x) \ 139 | __asm__ ("clz %0,%1" \ 140 | : "=r" ((USItype)(count)) \ 141 | : "r" ((USItype)(x))) 142 | #endif /* __a29k__ */ 143 | 144 | #if defined (__alpha__) && W_TYPE_SIZE == 64 145 | #define umul_ppmm(ph, pl, m0, m1) \ 146 | do { \ 147 | UDItype __m0 = (m0), __m1 = (m1); \ 148 | __asm__ ("umulh %r1,%2,%0" \ 149 | : "=r" ((UDItype) ph) \ 150 | : "%rJ" (__m0), \ 151 | "rI" (__m1)); \ 152 | (pl) = __m0 * __m1; \ 153 | } while (0) 154 | #define UMUL_TIME 46 155 | #ifndef LONGLONG_STANDALONE 156 | #define udiv_qrnnd(q, r, n1, n0, d) \ 157 | do { UDItype __r; \ 158 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 159 | (r) = __r; \ 160 | } while (0) 161 | extern UDItype __udiv_qrnnd (); 162 | #define UDIV_TIME 220 163 | #endif /* LONGLONG_STANDALONE */ 164 | #endif /* __alpha__ */ 165 | 166 | #if defined (__arm__) && W_TYPE_SIZE == 32 167 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 168 | __asm__ ("adds %1, %4, %5 \ 169 | adc %0, %2, %3" \ 170 | : "=r" ((USItype)(sh)), \ 171 | "=&r" ((USItype)(sl)) \ 172 | : "%r" ((USItype)(ah)), \ 173 | "rI" ((USItype)(bh)), \ 174 | "%r" ((USItype)(al)), \ 175 | "rI" ((USItype)(bl))) 176 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 177 | __asm__ ("subs %1, %4, %5 \ 178 | sbc %0, %2, %3" \ 179 | : "=r" ((USItype)(sh)), \ 180 | "=&r" ((USItype)(sl)) \ 181 | : "r" ((USItype)(ah)), \ 182 | "rI" ((USItype)(bh)), \ 183 | "r" ((USItype)(al)), \ 184 | "rI" ((USItype)(bl))) 185 | #define umul_ppmm(xh, xl, a, b) \ 186 | __asm__ ("%@ Inlined umul_ppmm \ 187 | mov %|r0, %2, lsr #16 \ 188 | mov %|r2, %3, lsr #16 \ 189 | bic %|r1, %2, %|r0, lsl #16 \ 190 | bic %|r2, %3, %|r2, lsl #16 \ 191 | mul %1, %|r1, %|r2 \ 192 | mul %|r2, %|r0, %|r2 \ 193 | mul %|r1, %0, %|r1 \ 194 | mul %0, %|r0, %0 \ 195 | adds %|r1, %|r2, %|r1 \ 196 | addcs %0, %0, #65536 \ 197 | adds %1, %1, %|r1, lsl #16 \ 198 | adc %0, %0, %|r1, lsr #16" \ 199 | : "=&r" ((USItype)(xh)), \ 200 | "=r" ((USItype)(xl)) \ 201 | : "r" ((USItype)(a)), \ 202 | "r" ((USItype)(b)) \ 203 | : "r0", "r1", "r2") 204 | #define UMUL_TIME 20 205 | #define UDIV_TIME 100 206 | #endif /* 
__arm__ */ 207 | 208 | #if defined (__clipper__) && W_TYPE_SIZE == 32 209 | #define umul_ppmm(w1, w0, u, v) \ 210 | ({union {UDItype __ll; \ 211 | struct {USItype __l, __h;} __i; \ 212 | } __xx; \ 213 | __asm__ ("mulwux %2,%0" \ 214 | : "=r" (__xx.__ll) \ 215 | : "%0" ((USItype)(u)), \ 216 | "r" ((USItype)(v))); \ 217 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 218 | #define smul_ppmm(w1, w0, u, v) \ 219 | ({union {DItype __ll; \ 220 | struct {SItype __l, __h;} __i; \ 221 | } __xx; \ 222 | __asm__ ("mulwx %2,%0" \ 223 | : "=r" (__xx.__ll) \ 224 | : "%0" ((SItype)(u)), \ 225 | "r" ((SItype)(v))); \ 226 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 227 | #define __umulsidi3(u, v) \ 228 | ({UDItype __w; \ 229 | __asm__ ("mulwux %2,%0" \ 230 | : "=r" (__w) \ 231 | : "%0" ((USItype)(u)), \ 232 | "r" ((USItype)(v))); \ 233 | __w; }) 234 | #endif /* __clipper__ */ 235 | 236 | #if defined (__gmicro__) && W_TYPE_SIZE == 32 237 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 238 | __asm__ ("add.w %5,%1 \ 239 | addx %3,%0" \ 240 | : "=g" ((USItype)(sh)), \ 241 | "=&g" ((USItype)(sl)) \ 242 | : "%0" ((USItype)(ah)), \ 243 | "g" ((USItype)(bh)), \ 244 | "%1" ((USItype)(al)), \ 245 | "g" ((USItype)(bl))) 246 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 247 | __asm__ ("sub.w %5,%1 \ 248 | subx %3,%0" \ 249 | : "=g" ((USItype)(sh)), \ 250 | "=&g" ((USItype)(sl)) \ 251 | : "0" ((USItype)(ah)), \ 252 | "g" ((USItype)(bh)), \ 253 | "1" ((USItype)(al)), \ 254 | "g" ((USItype)(bl))) 255 | #define umul_ppmm(ph, pl, m0, m1) \ 256 | __asm__ ("mulx %3,%0,%1" \ 257 | : "=g" ((USItype)(ph)), \ 258 | "=r" ((USItype)(pl)) \ 259 | : "%0" ((USItype)(m0)), \ 260 | "g" ((USItype)(m1))) 261 | #define udiv_qrnnd(q, r, nh, nl, d) \ 262 | __asm__ ("divx %4,%0,%1" \ 263 | : "=g" ((USItype)(q)), \ 264 | "=r" ((USItype)(r)) \ 265 | : "1" ((USItype)(nh)), \ 266 | "0" ((USItype)(nl)), \ 267 | "g" ((USItype)(d))) 268 | #define count_leading_zeros(count, x) \ 269 | __asm__ ("bsch/1 %1,%0" \ 270 | : "=g" (count) \ 271 | : "g" ((USItype)(x)), \ 272 | "0" ((USItype)0)) 273 | #endif 274 | 275 | #if defined (__hppa) && W_TYPE_SIZE == 32 276 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 277 | __asm__ ("add %4,%5,%1 \ 278 | addc %2,%3,%0" \ 279 | : "=r" ((USItype)(sh)), \ 280 | "=&r" ((USItype)(sl)) \ 281 | : "%rM" ((USItype)(ah)), \ 282 | "rM" ((USItype)(bh)), \ 283 | "%rM" ((USItype)(al)), \ 284 | "rM" ((USItype)(bl))) 285 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 286 | __asm__ ("sub %4,%5,%1 \ 287 | subb %2,%3,%0" \ 288 | : "=r" ((USItype)(sh)), \ 289 | "=&r" ((USItype)(sl)) \ 290 | : "rM" ((USItype)(ah)), \ 291 | "rM" ((USItype)(bh)), \ 292 | "rM" ((USItype)(al)), \ 293 | "rM" ((USItype)(bl))) 294 | #if defined (_PA_RISC1_1) 295 | #define umul_ppmm(wh, wl, u, v) \ 296 | do { \ 297 | union {UDItype __ll; \ 298 | struct {USItype __h, __l;} __i; \ 299 | } __xx; \ 300 | __asm__ ("xmpyu %1,%2,%0" \ 301 | : "=fx" (__xx.__ll) \ 302 | : "fx" ((USItype)(u)), \ 303 | "fx" ((USItype)(v))); \ 304 | (wh) = __xx.__i.__h; \ 305 | (wl) = __xx.__i.__l; \ 306 | } while (0) 307 | #define UMUL_TIME 8 308 | #define UDIV_TIME 60 309 | #else 310 | #define UMUL_TIME 40 311 | #define UDIV_TIME 80 312 | #endif 313 | #ifndef LONGLONG_STANDALONE 314 | #define udiv_qrnnd(q, r, n1, n0, d) \ 315 | do { USItype __r; \ 316 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 317 | (r) = __r; \ 318 | } while (0) 319 | extern USItype __udiv_qrnnd (); 320 | #endif /* LONGLONG_STANDALONE */ 321 | #define count_leading_zeros(count, x) \ 322 | do { \ 323 | USItype 
__tmp; \ 324 | __asm__ ( \ 325 | "ldi 1,%0 \ 326 | extru,= %1,15,16,%%r0 ; Bits 31..16 zero? \ 327 | extru,tr %1,15,16,%1 ; No. Shift down, skip add. \ 328 | ldo 16(%0),%0 ; Yes. Perform add. \ 329 | extru,= %1,23,8,%%r0 ; Bits 15..8 zero? \ 330 | extru,tr %1,23,8,%1 ; No. Shift down, skip add. \ 331 | ldo 8(%0),%0 ; Yes. Perform add. \ 332 | extru,= %1,27,4,%%r0 ; Bits 7..4 zero? \ 333 | extru,tr %1,27,4,%1 ; No. Shift down, skip add. \ 334 | ldo 4(%0),%0 ; Yes. Perform add. \ 335 | extru,= %1,29,2,%%r0 ; Bits 3..2 zero? \ 336 | extru,tr %1,29,2,%1 ; No. Shift down, skip add. \ 337 | ldo 2(%0),%0 ; Yes. Perform add. \ 338 | extru %1,30,1,%1 ; Extract bit 1. \ 339 | sub %0,%1,%0 ; Subtract it. \ 340 | " : "=r" (count), "=r" (__tmp) : "1" (x)); \ 341 | } while (0) 342 | #endif 343 | 344 | #if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32 345 | #define umul_ppmm(xh, xl, m0, m1) \ 346 | do { \ 347 | union {UDItype __ll; \ 348 | struct {USItype __h, __l;} __i; \ 349 | } __xx; \ 350 | USItype __m0 = (m0), __m1 = (m1); \ 351 | __asm__ ("mr %0,%3" \ 352 | : "=r" (__xx.__i.__h), \ 353 | "=r" (__xx.__i.__l) \ 354 | : "%1" (__m0), \ 355 | "r" (__m1)); \ 356 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 357 | (xh) += ((((SItype) __m0 >> 31) & __m1) \ 358 | + (((SItype) __m1 >> 31) & __m0)); \ 359 | } while (0) 360 | #define smul_ppmm(xh, xl, m0, m1) \ 361 | do { \ 362 | union {DItype __ll; \ 363 | struct {USItype __h, __l;} __i; \ 364 | } __xx; \ 365 | __asm__ ("mr %0,%3" \ 366 | : "=r" (__xx.__i.__h), \ 367 | "=r" (__xx.__i.__l) \ 368 | : "%1" (m0), \ 369 | "r" (m1)); \ 370 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 371 | } while (0) 372 | #define sdiv_qrnnd(q, r, n1, n0, d) \ 373 | do { \ 374 | union {DItype __ll; \ 375 | struct {USItype __h, __l;} __i; \ 376 | } __xx; \ 377 | __xx.__i.__h = n1; __xx.__i.__l = n0; \ 378 | __asm__ ("dr %0,%2" \ 379 | : "=r" (__xx.__ll) \ 380 | : "0" (__xx.__ll), "r" (d)); \ 381 | (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ 382 | } while (0) 383 | #endif 384 | 385 | #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 386 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 387 | __asm__ ("addl %5,%1 \ 388 | adcl %3,%0" \ 389 | : "=r" ((USItype)(sh)), \ 390 | "=&r" ((USItype)(sl)) \ 391 | : "%0" ((USItype)(ah)), \ 392 | "g" ((USItype)(bh)), \ 393 | "%1" ((USItype)(al)), \ 394 | "g" ((USItype)(bl))) 395 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 396 | __asm__ ("subl %5,%1 \ 397 | sbbl %3,%0" \ 398 | : "=r" ((USItype)(sh)), \ 399 | "=&r" ((USItype)(sl)) \ 400 | : "0" ((USItype)(ah)), \ 401 | "g" ((USItype)(bh)), \ 402 | "1" ((USItype)(al)), \ 403 | "g" ((USItype)(bl))) 404 | #define umul_ppmm(w1, w0, u, v) \ 405 | __asm__ ("mull %3" \ 406 | : "=a" ((USItype)(w0)), \ 407 | "=d" ((USItype)(w1)) \ 408 | : "%0" ((USItype)(u)), \ 409 | "rm" ((USItype)(v))) 410 | #define udiv_qrnnd(q, r, n1, n0, d) \ 411 | __asm__ ("divl %4" \ 412 | : "=a" ((USItype)(q)), \ 413 | "=d" ((USItype)(r)) \ 414 | : "0" ((USItype)(n0)), \ 415 | "1" ((USItype)(n1)), \ 416 | "rm" ((USItype)(d))) 417 | #define count_leading_zeros(count, x) \ 418 | do { \ 419 | USItype __cbtmp; \ 420 | __asm__ ("bsrl %1,%0" \ 421 | : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ 422 | (count) = __cbtmp ^ 31; \ 423 | } while (0) 424 | #define count_trailing_zeros(count, x) \ 425 | __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) 426 | #define UMUL_TIME 40 427 | #define UDIV_TIME 40 428 | #endif /* 80x86 */ 429 | 430 | #if defined (__i960__) && W_TYPE_SIZE == 32 431 | #define 
umul_ppmm(w1, w0, u, v) \ 432 | ({union {UDItype __ll; \ 433 | struct {USItype __l, __h;} __i; \ 434 | } __xx; \ 435 | __asm__ ("emul %2,%1,%0" \ 436 | : "=d" (__xx.__ll) \ 437 | : "%dI" ((USItype)(u)), \ 438 | "dI" ((USItype)(v))); \ 439 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 440 | #define __umulsidi3(u, v) \ 441 | ({UDItype __w; \ 442 | __asm__ ("emul %2,%1,%0" \ 443 | : "=d" (__w) \ 444 | : "%dI" ((USItype)(u)), \ 445 | "dI" ((USItype)(v))); \ 446 | __w; }) 447 | #endif /* __i960__ */ 448 | 449 | #if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 450 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 451 | __asm__ ("add%.l %5,%1 \ 452 | addx%.l %3,%0" \ 453 | : "=d" ((USItype)(sh)), \ 454 | "=&d" ((USItype)(sl)) \ 455 | : "%0" ((USItype)(ah)), \ 456 | "d" ((USItype)(bh)), \ 457 | "%1" ((USItype)(al)), \ 458 | "g" ((USItype)(bl))) 459 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 460 | __asm__ ("sub%.l %5,%1 \ 461 | subx%.l %3,%0" \ 462 | : "=d" ((USItype)(sh)), \ 463 | "=&d" ((USItype)(sl)) \ 464 | : "0" ((USItype)(ah)), \ 465 | "d" ((USItype)(bh)), \ 466 | "1" ((USItype)(al)), \ 467 | "g" ((USItype)(bl))) 468 | #if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 469 | #define umul_ppmm(w1, w0, u, v) \ 470 | __asm__ ("mulu%.l %3,%1:%0" \ 471 | : "=d" ((USItype)(w0)), \ 472 | "=d" ((USItype)(w1)) \ 473 | : "%0" ((USItype)(u)), \ 474 | "dmi" ((USItype)(v))) 475 | #define UMUL_TIME 45 476 | #define udiv_qrnnd(q, r, n1, n0, d) \ 477 | __asm__ ("divu%.l %4,%1:%0" \ 478 | : "=d" ((USItype)(q)), \ 479 | "=d" ((USItype)(r)) \ 480 | : "0" ((USItype)(n0)), \ 481 | "1" ((USItype)(n1)), \ 482 | "dmi" ((USItype)(d))) 483 | #define UDIV_TIME 90 484 | #define sdiv_qrnnd(q, r, n1, n0, d) \ 485 | __asm__ ("divs%.l %4,%1:%0" \ 486 | : "=d" ((USItype)(q)), \ 487 | "=d" ((USItype)(r)) \ 488 | : "0" ((USItype)(n0)), \ 489 | "1" ((USItype)(n1)), \ 490 | "dmi" ((USItype)(d))) 491 | #define count_leading_zeros(count, x) \ 492 | __asm__ ("bfffo %1{%b2:%b2},%0" \ 493 | : "=d" ((USItype)(count)) \ 494 | : "od" ((USItype)(x)), "n" (0)) 495 | #else /* not mc68020 */ 496 | #define umul_ppmmxx(xh, xl, a, b) \ 497 | do { USItype __umul_tmp1, __umul_tmp2; \ 498 | __asm__ ("| Inlined umul_ppmm \ 499 | move%.l %5,%3 \ 500 | move%.l %2,%0 \ 501 | move%.w %3,%1 \ 502 | swap %3 \ 503 | swap %0 \ 504 | mulu %2,%1 \ 505 | mulu %3,%0 \ 506 | mulu %2,%3 \ 507 | swap %2 \ 508 | mulu %5,%2 \ 509 | add%.l %3,%2 \ 510 | jcc 1f \ 511 | add%.l #0x10000,%0 \ 512 | 1: move%.l %2,%3 \ 513 | clr%.w %2 \ 514 | swap %2 \ 515 | swap %3 \ 516 | clr%.w %3 \ 517 | add%.l %3,%1 \ 518 | addx%.l %2,%0 \ 519 | | End inlined umul_ppmm" \ 520 | : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ 521 | "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ 522 | : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ 523 | } while (0) 524 | #define UMUL_TIME 100 525 | #define UDIV_TIME 400 526 | #endif /* not mc68020 */ 527 | #endif /* mc68000 */ 528 | 529 | #if defined (__m88000__) && W_TYPE_SIZE == 32 530 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 531 | __asm__ ("addu.co %1,%r4,%r5 \ 532 | addu.ci %0,%r2,%r3" \ 533 | : "=r" ((USItype)(sh)), \ 534 | "=&r" ((USItype)(sl)) \ 535 | : "%rJ" ((USItype)(ah)), \ 536 | "rJ" ((USItype)(bh)), \ 537 | "%rJ" ((USItype)(al)), \ 538 | "rJ" ((USItype)(bl))) 539 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 540 | __asm__ ("subu.co %1,%r4,%r5 \ 541 | subu.ci %0,%r2,%r3" \ 542 | : "=r" ((USItype)(sh)), \ 543 | "=&r" 
((USItype)(sl)) \ 544 | : "rJ" ((USItype)(ah)), \ 545 | "rJ" ((USItype)(bh)), \ 546 | "rJ" ((USItype)(al)), \ 547 | "rJ" ((USItype)(bl))) 548 | #define count_leading_zeros(count, x) \ 549 | do { \ 550 | USItype __cbtmp; \ 551 | __asm__ ("ff1 %0,%1" \ 552 | : "=r" (__cbtmp) \ 553 | : "r" ((USItype)(x))); \ 554 | (count) = __cbtmp ^ 31; \ 555 | } while (0) 556 | #if defined (__mc88110__) 557 | #define umul_ppmm(wh, wl, u, v) \ 558 | do { \ 559 | union {UDItype __ll; \ 560 | struct {USItype __h, __l;} __i; \ 561 | } __xx; \ 562 | __asm__ ("mulu.d %0,%1,%2" \ 563 | : "=r" (__xx.__ll) \ 564 | : "r" ((USItype)(u)), \ 565 | "r" ((USItype)(v))); \ 566 | (wh) = __xx.__i.__h; \ 567 | (wl) = __xx.__i.__l; \ 568 | } while (0) 569 | #define udiv_qrnnd(q, r, n1, n0, d) \ 570 | ({union {UDItype __ll; \ 571 | struct {USItype __h, __l;} __i; \ 572 | } __xx; \ 573 | USItype __q; \ 574 | __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 575 | __asm__ ("divu.d %0,%1,%2" \ 576 | : "=r" (__q) \ 577 | : "r" (__xx.__ll), \ 578 | "r" ((USItype)(d))); \ 579 | (r) = (n0) - __q * (d); (q) = __q; }) 580 | #define UMUL_TIME 5 581 | #define UDIV_TIME 25 582 | #else 583 | #define UMUL_TIME 17 584 | #define UDIV_TIME 150 585 | #endif /* __mc88110__ */ 586 | #endif /* __m88000__ */ 587 | 588 | #if defined (__mips__) && W_TYPE_SIZE == 32 589 | #define umul_ppmm(w1, w0, u, v) \ 590 | __asm__ ("multu %2,%3 \ 591 | mflo %0 \ 592 | mfhi %1" \ 593 | : "=d" ((USItype)(w0)), \ 594 | "=d" ((USItype)(w1)) \ 595 | : "d" ((USItype)(u)), \ 596 | "d" ((USItype)(v))) 597 | #define UMUL_TIME 10 598 | #define UDIV_TIME 100 599 | #endif /* __mips__ */ 600 | 601 | #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 602 | #define umul_ppmm(w1, w0, u, v) \ 603 | __asm__ ("dmultu %2,%3 \ 604 | mflo %0 \ 605 | mfhi %1" \ 606 | : "=d" ((UDItype)(w0)), \ 607 | "=d" ((UDItype)(w1)) \ 608 | : "d" ((UDItype)(u)), \ 609 | "d" ((UDItype)(v))) 610 | #define UMUL_TIME 10 611 | #define UDIV_TIME 100 612 | #endif /* __mips__ */ 613 | 614 | #if defined (__ns32000__) && W_TYPE_SIZE == 32 615 | #define umul_ppmm(w1, w0, u, v) \ 616 | ({union {UDItype __ll; \ 617 | struct {USItype __l, __h;} __i; \ 618 | } __xx; \ 619 | __asm__ ("meid %2,%0" \ 620 | : "=g" (__xx.__ll) \ 621 | : "%0" ((USItype)(u)), \ 622 | "g" ((USItype)(v))); \ 623 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 624 | #define __umulsidi3(u, v) \ 625 | ({UDItype __w; \ 626 | __asm__ ("meid %2,%0" \ 627 | : "=g" (__w) \ 628 | : "%0" ((USItype)(u)), \ 629 | "g" ((USItype)(v))); \ 630 | __w; }) 631 | #define udiv_qrnnd(q, r, n1, n0, d) \ 632 | ({union {UDItype __ll; \ 633 | struct {USItype __l, __h;} __i; \ 634 | } __xx; \ 635 | __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 636 | __asm__ ("deid %2,%0" \ 637 | : "=g" (__xx.__ll) \ 638 | : "0" (__xx.__ll), \ 639 | "g" ((USItype)(d))); \ 640 | (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) 641 | #define count_trailing_zeros(count,x) \ 642 | do { \ 643 | __asm__ ("ffsd %2,%0" \ 644 | : "=r" ((USItype) (count)) \ 645 | : "0" ((USItype) 0), \ 646 | "r" ((USItype) (x))); \ 647 | } while (0) 648 | #endif /* __ns32000__ */ 649 | 650 | #if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32 651 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 652 | do { \ 653 | if (__builtin_constant_p (bh) && (bh) == 0) \ 654 | __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ 655 | : "=r" ((USItype)(sh)), \ 656 | "=&r" ((USItype)(sl)) \ 657 | : "%r" ((USItype)(ah)), \ 658 | "%r" ((USItype)(al)), \ 659 | "rI" ((USItype)(bl))); \ 660 | else if 
(__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ 661 | __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ 662 | : "=r" ((USItype)(sh)), \ 663 | "=&r" ((USItype)(sl)) \ 664 | : "%r" ((USItype)(ah)), \ 665 | "%r" ((USItype)(al)), \ 666 | "rI" ((USItype)(bl))); \ 667 | else \ 668 | __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ 669 | : "=r" ((USItype)(sh)), \ 670 | "=&r" ((USItype)(sl)) \ 671 | : "%r" ((USItype)(ah)), \ 672 | "r" ((USItype)(bh)), \ 673 | "%r" ((USItype)(al)), \ 674 | "rI" ((USItype)(bl))); \ 675 | } while (0) 676 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 677 | do { \ 678 | if (__builtin_constant_p (ah) && (ah) == 0) \ 679 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ 680 | : "=r" ((USItype)(sh)), \ 681 | "=&r" ((USItype)(sl)) \ 682 | : "r" ((USItype)(bh)), \ 683 | "rI" ((USItype)(al)), \ 684 | "r" ((USItype)(bl))); \ 685 | else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ 686 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ 687 | : "=r" ((USItype)(sh)), \ 688 | "=&r" ((USItype)(sl)) \ 689 | : "r" ((USItype)(bh)), \ 690 | "rI" ((USItype)(al)), \ 691 | "r" ((USItype)(bl))); \ 692 | else if (__builtin_constant_p (bh) && (bh) == 0) \ 693 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ 694 | : "=r" ((USItype)(sh)), \ 695 | "=&r" ((USItype)(sl)) \ 696 | : "r" ((USItype)(ah)), \ 697 | "rI" ((USItype)(al)), \ 698 | "r" ((USItype)(bl))); \ 699 | else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ 700 | __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ 701 | : "=r" ((USItype)(sh)), \ 702 | "=&r" ((USItype)(sl)) \ 703 | : "r" ((USItype)(ah)), \ 704 | "rI" ((USItype)(al)), \ 705 | "r" ((USItype)(bl))); \ 706 | else \ 707 | __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ 708 | : "=r" ((USItype)(sh)), \ 709 | "=&r" ((USItype)(sl)) \ 710 | : "r" ((USItype)(ah)), \ 711 | "r" ((USItype)(bh)), \ 712 | "rI" ((USItype)(al)), \ 713 | "r" ((USItype)(bl))); \ 714 | } while (0) 715 | #define count_leading_zeros(count, x) \ 716 | __asm__ ("{cntlz|cntlzw} %0,%1" \ 717 | : "=r" ((USItype)(count)) \ 718 | : "r" ((USItype)(x))) 719 | #if defined (_ARCH_PPC) 720 | #define umul_ppmm(ph, pl, m0, m1) \ 721 | do { \ 722 | USItype __m0 = (m0), __m1 = (m1); \ 723 | __asm__ ("mulhwu %0,%1,%2" \ 724 | : "=r" ((USItype) ph) \ 725 | : "%r" (__m0), \ 726 | "r" (__m1)); \ 727 | (pl) = __m0 * __m1; \ 728 | } while (0) 729 | #define UMUL_TIME 15 730 | #define smul_ppmm(ph, pl, m0, m1) \ 731 | do { \ 732 | SItype __m0 = (m0), __m1 = (m1); \ 733 | __asm__ ("mulhw %0,%1,%2" \ 734 | : "=r" ((SItype) ph) \ 735 | : "%r" (__m0), \ 736 | "r" (__m1)); \ 737 | (pl) = __m0 * __m1; \ 738 | } while (0) 739 | #define SMUL_TIME 14 740 | #define UDIV_TIME 120 741 | #else 742 | #define umul_ppmm(xh, xl, m0, m1) \ 743 | do { \ 744 | USItype __m0 = (m0), __m1 = (m1); \ 745 | __asm__ ("mul %0,%2,%3" \ 746 | : "=r" ((USItype)(xh)), \ 747 | "=q" ((USItype)(xl)) \ 748 | : "r" (__m0), \ 749 | "r" (__m1)); \ 750 | (xh) += ((((SItype) __m0 >> 31) & __m1) \ 751 | + (((SItype) __m1 >> 31) & __m0)); \ 752 | } while (0) 753 | #define UMUL_TIME 8 754 | #define smul_ppmm(xh, xl, m0, m1) \ 755 | __asm__ ("mul %0,%2,%3" \ 756 | : "=r" ((SItype)(xh)), \ 757 | "=q" ((SItype)(xl)) \ 758 | : "r" (m0), \ 759 | "r" (m1)) 760 | #define SMUL_TIME 4 761 | #define sdiv_qrnnd(q, r, nh, nl, d) \ 762 | __asm__ ("div %0,%2,%4" \ 763 | : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ 764 | : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) 765 | 
#define UDIV_TIME 100 766 | #endif 767 | #endif /* Power architecture variants. */ 768 | 769 | #if defined (__pyr__) && W_TYPE_SIZE == 32 770 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 771 | __asm__ ("addw %5,%1 \ 772 | addwc %3,%0" \ 773 | : "=r" ((USItype)(sh)), \ 774 | "=&r" ((USItype)(sl)) \ 775 | : "%0" ((USItype)(ah)), \ 776 | "g" ((USItype)(bh)), \ 777 | "%1" ((USItype)(al)), \ 778 | "g" ((USItype)(bl))) 779 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 780 | __asm__ ("subw %5,%1 \ 781 | subwb %3,%0" \ 782 | : "=r" ((USItype)(sh)), \ 783 | "=&r" ((USItype)(sl)) \ 784 | : "0" ((USItype)(ah)), \ 785 | "g" ((USItype)(bh)), \ 786 | "1" ((USItype)(al)), \ 787 | "g" ((USItype)(bl))) 788 | /* This insn doesn't work on ancient pyramids. */ 789 | #define umul_ppmm(w1, w0, u, v) \ 790 | ({union {UDItype __ll; \ 791 | struct {USItype __h, __l;} __i; \ 792 | } __xx; \ 793 | __xx.__i.__l = u; \ 794 | __asm__ ("uemul %3,%0" \ 795 | : "=r" (__xx.__i.__h), \ 796 | "=r" (__xx.__i.__l) \ 797 | : "1" (__xx.__i.__l), \ 798 | "g" ((USItype)(v))); \ 799 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 800 | #endif /* __pyr__ */ 801 | 802 | #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 803 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 804 | __asm__ ("a %1,%5 \ 805 | ae %0,%3" \ 806 | : "=r" ((USItype)(sh)), \ 807 | "=&r" ((USItype)(sl)) \ 808 | : "%0" ((USItype)(ah)), \ 809 | "r" ((USItype)(bh)), \ 810 | "%1" ((USItype)(al)), \ 811 | "r" ((USItype)(bl))) 812 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 813 | __asm__ ("s %1,%5 \ 814 | se %0,%3" \ 815 | : "=r" ((USItype)(sh)), \ 816 | "=&r" ((USItype)(sl)) \ 817 | : "0" ((USItype)(ah)), \ 818 | "r" ((USItype)(bh)), \ 819 | "1" ((USItype)(al)), \ 820 | "r" ((USItype)(bl))) 821 | #define umul_ppmm(ph, pl, m0, m1) \ 822 | do { \ 823 | USItype __m0 = (m0), __m1 = (m1); \ 824 | __asm__ ( \ 825 | "s r2,r2 \ 826 | mts r10,%2 \ 827 | m r2,%3 \ 828 | m r2,%3 \ 829 | m r2,%3 \ 830 | m r2,%3 \ 831 | m r2,%3 \ 832 | m r2,%3 \ 833 | m r2,%3 \ 834 | m r2,%3 \ 835 | m r2,%3 \ 836 | m r2,%3 \ 837 | m r2,%3 \ 838 | m r2,%3 \ 839 | m r2,%3 \ 840 | m r2,%3 \ 841 | m r2,%3 \ 842 | m r2,%3 \ 843 | cas %0,r2,r0 \ 844 | mfs r10,%1" \ 845 | : "=r" ((USItype)(ph)), \ 846 | "=r" ((USItype)(pl)) \ 847 | : "%r" (__m0), \ 848 | "r" (__m1) \ 849 | : "r2"); \ 850 | (ph) += ((((SItype) __m0 >> 31) & __m1) \ 851 | + (((SItype) __m1 >> 31) & __m0)); \ 852 | } while (0) 853 | #define UMUL_TIME 20 854 | #define UDIV_TIME 200 855 | #define count_leading_zeros(count, x) \ 856 | do { \ 857 | if ((x) >= 0x10000) \ 858 | __asm__ ("clz %0,%1" \ 859 | : "=r" ((USItype)(count)) \ 860 | : "r" ((USItype)(x) >> 16)); \ 861 | else \ 862 | { \ 863 | __asm__ ("clz %0,%1" \ 864 | : "=r" ((USItype)(count)) \ 865 | : "r" ((USItype)(x))); \ 866 | (count) += 16; \ 867 | } \ 868 | } while (0) 869 | #endif 870 | 871 | #if defined (__sparc__) && W_TYPE_SIZE == 32 872 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 873 | __asm__ ("addcc %r4,%5,%1 \ 874 | addx %r2,%3,%0" \ 875 | : "=r" ((USItype)(sh)), \ 876 | "=&r" ((USItype)(sl)) \ 877 | : "%rJ" ((USItype)(ah)), \ 878 | "rI" ((USItype)(bh)), \ 879 | "%rJ" ((USItype)(al)), \ 880 | "rI" ((USItype)(bl)) \ 881 | __CLOBBER_CC) 882 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 883 | __asm__ ("subcc %r4,%5,%1 \ 884 | subx %r2,%3,%0" \ 885 | : "=r" ((USItype)(sh)), \ 886 | "=&r" ((USItype)(sl)) \ 887 | : "rJ" ((USItype)(ah)), \ 888 | "rI" ((USItype)(bh)), \ 889 | "rJ" ((USItype)(al)), \ 890 | "rI" ((USItype)(bl)) \ 891 | __CLOBBER_CC) 892 | #if defined 
(__sparc_v8__) 893 | /* Don't match immediate range because, 1) it is not often useful, 894 | 2) the 'I' flag thinks of the range as a 13 bit signed interval, 895 | while we want to match a 13 bit interval, sign extended to 32 bits, 896 | but INTERPRETED AS UNSIGNED. */ 897 | #define umul_ppmm(w1, w0, u, v) \ 898 | __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 899 | : "=r" ((USItype)(w1)), \ 900 | "=r" ((USItype)(w0)) \ 901 | : "r" ((USItype)(u)), \ 902 | "r" ((USItype)(v))) 903 | #define UMUL_TIME 5 904 | #ifndef SUPERSPARC 905 | #define udiv_qrnnd(q, r, n1, n0, d) \ 906 | __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ 907 | : "=&r" ((USItype)(q)), \ 908 | "=&r" ((USItype)(r)) \ 909 | : "r" ((USItype)(n1)), \ 910 | "r" ((USItype)(n0)), \ 911 | "r" ((USItype)(d))) 912 | #define UDIV_TIME 25 913 | #endif /* SUPERSPARC */ 914 | #else /* ! __sparc_v8__ */ 915 | #if defined (__sparclite__) 916 | /* This has hardware multiply but not divide. It also has two additional 917 | instructions scan (ffs from high bit) and divscc. */ 918 | #define umul_ppmm(w1, w0, u, v) \ 919 | __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 920 | : "=r" ((USItype)(w1)), \ 921 | "=r" ((USItype)(w0)) \ 922 | : "r" ((USItype)(u)), \ 923 | "r" ((USItype)(v))) 924 | #define UMUL_TIME 5 925 | #define udiv_qrnnd(q, r, n1, n0, d) \ 926 | __asm__ ("! Inlined udiv_qrnnd \ 927 | wr %%g0,%2,%%y ! Not a delayed write for sparclite \ 928 | tst %%g0 \ 929 | divscc %3,%4,%%g1 \ 930 | divscc %%g1,%4,%%g1 \ 931 | divscc %%g1,%4,%%g1 \ 932 | divscc %%g1,%4,%%g1 \ 933 | divscc %%g1,%4,%%g1 \ 934 | divscc %%g1,%4,%%g1 \ 935 | divscc %%g1,%4,%%g1 \ 936 | divscc %%g1,%4,%%g1 \ 937 | divscc %%g1,%4,%%g1 \ 938 | divscc %%g1,%4,%%g1 \ 939 | divscc %%g1,%4,%%g1 \ 940 | divscc %%g1,%4,%%g1 \ 941 | divscc %%g1,%4,%%g1 \ 942 | divscc %%g1,%4,%%g1 \ 943 | divscc %%g1,%4,%%g1 \ 944 | divscc %%g1,%4,%%g1 \ 945 | divscc %%g1,%4,%%g1 \ 946 | divscc %%g1,%4,%%g1 \ 947 | divscc %%g1,%4,%%g1 \ 948 | divscc %%g1,%4,%%g1 \ 949 | divscc %%g1,%4,%%g1 \ 950 | divscc %%g1,%4,%%g1 \ 951 | divscc %%g1,%4,%%g1 \ 952 | divscc %%g1,%4,%%g1 \ 953 | divscc %%g1,%4,%%g1 \ 954 | divscc %%g1,%4,%%g1 \ 955 | divscc %%g1,%4,%%g1 \ 956 | divscc %%g1,%4,%%g1 \ 957 | divscc %%g1,%4,%%g1 \ 958 | divscc %%g1,%4,%%g1 \ 959 | divscc %%g1,%4,%%g1 \ 960 | divscc %%g1,%4,%0 \ 961 | rd %%y,%1 \ 962 | bl,a 1f \ 963 | add %1,%4,%1 \ 964 | 1: ! End of inline udiv_qrnnd" \ 965 | : "=r" ((USItype)(q)), \ 966 | "=r" ((USItype)(r)) \ 967 | : "r" ((USItype)(n1)), \ 968 | "r" ((USItype)(n0)), \ 969 | "rI" ((USItype)(d)) \ 970 | : "%g1" __AND_CLOBBER_CC) 971 | #define UDIV_TIME 37 972 | #define count_leading_zeros(count, x) \ 973 | __asm__ ("scan %1,0,%0" \ 974 | : "=r" ((USItype)(x)) \ 975 | : "r" ((USItype)(count))) 976 | #endif /* __sparclite__ */ 977 | #endif /* __sparc_v8__ */ 978 | /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ 979 | #ifndef umul_ppmm 980 | #define umul_ppmm(w1, w0, u, v) \ 981 | __asm__ ("! Inlined umul_ppmm \ 982 | wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr \ 983 | sra %3,31,%%g2 ! Don't move this insn \ 984 | and %2,%%g2,%%g2 ! Don't move this insn \ 985 | andcc %%g0,0,%%g1 ! 
Don't move this insn \ 986 | mulscc %%g1,%3,%%g1 \ 987 | mulscc %%g1,%3,%%g1 \ 988 | mulscc %%g1,%3,%%g1 \ 989 | mulscc %%g1,%3,%%g1 \ 990 | mulscc %%g1,%3,%%g1 \ 991 | mulscc %%g1,%3,%%g1 \ 992 | mulscc %%g1,%3,%%g1 \ 993 | mulscc %%g1,%3,%%g1 \ 994 | mulscc %%g1,%3,%%g1 \ 995 | mulscc %%g1,%3,%%g1 \ 996 | mulscc %%g1,%3,%%g1 \ 997 | mulscc %%g1,%3,%%g1 \ 998 | mulscc %%g1,%3,%%g1 \ 999 | mulscc %%g1,%3,%%g1 \ 1000 | mulscc %%g1,%3,%%g1 \ 1001 | mulscc %%g1,%3,%%g1 \ 1002 | mulscc %%g1,%3,%%g1 \ 1003 | mulscc %%g1,%3,%%g1 \ 1004 | mulscc %%g1,%3,%%g1 \ 1005 | mulscc %%g1,%3,%%g1 \ 1006 | mulscc %%g1,%3,%%g1 \ 1007 | mulscc %%g1,%3,%%g1 \ 1008 | mulscc %%g1,%3,%%g1 \ 1009 | mulscc %%g1,%3,%%g1 \ 1010 | mulscc %%g1,%3,%%g1 \ 1011 | mulscc %%g1,%3,%%g1 \ 1012 | mulscc %%g1,%3,%%g1 \ 1013 | mulscc %%g1,%3,%%g1 \ 1014 | mulscc %%g1,%3,%%g1 \ 1015 | mulscc %%g1,%3,%%g1 \ 1016 | mulscc %%g1,%3,%%g1 \ 1017 | mulscc %%g1,%3,%%g1 \ 1018 | mulscc %%g1,0,%%g1 \ 1019 | add %%g1,%%g2,%0 \ 1020 | rd %%y,%1" \ 1021 | : "=r" ((USItype)(w1)), \ 1022 | "=r" ((USItype)(w0)) \ 1023 | : "%rI" ((USItype)(u)), \ 1024 | "r" ((USItype)(v)) \ 1025 | : "%g1", "%g2" __AND_CLOBBER_CC) 1026 | #define UMUL_TIME 39 /* 39 instructions */ 1027 | #endif 1028 | #ifndef udiv_qrnnd 1029 | #ifndef LONGLONG_STANDALONE 1030 | #define udiv_qrnnd(q, r, n1, n0, d) \ 1031 | do { USItype __r; \ 1032 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 1033 | (r) = __r; \ 1034 | } while (0) \ 1035 | extern USItype __udiv_qrnnd (); 1036 | #define UDIV_TIME 140 1037 | #endif /* LONGLONG_STANDALONE */ 1038 | #endif /* udiv_qrnnd */ 1039 | #endif /* __sparc__ */ 1040 | 1041 | #if defined (__vax__) && W_TYPE_SIZE == 32 1042 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1043 | __asm__ ("addl2 %5,%1 \ 1044 | adwc %3,%0" \ 1045 | : "=g" ((USItype)(sh)), \ 1046 | "=&g" ((USItype)(sl)) \ 1047 | : "%0" ((USItype)(ah)), \ 1048 | "g" ((USItype)(bh)), \ 1049 | "%1" ((USItype)(al)), \ 1050 | "g" ((USItype)(bl))) 1051 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1052 | __asm__ ("subl2 %5,%1 \ 1053 | sbwc %3,%0" \ 1054 | : "=g" ((USItype)(sh)), \ 1055 | "=&g" ((USItype)(sl)) \ 1056 | : "0" ((USItype)(ah)), \ 1057 | "g" ((USItype)(bh)), \ 1058 | "1" ((USItype)(al)), \ 1059 | "g" ((USItype)(bl))) 1060 | #define umul_ppmm(xh, xl, m0, m1) \ 1061 | do { \ 1062 | union {UDItype __ll; \ 1063 | struct {USItype __l, __h;} __i; \ 1064 | } __xx; \ 1065 | USItype __m0 = (m0), __m1 = (m1); \ 1066 | __asm__ ("emul %1,%2,$0,%0" \ 1067 | : "=g" (__xx.__ll) \ 1068 | : "g" (__m0), \ 1069 | "g" (__m1)); \ 1070 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1071 | (xh) += ((((SItype) __m0 >> 31) & __m1) \ 1072 | + (((SItype) __m1 >> 31) & __m0)); \ 1073 | } while (0) 1074 | #define sdiv_qrnnd(q, r, n1, n0, d) \ 1075 | do { \ 1076 | union {DItype __ll; \ 1077 | struct {SItype __l, __h;} __i; \ 1078 | } __xx; \ 1079 | __xx.__i.__h = n1; __xx.__i.__l = n0; \ 1080 | __asm__ ("ediv %3,%2,%0,%1" \ 1081 | : "=g" (q), "=g" (r) \ 1082 | : "g" (__xx.ll), "g" (d)); \ 1083 | } while (0) 1084 | #endif /* __vax__ */ 1085 | 1086 | #if defined (__z8000__) && W_TYPE_SIZE == 16 1087 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1088 | __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 1089 | : "=r" ((unsigned int)(sh)), \ 1090 | "=&r" ((unsigned int)(sl)) \ 1091 | : "%0" ((unsigned int)(ah)), \ 1092 | "r" ((unsigned int)(bh)), \ 1093 | "%1" ((unsigned int)(al)), \ 1094 | "rQR" ((unsigned int)(bl))) 1095 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1096 | __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 1097 | 
: "=r" ((unsigned int)(sh)), \ 1098 | "=&r" ((unsigned int)(sl)) \ 1099 | : "0" ((unsigned int)(ah)), \ 1100 | "r" ((unsigned int)(bh)), \ 1101 | "1" ((unsigned int)(al)), \ 1102 | "rQR" ((unsigned int)(bl))) 1103 | #define umul_ppmm(xh, xl, m0, m1) \ 1104 | do { \ 1105 | union {long int __ll; \ 1106 | struct {unsigned int __h, __l;} __i; \ 1107 | } __xx; \ 1108 | unsigned int __m0 = (m0), __m1 = (m1); \ 1109 | __asm__ ("mult %S0,%H3" \ 1110 | : "=r" (__xx.__i.__h), \ 1111 | "=r" (__xx.__i.__l) \ 1112 | : "%1" (__m0), \ 1113 | "rQR" (__m1)); \ 1114 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 1115 | (xh) += ((((signed int) __m0 >> 15) & __m1) \ 1116 | + (((signed int) __m1 >> 15) & __m0)); \ 1117 | } while (0) 1118 | #define umul_ppmm_off(xh, xl, m0, m1) \ 1119 | do { \ 1120 | union {long int __ll; \ 1121 | struct {unsigned int __h, __l;} __i; \ 1122 | } __xx; \ 1123 | __asm__ ("mult %S0,%H3" \ 1124 | : "=r" (__xx.__i.__h), \ 1125 | "=r" (__xx.__i.__l) \ 1126 | : "%1" (m0), \ 1127 | "rQR" (m1)); \ 1128 | (xh) = __xx.__i.__h + ((((signed int) m0 >> 15) & m1) \ 1129 | + (((signed int) m1 >> 15) & m0)); \ 1130 | (xl) = __xx.__i.__l; \ 1131 | } while (0) 1132 | #endif /* __z8000__ */ 1133 | 1134 | #endif /* __GNUC__ */ 1135 | 1136 | 1137 | #if !defined (umul_ppmm) && defined (__umulsidi3) 1138 | #define umul_ppmm(ph, pl, m0, m1) \ 1139 | { \ 1140 | UDWtype __ll = __umulsidi3 (m0, m1); \ 1141 | ph = (UWtype) (__ll >> W_TYPE_SIZE); \ 1142 | pl = (UWtype) __ll; \ 1143 | } 1144 | #endif 1145 | 1146 | #if !defined (__umulsidi3) 1147 | #define __umulsidi3(u, v) \ 1148 | ({UWtype __hi, __lo; \ 1149 | umul_ppmm (__hi, __lo, u, v); \ 1150 | ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) 1151 | #endif 1152 | 1153 | /* If this machine has no inline assembler, use C macros. */ 1154 | 1155 | #if !defined (add_ssaaaa) 1156 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 1157 | do { \ 1158 | UWtype __x; \ 1159 | __x = (al) + (bl); \ 1160 | (sh) = (ah) + (bh) + (__x < (al)); \ 1161 | (sl) = __x; \ 1162 | } while (0) 1163 | #endif 1164 | 1165 | #if !defined (sub_ddmmss) 1166 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 1167 | do { \ 1168 | UWtype __x; \ 1169 | __x = (al) - (bl); \ 1170 | (sh) = (ah) - (bh) - (__x > (al)); \ 1171 | (sl) = __x; \ 1172 | } while (0) 1173 | #endif 1174 | 1175 | #if !defined (umul_ppmm) 1176 | #define umul_ppmm(w1, w0, u, v) \ 1177 | do { \ 1178 | UWtype __x0, __x1, __x2, __x3; \ 1179 | UHWtype __ul, __vl, __uh, __vh; \ 1180 | UWtype __u = (u), __v = (v); \ 1181 | \ 1182 | __ul = __ll_lowpart (__u); \ 1183 | __uh = __ll_highpart (__u); \ 1184 | __vl = __ll_lowpart (__v); \ 1185 | __vh = __ll_highpart (__v); \ 1186 | \ 1187 | __x0 = (UWtype) __ul * __vl; \ 1188 | __x1 = (UWtype) __ul * __vh; \ 1189 | __x2 = (UWtype) __uh * __vl; \ 1190 | __x3 = (UWtype) __uh * __vh; \ 1191 | \ 1192 | __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 1193 | __x1 += __x2; /* but this indeed can */ \ 1194 | if (__x1 < __x2) /* did we get it? */ \ 1195 | __x3 += __ll_B; /* yes, add it in the proper pos. 
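   (a carry lost from the middle sum __x1 is worth exactly __ll_B once folded into the high word __x3, which is why __ll_B is the amount added)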
*/ \ 1196 | \ 1197 | (w1) = __x3 + __ll_highpart (__x1); \ 1198 | (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\ 1199 | } while (0) 1200 | #endif 1201 | 1202 | #if !defined (umul_ppmm) 1203 | #define smul_ppmm(w1, w0, u, v) \ 1204 | do { \ 1205 | UWtype __w1; \ 1206 | USItype __m0 = (u), __m1 = (v); \ 1207 | umul_ppmm (__w1, w0, __m0, __m1); \ 1208 | (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ 1209 | - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ 1210 | } while (0) 1211 | #endif 1212 | 1213 | /* Define this unconditionally, so it can be used for debugging. */ 1214 | #define __udiv_qrnnd_c(q, r, n1, n0, d) \ 1215 | do { \ 1216 | UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ 1217 | __d1 = __ll_highpart (d); \ 1218 | __d0 = __ll_lowpart (d); \ 1219 | \ 1220 | __r1 = (n1) % __d1; \ 1221 | __q1 = (n1) / __d1; \ 1222 | __m = (UWtype) __q1 * __d0; \ 1223 | __r1 = __r1 * __ll_B | __ll_highpart (n0); \ 1224 | if (__r1 < __m) \ 1225 | { \ 1226 | __q1--, __r1 += (d); \ 1227 | if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ 1228 | if (__r1 < __m) \ 1229 | __q1--, __r1 += (d); \ 1230 | } \ 1231 | __r1 -= __m; \ 1232 | \ 1233 | __r0 = __r1 % __d1; \ 1234 | __q0 = __r1 / __d1; \ 1235 | __m = (UWtype) __q0 * __d0; \ 1236 | __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ 1237 | if (__r0 < __m) \ 1238 | { \ 1239 | __q0--, __r0 += (d); \ 1240 | if (__r0 >= (d)) \ 1241 | if (__r0 < __m) \ 1242 | __q0--, __r0 += (d); \ 1243 | } \ 1244 | __r0 -= __m; \ 1245 | \ 1246 | (q) = (UWtype) __q1 * __ll_B | __q0; \ 1247 | (r) = __r0; \ 1248 | } while (0) 1249 | 1250 | /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through 1251 | __udiv_w_sdiv (defined in libgcc or elsewhere). */ 1252 | #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) 1253 | #define udiv_qrnnd(q, r, nh, nl, d) \ 1254 | do { \ 1255 | UWtype __r; \ 1256 | (q) = __udiv_w_sdiv (&__r, nh, nl, d); \ 1257 | (r) = __r; \ 1258 | } while (0) 1259 | #endif 1260 | 1261 | /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ 1262 | #if !defined (udiv_qrnnd) 1263 | #define UDIV_NEEDS_NORMALIZATION 1 1264 | #define udiv_qrnnd __udiv_qrnnd_c 1265 | #endif 1266 | 1267 | #if !defined (count_leading_zeros) 1268 | extern 1269 | #ifdef __STDC__ 1270 | const 1271 | #endif 1272 | unsigned char __clz_tab[]; 1273 | #define count_leading_zeros(count, x) \ 1274 | do { \ 1275 | UWtype __xr = (x); \ 1276 | UWtype __a; \ 1277 | \ 1278 | if (W_TYPE_SIZE <= 32) \ 1279 | { \ 1280 | __a = __xr < ((UWtype) 1 << 2*__BITS4) \ 1281 | ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ 1282 | : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\ 1283 | } \ 1284 | else \ 1285 | { \ 1286 | for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ 1287 | if (((__xr >> __a) & 0xff) != 0) \ 1288 | break; \ 1289 | } \ 1290 | \ 1291 | (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ 1292 | } while (0) 1293 | /* This version gives a well-defined value for zero. */ 1294 | #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 1295 | #endif 1296 | 1297 | #if !defined (count_trailing_zeros) 1298 | /* Define count_trailing_zeros using count_leading_zeros. The latter might be 1299 | defined in asm, but if it is not, the C version above is good enough. 
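   The fallback below relies on the identity that x & -x isolates the least significant set bit of x; the leading-zero count of that single bit, subtracted from W_TYPE_SIZE - 1, is exactly the number of trailing zeros.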
*/ 1300 | #define count_trailing_zeros(count, x) \ 1301 | do { \ 1302 | UWtype __ctz_x = (x); \ 1303 | UWtype __ctz_c; \ 1304 | count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ 1305 | (count) = W_TYPE_SIZE - 1 - __ctz_c; \ 1306 | } while (0) 1307 | #endif 1308 | 1309 | #ifndef UDIV_NEEDS_NORMALIZATION 1310 | #define UDIV_NEEDS_NORMALIZATION 0 1311 | #endif 1312 | -------------------------------------------------------------------------------- /run_program.def: -------------------------------------------------------------------------------- 1 | /* Superoptimizer -- execute an instruction sequence in order to 2 | test its correctness. 3 | 4 | Copyright (C) 1991, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. 5 | 6 | This program is free software; you can redistribute it and/or modify it 7 | under the terms of the GNU General Public License as published by the 8 | Free Software Foundation; either version 2, or (at your option) any 9 | later version. 10 | 11 | This program is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License along 17 | with this program; see the file COPYING. If not, write to the Free 18 | Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 | 20 | int 21 | #if HAS_NULLIFICATION 22 | run_program(insn_t *sequence, int n_insns, word *regs, int arity) 23 | #else 24 | run_program(insn_t *sequence, int n_insns, word *regs) 25 | #endif 26 | { 27 | int pc; 28 | insn_t insn; 29 | word v, r1, r2; 30 | int co, ci = -1; /* Combine co and ci into cy? */ 31 | #if HAS_NULLIFICATION 32 | int nullify_flag = 0; 33 | char reg_defined[0x100]; 34 | int i; 35 | #endif 36 | 37 | if (n_insns == 0) 38 | return ci; 39 | 40 | #if HAS_NULLIFICATION 41 | memset (reg_defined, 0, 0x100); 42 | 43 | for (i = 0; i < arity; i++) 44 | reg_defined[i] = 1; 45 | /* The immediate values should be considered `defined'. */ 46 | for (i = -1; i < BITS_PER_WORD; i++) 47 | reg_defined[0x20 + i] = 1; 48 | reg_defined[0x20 - 2] = 1; 49 | reg_defined[0x20 - 3] = 1; 50 | reg_defined[0x20 - 4] = 1; 51 | reg_defined[0x20 - 5] = 1; 52 | #endif 53 | 54 | for (pc = 0; pc < n_insns; pc++) 55 | { 56 | insn = sequence[pc]; 57 | 58 | #if HAS_NULLIFICATION 59 | if (nullify_flag) 60 | { 61 | nullify_flag = 0; 62 | continue; 63 | } 64 | 65 | /* Check if the source operands have become defined.
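   The function arguments (registers 0 .. arity-1) and the pseudo-registers holding immediate operands were marked defined above; any other register only becomes defined when an earlier instruction in the sequence writes it. A sequence that reads a still-undefined register cannot be a correct program, so it is rejected just below with the special return value -2.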
*/ 66 | if (!reg_defined[insn.s1] || !reg_defined[insn.s2]) 67 | return -2; 68 | #endif 69 | 70 | r1 = regs[insn.s1]; 71 | r2 = regs[insn.s2]; 72 | 73 | switch (insn.opcode) 74 | { 75 | default: 76 | fprintf(stderr, 77 | "internal error: undefined instruction generated\n"); 78 | abort(); 79 | 80 | case COPY: PERFORM_COPY(v, co, r1, ci); break; 81 | case EXCHANGE: 82 | regs[insn.s1] = r2; 83 | regs[insn.s2] = r1; 84 | continue; 85 | 86 | case ADD: PERFORM_ADD(v, co, r1, r2, ci); break; 87 | case ADD_CI: PERFORM_ADD_CI(v, co, r1, r2, ci); break; 88 | case ADD_CO: PERFORM_ADD_CO(v, co, r1, r2, ci); break; 89 | case ADD_CIO: PERFORM_ADD_CIO(v, co, r1, r2, ci); break; 90 | 91 | case SUB: PERFORM_SUB(v, co, r1, r2, ci); break; 92 | case SUB_CI: PERFORM_SUB_CI(v, co, r1, r2, ci); break; 93 | case SUB_CO: PERFORM_SUB_CO(v, co, r1, r2, ci); break; 94 | case SUB_CIO: PERFORM_SUB_CIO(v, co, r1, r2, ci); break; 95 | 96 | case ADC_CI: PERFORM_ADC_CI(v, co, r1, r2, ci); break; 97 | case ADC_CO: PERFORM_ADC_CO(v, co, r1, r2, ci); break; 98 | case ADC_CIO: PERFORM_ADC_CIO(v, co, r1, r2, ci); break; 99 | 100 | case ADDCMPL: PERFORM_ADDCMPL(v, co, r1, r2, ci); break; 101 | 102 | case LDA16F: PERFORM_LDA16F(v, co, r1, r2, ci); break; 103 | case LDA16B: PERFORM_LDA16B(v, co, r1, r2, ci); break; 104 | case LDAWF: PERFORM_LDAWF(v, co, r1, r2, ci); break; 105 | case LDAWB: PERFORM_LDAWB(v, co, r1, r2, ci); break; 106 | 107 | case CMP: PERFORM_CMP(v, co, r1, r2, ci); break; 108 | case CMPC: PERFORM_CMPC(v, co, r1, r2, ci); break; 109 | case CMPPAR: PERFORM_CMPPAR(v, co, r1, r2, ci); break; 110 | 111 | case AND: PERFORM_AND(v, co, r1, r2, ci); break; 112 | case IOR: PERFORM_IOR(v, co, r1, r2, ci); break; 113 | case XOR: PERFORM_XOR(v, co, r1, r2, ci); break; 114 | case ANDC: PERFORM_ANDC(v, co, r1, r2, ci); break; 115 | case IORC: PERFORM_IORC(v, co, r1, r2, ci); break; 116 | case EQV: PERFORM_EQV(v, co, r1, r2, ci); break; 117 | case NAND: PERFORM_NAND(v, co, r1, r2, ci); break; 118 | case NOR: PERFORM_NOR(v, co, r1, r2, ci); break; 119 | 120 | case AND_RC: PERFORM_AND_RC(v, co, r1, r2, ci); break; 121 | case IOR_RC: PERFORM_IOR_RC(v, co, r1, r2, ci); break; 122 | case XOR_RC: PERFORM_XOR_RC(v, co, r1, r2, ci); break; 123 | case ANDC_RC: PERFORM_ANDC_RC(v, co, r1, r2, ci); break; 124 | case IORC_RC: PERFORM_IORC_RC(v, co, r1, r2, ci); break; 125 | case EQV_RC: PERFORM_EQV_RC(v, co, r1, r2, ci); break; 126 | case NAND_RC: PERFORM_NAND_RC(v, co, r1, r2, ci); break; 127 | case NOR_RC: PERFORM_NOR_RC(v, co, r1, r2, ci); break; 128 | 129 | case AND_CC: PERFORM_AND_CC(v, co, r1, r2, ci); break; 130 | case IOR_CC: PERFORM_IOR_CC(v, co, r1, r2, ci); break; 131 | case XOR_CC: PERFORM_XOR_CC(v, co, r1, r2, ci); break; 132 | case ANDC_CC: PERFORM_ANDC_CC(v, co, r1, r2, ci); break; 133 | case IORC_CC: PERFORM_IORC_CC(v, co, r1, r2, ci); break; 134 | case EQV_CC: PERFORM_EQV_CC(v, co, r1, r2, ci); break; 135 | case NAND_CC: PERFORM_NAND_CC(v, co, r1, r2, ci); break; 136 | case NOR_CC: PERFORM_NOR_CC(v, co, r1, r2, ci); break; 137 | 138 | case LSHIFTR: PERFORM_LSHIFTR(v, co, r1, r2, ci); break; 139 | case ASHIFTR: PERFORM_ASHIFTR(v, co, r1, r2, ci); break; 140 | case SHIFTL: PERFORM_SHIFTL(v, co, r1, r2, ci); break; 141 | case ROTATEL: PERFORM_ROTATEL(v, co, r1, r2, ci); break; 142 | case LSHIFTR_CO:PERFORM_LSHIFTR_CO(v, co, r1, r2, ci); break; 143 | case ASHIFTR_CO:PERFORM_ASHIFTR_CO(v, co, r1, r2, ci); break; 144 | case SHIFTL_CO: PERFORM_SHIFTL_CO(v, co, r1, r2, ci); break; 145 | case ROTATEL_CO:PERFORM_ROTATEL_CO(v, co, 
r1, r2, ci); break;
146 | case ROTATER_CO:PERFORM_ROTATER_CO(v, co, r1, r2, ci); break;
147 | case ROTATEXL_CIO:PERFORM_ROTATEXL_CIO(v, co, r1, r2, ci); break;
148 | case ROTATEXR_CIO:PERFORM_ROTATEXR_CIO(v, co, r1, r2, ci); break;
149 | case ASHIFTR_CON:PERFORM_ASHIFTR_CON(v, co, r1, r2, ci); break;
150 | 
151 | case EXTS1: PERFORM_EXTS1(v, co, r1, r2, ci); break;
152 | case EXTS2: PERFORM_EXTS2(v, co, r1, r2, ci); break;
153 | case EXTS8: PERFORM_EXTS8(v, co, r1, r2, ci); break;
154 | case EXTS16: PERFORM_EXTS16(v, co, r1, r2, ci); break;
155 | case EXTU1: PERFORM_EXTU1(v, co, r1, r2, ci); break;
156 | case EXTU2: PERFORM_EXTU2(v, co, r1, r2, ci); break;
157 | 
158 | case CLZ: PERFORM_CLZ(v, co, r1, ci); break;
159 | case CTZ: PERFORM_CTZ(v, co, r1, ci); break;
160 | case BITREV: PERFORM_BITREV(v, co, r1, ci); break;
161 | case BYTEREV: PERFORM_BYTEREV(v, co, r1, ci); break;
162 | case FF1: PERFORM_FF1(v, co, r1, ci); break;
163 | case FF0: PERFORM_FF0(v, co, r1, ci); break;
164 | case BSF86: PERFORM_BSF86(v, co, r1, ci); break;
165 | 
166 | case ABSVAL: PERFORM_ABSVAL(v, co, r1, ci); break;
167 | case NABSVAL: PERFORM_NABSVAL(v, co, r1, ci); break;
168 | 
169 | case MKMSK: PERFORM_MKMSK(v, co, r1, ci); break;
170 | 
171 | case ZEXT: PERFORM_ZEXT(v, co, r1, r2, ci); break;
172 | case SEXT: PERFORM_SEXT(v, co, r1, r2, ci); break;
173 | 
174 | case DOZ: PERFORM_DOZ(v, co, r1, r2, ci); break;
175 | case SETCY: co = 1; break;
176 | case CLRCY: co = 0; break;
177 | case COMCY: co = ci ^ 1; break;
178 | 
179 | case CPEQ: PERFORM_CPEQ(v, co, r1, r2, ci); break;
180 | case CPGE: PERFORM_CPGE(v, co, r1, r2, ci); break;
181 | case CPGEU: PERFORM_CPGEU(v, co, r1, r2, ci); break;
182 | case CPGT: PERFORM_CPGT(v, co, r1, r2, ci); break;
183 | case CPGTU: PERFORM_CPGTU(v, co, r1, r2, ci); break;
184 | case CPLE: PERFORM_CPLE(v, co, r1, r2, ci); break;
185 | case CPLEU: PERFORM_CPLEU(v, co, r1, r2, ci); break;
186 | case CPLT: PERFORM_CPLT(v, co, r1, r2, ci); break;
187 | case CPLTU: PERFORM_CPLTU(v, co, r1, r2, ci); break;
188 | case CPNEQ: PERFORM_CPNEQ(v, co, r1, r2, ci); break;
189 | 
190 | case CMPEQ: PERFORM_CMPEQ(v, co, r1, r2, ci); break;
191 | case CMPLE: PERFORM_CMPLE(v, co, r1, r2, ci); break;
192 | case CMPLEU: PERFORM_CMPLEU(v, co, r1, r2, ci); break;
193 | case CMPLT: PERFORM_CMPLT(v, co, r1, r2, ci); break;
194 | case CMPLTU: PERFORM_CMPLTU(v, co, r1, r2, ci); break;
195 | 
196 | case CYEQ: PERFORM_CYEQ(v, co, r1, r2, ci); break;
197 | case CYGES: PERFORM_CYGES(v, co, r1, r2, ci); break;
198 | case CYGEU: PERFORM_CYGEU(v, co, r1, r2, ci); break;
199 | case CYGTS: PERFORM_CYGTS(v, co, r1, r2, ci); break;
200 | case CYGTU: PERFORM_CYGTU(v, co, r1, r2, ci); break;
201 | case CYAND: PERFORM_CYAND(v, co, r1, r2, ci); break;
202 | 
203 | case MERGE16: PERFORM_MERGE16(v, co, r1, r2, ci); break;
204 | case DECR_CYEQ: PERFORM_DECR_CYEQ(v, co, r1, r2, ci); break;
205 | 
206 | case CMOVEQ:
207 |   v = regs[insn.d];
208 |   PERFORM_CMOVEQ(v, co, r1, r2, ci);
209 |   break;
210 | case CMOVNE:
211 |   v = regs[insn.d];
212 |   PERFORM_CMOVNE(v, co, r1, r2, ci);
213 |   break;
214 | case CMOVLT:
215 |   v = regs[insn.d];
216 |   PERFORM_CMOVLT(v, co, r1, r2, ci);
217 |   break;
218 | case CMOVGE:
219 |   v = regs[insn.d];
220 |   PERFORM_CMOVGE(v, co, r1, r2, ci);
221 |   break;
222 | case CMOVLE:
223 |   v = regs[insn.d];
224 |   PERFORM_CMOVLE(v, co, r1, r2, ci);
225 |   break;
226 | case CMOVGT:
227 |   v = regs[insn.d];
228 |   PERFORM_CMOVGT(v, co, r1, r2, ci);
229 |   break;
230 | 
231 | case MUL: PERFORM_MUL(v, co, r1, r2, ci); break;
232 | case UMULWIDEN_HI: PERFORM_UMULWIDEN_HI(v, co, r1, r2, ci); break;
233 | case INVDIV: PERFORM_INVDIV(v, co, r1, ci); break;
234 | case INVMOD: PERFORM_INVMOD(v, co, r1, ci); break;
235 | 
236 | #if HAS_NULLIFICATION
237 | case ADD_SEQ:
238 |   PERFORM_ADD_SEQ(v, co, nullify_flag, r1, r2, ci);
239 |   break;
240 | case ADD_SNE:
241 |   PERFORM_ADD_SNE(v, co, nullify_flag, r1, r2, ci);
242 |   break;
243 | case ADD_SLTS:
244 |   PERFORM_ADD_SLTS(v, co, nullify_flag, r1, r2, ci);
245 |   break;
246 | case ADD_SGES:
247 |   PERFORM_ADD_SGES(v, co, nullify_flag, r1, r2, ci);
248 |   break;
249 | case ADD_SLES:
250 |   PERFORM_ADD_SLES(v, co, nullify_flag, r1, r2, ci);
251 |   break;
252 | case ADD_SGTS:
253 |   PERFORM_ADD_SGTS(v, co, nullify_flag, r1, r2, ci);
254 |   break;
255 | case ADD_SLTU:
256 |   PERFORM_ADD_SLTU(v, co, nullify_flag, r1, r2, ci);
257 |   break;
258 | case ADD_SGEU:
259 |   PERFORM_ADD_SGEU(v, co, nullify_flag, r1, r2, ci);
260 |   break;
261 | case ADD_SLEU:
262 |   PERFORM_ADD_SLEU(v, co, nullify_flag, r1, r2, ci);
263 |   break;
264 | case ADD_SGTU:
265 |   PERFORM_ADD_SGTU(v, co, nullify_flag, r1, r2, ci);
266 |   break;
267 | case ADD_SOVS:
268 |   PERFORM_ADD_SOVS(v, co, nullify_flag, r1, r2, ci);
269 |   break;
270 | case ADD_SNVS:
271 |   PERFORM_ADD_SNVS(v, co, nullify_flag, r1, r2, ci);
272 |   break;
273 | case ADD_SODD:
274 |   PERFORM_ADD_SODD(v, co, nullify_flag, r1, r2, ci);
275 |   break;
276 | case ADD_SEVN:
277 |   PERFORM_ADD_SEVN(v, co, nullify_flag, r1, r2, ci);
278 |   break;
279 | case ADD_S:
280 |   PERFORM_ADD_S(v, co, nullify_flag, r1, r2, ci);
281 |   break;
282 | case ADD_CIO_SEQ:
283 |   PERFORM_ADD_CIO_SEQ(v, co, nullify_flag, r1, r2, ci);
284 |   break;
285 | case ADD_CIO_SNE:
286 |   PERFORM_ADD_CIO_SNE(v, co, nullify_flag, r1, r2, ci);
287 |   break;
288 | case ADD_CIO_SLTU:
289 |   PERFORM_ADD_CIO_SLTU(v, co, nullify_flag, r1, r2, ci);
290 |   break;
291 | case ADD_CIO_SGEU:
292 |   PERFORM_ADD_CIO_SGEU(v, co, nullify_flag, r1, r2, ci);
293 |   break;
294 | case ADD_CIO_SLEU:
295 |   PERFORM_ADD_CIO_SLEU(v, co, nullify_flag, r1, r2, ci);
296 |   break;
297 | case ADD_CIO_SGTU:
298 |   PERFORM_ADD_CIO_SGTU(v, co, nullify_flag, r1, r2, ci);
299 |   break;
300 | case ADD_CIO_SODD:
301 |   PERFORM_ADD_CIO_SODD(v, co, nullify_flag, r1, r2, ci);
302 |   break;
303 | case ADD_CIO_SEVN:
304 |   PERFORM_ADD_CIO_SEVN(v, co, nullify_flag, r1, r2, ci);
305 |   break;
306 | case ADD_CIO_S:
307 |   PERFORM_ADD_CIO_S(v, co, nullify_flag, r1, r2, ci);
308 |   break;
309 | case ADD_CO_SEQ:
310 |   PERFORM_ADD_CO_SEQ(v, co, nullify_flag, r1, r2, ci);
311 |   break;
312 | case ADD_CO_SNE:
313 |   PERFORM_ADD_CO_SNE(v, co, nullify_flag, r1, r2, ci);
314 |   break;
315 | case ADD_CO_SLTU:
316 |   PERFORM_ADD_CO_SLTU(v, co, nullify_flag, r1, r2, ci);
317 |   break;
318 | case ADD_CO_SGEU:
319 |   PERFORM_ADD_CO_SGEU(v, co, nullify_flag, r1, r2, ci);
320 |   break;
321 | case ADD_CO_SLEU:
322 |   PERFORM_ADD_CO_SLEU(v, co, nullify_flag, r1, r2, ci);
323 |   break;
324 | case ADD_CO_SGTU:
325 |   PERFORM_ADD_CO_SGTU(v, co, nullify_flag, r1, r2, ci);
326 |   break;
327 | case ADD_CO_SODD:
328 |   PERFORM_ADD_CO_SODD(v, co, nullify_flag, r1, r2, ci);
329 |   break;
330 | case ADD_CO_SEVN:
331 |   PERFORM_ADD_CO_SEVN(v, co, nullify_flag, r1, r2, ci);
332 |   break;
333 | case ADD_CO_S:
334 |   PERFORM_ADD_CO_S(v, co, nullify_flag, r1, r2, ci);
335 |   break;
336 | case SUB_SEQ:
337 |   PERFORM_SUB_SEQ(v, co, nullify_flag, r1, r2, ci);
338 |   break;
339 | case SUB_SNE:
340 |   PERFORM_SUB_SNE(v, co, nullify_flag, r1, r2, ci);
341 |   break;
342 | case SUB_SLTS:
343 |   PERFORM_SUB_SLTS(v, co, nullify_flag, r1, r2, ci);
344 |   break;
345 | case SUB_SGES:
346 |   PERFORM_SUB_SGES(v, co, nullify_flag, r1, r2, ci);
347 |   break;
348 | case SUB_SLES:
349 |   PERFORM_SUB_SLES(v, co, nullify_flag, r1, r2, ci);
350 |   break;
351 | case SUB_SGTS:
352 |   PERFORM_SUB_SGTS(v, co, nullify_flag, r1, r2, ci);
353 |   break;
354 | case SUB_SODD:
355 |   PERFORM_SUB_SODD(v, co, nullify_flag, r1, r2, ci);
356 |   break;
357 | case SUB_SEVN:
358 |   PERFORM_SUB_SEVN(v, co, nullify_flag, r1, r2, ci);
359 |   break;
360 | case SUB_S:
361 |   PERFORM_SUB_S(v, co, nullify_flag, r1, r2, ci);
362 |   break;
363 | case ADC_CIO_SEQ:
364 |   PERFORM_ADC_CIO_SEQ(v, co, nullify_flag, r1, r2, ci);
365 |   break;
366 | case ADC_CIO_SNE:
367 |   PERFORM_ADC_CIO_SNE(v, co, nullify_flag, r1, r2, ci);
368 |   break;
369 | case ADC_CIO_SLTU:
370 |   PERFORM_ADC_CIO_SLTU(v, co, nullify_flag, r1, r2, ci);
371 |   break;
372 | case ADC_CIO_SGEU:
373 |   PERFORM_ADC_CIO_SGEU(v, co, nullify_flag, r1, r2, ci);
374 |   break;
375 | case ADC_CIO_SLEU:
376 |   PERFORM_ADC_CIO_SLEU(v, co, nullify_flag, r1, r2, ci);
377 |   break;
378 | case ADC_CIO_SGTU:
379 |   PERFORM_ADC_CIO_SGTU(v, co, nullify_flag, r1, r2, ci);
380 |   break;
381 | case ADC_CIO_SODD:
382 |   PERFORM_ADC_CIO_SODD(v, co, nullify_flag, r1, r2, ci);
383 |   break;
384 | case ADC_CIO_SEVN:
385 |   PERFORM_ADC_CIO_SEVN(v, co, nullify_flag, r1, r2, ci);
386 |   break;
387 | case ADC_CIO_S:
388 |   PERFORM_ADC_CIO_S(v, co, nullify_flag, r1, r2, ci);
389 |   break;
390 | case ADC_CO_SEQ:
391 |   PERFORM_ADC_CO_SEQ(v, co, nullify_flag, r1, r2, ci);
392 |   break;
393 | case ADC_CO_SNE:
394 |   PERFORM_ADC_CO_SNE(v, co, nullify_flag, r1, r2, ci);
395 |   break;
396 | case ADC_CO_SLTU:
397 |   PERFORM_ADC_CO_SLTU(v, co, nullify_flag, r1, r2, ci);
398 |   break;
399 | case ADC_CO_SGEU:
400 |   PERFORM_ADC_CO_SGEU(v, co, nullify_flag, r1, r2, ci);
401 |   break;
402 | case ADC_CO_SLEU:
403 |   PERFORM_ADC_CO_SLEU(v, co, nullify_flag, r1, r2, ci);
404 |   break;
405 | case ADC_CO_SGTU:
406 |   PERFORM_ADC_CO_SGTU(v, co, nullify_flag, r1, r2, ci);
407 |   break;
408 | case ADC_CO_SODD:
409 |   PERFORM_ADC_CO_SODD(v, co, nullify_flag, r1, r2, ci);
410 |   break;
411 | case ADC_CO_SEVN:
412 |   PERFORM_ADC_CO_SEVN(v, co, nullify_flag, r1, r2, ci);
413 |   break;
414 | case ADC_CO_S:
415 |   PERFORM_ADC_CO_S(v, co, nullify_flag, r1, r2, ci);
416 |   break;
417 | 
418 | case COMCLR_SEQ:
419 |   PERFORM_COMCLR_SEQ(v, co, nullify_flag, r1, r2, ci);
420 |   break;
421 | case COMCLR_SNE:
422 |   PERFORM_COMCLR_SNE(v, co, nullify_flag, r1, r2, ci);
423 |   break;
424 | case COMCLR_SLTS:
425 |   PERFORM_COMCLR_SLTS(v, co, nullify_flag, r1, r2, ci);
426 |   break;
427 | case COMCLR_SGES:
428 |   PERFORM_COMCLR_SGES(v, co, nullify_flag, r1, r2, ci);
429 |   break;
430 | case COMCLR_SLES:
431 |   PERFORM_COMCLR_SLES(v, co, nullify_flag, r1, r2, ci);
432 |   break;
433 | case COMCLR_SGTS:
434 |   PERFORM_COMCLR_SGTS(v, co, nullify_flag, r1, r2, ci);
435 |   break;
436 | case COMCLR_SLTU:
437 |   PERFORM_COMCLR_SLTU(v, co, nullify_flag, r1, r2, ci);
438 |   break;
439 | case COMCLR_SGEU:
440 |   PERFORM_COMCLR_SGEU(v, co, nullify_flag, r1, r2, ci);
441 |   break;
442 | case COMCLR_SLEU:
443 |   PERFORM_COMCLR_SLEU(v, co, nullify_flag, r1, r2, ci);
444 |   break;
445 | case COMCLR_SGTU:
446 |   PERFORM_COMCLR_SGTU(v, co, nullify_flag, r1, r2, ci);
447 |   break;
448 | case COMCLR_SODD:
449 |   PERFORM_COMCLR_SODD(v, co, nullify_flag, r1, r2, ci);
450 |   break;
451 | case COMCLR_SEVN:
452 |   PERFORM_COMCLR_SEVN(v, co, nullify_flag, r1, r2, ci);
453 |   break;
454 | 
455 | case AND_SEQ:
456 |   PERFORM_AND_SEQ(v, co, nullify_flag, r1, r2, ci);
457 |   break;
458 | case AND_SNE:
459 |   PERFORM_AND_SNE(v, co, nullify_flag, r1, r2, ci);
460 |   break;
461 | case AND_SLTS:
462 |   PERFORM_AND_SLTS(v, co, nullify_flag, r1, r2, ci);
463 |   break;
464 | case AND_SGES:
465 |   PERFORM_AND_SGES(v, co, nullify_flag, r1, r2, ci);
466 |   break;
467 | case AND_SLES:
468 |   PERFORM_AND_SLES(v, co, nullify_flag, r1, r2, ci);
469 |   break;
470 | case AND_SGTS:
471 |   PERFORM_AND_SGTS(v, co, nullify_flag, r1, r2, ci);
472 |   break;
473 | case AND_SODD:
474 |   PERFORM_AND_SODD(v, co, nullify_flag, r1, r2, ci);
475 |   break;
476 | case AND_SEVN:
477 |   PERFORM_AND_SEVN(v, co, nullify_flag, r1, r2, ci);
478 |   break;
479 | case AND_S:
480 |   PERFORM_AND_S(v, co, nullify_flag, r1, r2, ci);
481 |   break;
482 | case IOR_SEQ:
483 |   PERFORM_IOR_SEQ(v, co, nullify_flag, r1, r2, ci);
484 |   break;
485 | case IOR_SNE:
486 |   PERFORM_IOR_SNE(v, co, nullify_flag, r1, r2, ci);
487 |   break;
488 | case IOR_SLTS:
489 |   PERFORM_IOR_SLTS(v, co, nullify_flag, r1, r2, ci);
490 |   break;
491 | case IOR_SGES:
492 |   PERFORM_IOR_SGES(v, co, nullify_flag, r1, r2, ci);
493 |   break;
494 | case IOR_SLES:
495 |   PERFORM_IOR_SLES(v, co, nullify_flag, r1, r2, ci);
496 |   break;
497 | case IOR_SGTS:
498 |   PERFORM_IOR_SGTS(v, co, nullify_flag, r1, r2, ci);
499 |   break;
500 | case IOR_SODD:
501 |   PERFORM_IOR_SODD(v, co, nullify_flag, r1, r2, ci);
502 |   break;
503 | case IOR_SEVN:
504 |   PERFORM_IOR_SEVN(v, co, nullify_flag, r1, r2, ci);
505 |   break;
506 | case IOR_S:
507 |   PERFORM_IOR_S(v, co, nullify_flag, r1, r2, ci);
508 |   break;
509 | case XOR_SEQ:
510 |   PERFORM_XOR_SEQ(v, co, nullify_flag, r1, r2, ci);
511 |   break;
512 | case XOR_SNE:
513 |   PERFORM_XOR_SNE(v, co, nullify_flag, r1, r2, ci);
514 |   break;
515 | case XOR_SLTS:
516 |   PERFORM_XOR_SLTS(v, co, nullify_flag, r1, r2, ci);
517 |   break;
518 | case XOR_SGES:
519 |   PERFORM_XOR_SGES(v, co, nullify_flag, r1, r2, ci);
520 |   break;
521 | case XOR_SLES:
522 |   PERFORM_XOR_SLES(v, co, nullify_flag, r1, r2, ci);
523 |   break;
524 | case XOR_SGTS:
525 |   PERFORM_XOR_SGTS(v, co, nullify_flag, r1, r2, ci);
526 |   break;
527 | case XOR_SODD:
528 |   PERFORM_XOR_SODD(v, co, nullify_flag, r1, r2, ci);
529 |   break;
530 | case XOR_SEVN:
531 |   PERFORM_XOR_SEVN(v, co, nullify_flag, r1, r2, ci);
532 |   break;
533 | case XOR_S:
534 |   PERFORM_XOR_S(v, co, nullify_flag, r1, r2, ci);
535 |   break;
536 | case ANDC_SEQ:
537 |   PERFORM_ANDC_SEQ(v, co, nullify_flag, r1, r2, ci);
538 |   break;
539 | case ANDC_SNE:
540 |   PERFORM_ANDC_SNE(v, co, nullify_flag, r1, r2, ci);
541 |   break;
542 | case ANDC_SLTS:
543 |   PERFORM_ANDC_SLTS(v, co, nullify_flag, r1, r2, ci);
544 |   break;
545 | case ANDC_SGES:
546 |   PERFORM_ANDC_SGES(v, co, nullify_flag, r1, r2, ci);
547 |   break;
548 | case ANDC_SLES:
549 |   PERFORM_ANDC_SLES(v, co, nullify_flag, r1, r2, ci);
550 |   break;
551 | case ANDC_SGTS:
552 |   PERFORM_ANDC_SGTS(v, co, nullify_flag, r1, r2, ci);
553 |   break;
554 | case ANDC_SODD:
555 |   PERFORM_ANDC_SODD(v, co, nullify_flag, r1, r2, ci);
556 |   break;
557 | case ANDC_SEVN:
558 |   PERFORM_ANDC_SEVN(v, co, nullify_flag, r1, r2, ci);
559 |   break;
560 | case ANDC_S:
561 |   PERFORM_ANDC_S(v, co, nullify_flag, r1, r2, ci);
562 |   break;
563 | case LSHIFTR_S:
564 |   PERFORM_LSHIFTR_S(v, co, nullify_flag, r1, r2, ci);
565 |   break;
566 | case ASHIFTR_S:
567 |   PERFORM_ASHIFTR_S(v, co, nullify_flag, r1, r2, ci);
568 |   break;
569 | case SHIFTL_S:
570 |   PERFORM_SHIFTL_S(v, co, nullify_flag, r1, r2, ci);
571 |   break;
572 | case ROTATEL_S:
573 |   PERFORM_ROTATEL_S(v, co, nullify_flag, r1, r2, ci);
574 |   break;
575 | case EXTS1_S:
576 |   PERFORM_EXTS1_S(v, co, nullify_flag, r1, r2, ci);
577 |   break;
578 | case EXTS2_S:
579 |   PERFORM_EXTS2_S(v, co, nullify_flag, r1, r2, ci);
580 |   break;
581 | case EXTS8_S:
582 |   PERFORM_EXTS8_S(v, co, nullify_flag, r1, r2, ci);
583 |   break;
584 | case EXTS16_S:
585 |   PERFORM_EXTS16_S(v, co, nullify_flag, r1, r2, ci);
586 |   break;
587 | case EXTU1_S:
588 |   PERFORM_EXTU1_S(v, co, nullify_flag, r1, r2, ci);
589 |   break;
590 | case EXTU2_S:
591 |   PERFORM_EXTU2_S(v, co, nullify_flag, r1, r2, ci);
592 |   break;
593 | case COPY_S:
594 |   PERFORM_COPY_S(v, co, nullify_flag, r1, ci);
595 |   break;
596 | 
597 | #endif /* HAS_NULLIFICATION */
598 | 
599 | case ADDC_960: PERFORM_ADDC_960(v, co, r1, r2, ci); break;
600 | case SUBC_960: PERFORM_SUBC_960(v, co, r1, r2, ci); break;
601 | case SEL_NO_960: PERFORM_SEL_NO_960(v, co, r1, r2, ci); break;
602 | case SEL_G_960: PERFORM_SEL_G_960(v, co, r1, r2, ci); break;
603 | case SEL_E_960: PERFORM_SEL_E_960(v, co, r1, r2, ci); break;
604 | case SEL_GE_960: PERFORM_SEL_GE_960(v, co, r1, r2, ci); break;
605 | case SEL_L_960: PERFORM_SEL_L_960(v, co, r1, r2, ci); break;
606 | case SEL_NE_960: PERFORM_SEL_NE_960(v, co, r1, r2, ci); break;
607 | case SEL_LE_960: PERFORM_SEL_LE_960(v, co, r1, r2, ci); break;
608 | case SEL_O_960: PERFORM_SEL_O_960(v, co, r1, r2, ci); break;
609 | case CONCMPO_960: PERFORM_CONCMPO_960(v, co, r1, r2, ci); break;
610 | case CONCMPI_960: PERFORM_CONCMPI_960(v, co, r1, r2, ci); break;
611 | case CMPO_960: PERFORM_CMPO_960(v, co, r1, r2, ci); break;
612 | case CMPI_960: PERFORM_CMPI_960(v, co, r1, r2, ci); break;
613 | case SHIFTL_NT: PERFORM_SHIFTL_NT(v, co, r1, r2, ci); break;
614 | case LSHIFTR_NT: PERFORM_LSHIFTR_NT(v, co, r1, r2, ci); break;
615 | case ASHIFTR_NT: PERFORM_ASHIFTR_NT(v, co, r1, r2, ci); break;
616 | case ADDO_NO_960:
617 |   v = regs[insn.d];
618 |   PERFORM_ADDO_NO_960(v, co, r1, r2, ci);
619 |   break;
620 | case ADDO_G_960:
621 |   v = regs[insn.d];
622 |   PERFORM_ADDO_G_960(v, co, r1, r2, ci);
623 |   break;
624 | case ADDO_E_960:
625 |   v = regs[insn.d];
626 |   PERFORM_ADDO_E_960(v, co, r1, r2, ci);
627 |   break;
628 | case ADDO_GE_960:
629 |   v = regs[insn.d];
630 |   PERFORM_ADDO_GE_960(v, co, r1, r2, ci);
631 |   break;
632 | case ADDO_L_960:
633 |   v = regs[insn.d];
634 |   PERFORM_ADDO_L_960(v, co, r1, r2, ci);
635 |   break;
636 | case ADDO_NE_960:
637 |   v = regs[insn.d];
638 |   PERFORM_ADDO_NE_960(v, co, r1, r2, ci);
639 |   break;
640 | case ADDO_LE_960:
641 |   v = regs[insn.d];
642 |   PERFORM_ADDO_LE_960(v, co, r1, r2, ci);
643 |   break;
644 | case ADDO_O_960:
645 |   v = regs[insn.d];
646 |   PERFORM_ADDO_O_960(v, co, r1, r2, ci);
647 |   break;
648 | case SUBO_NO_960:
649 |   v = regs[insn.d];
650 |   PERFORM_SUBO_NO_960(v, co, r1, r2, ci);
651 |   break;
652 | case SUBO_G_960:
653 |   v = regs[insn.d];
654 |   PERFORM_SUBO_G_960(v, co, r1, r2, ci);
655 |   break;
656 | case SUBO_E_960:
657 |   v = regs[insn.d];
658 |   PERFORM_SUBO_E_960(v, co, r1, r2, ci);
659 |   break;
660 | case SUBO_GE_960:
661 |   v = regs[insn.d];
662 |   PERFORM_SUBO_GE_960(v, co, r1, r2, ci);
663 |   break;
664 | case SUBO_L_960:
665 |   v = regs[insn.d];
666 |   PERFORM_SUBO_L_960(v, co, r1, r2, ci);
667 |   break;
668 | case SUBO_NE_960:
669 |   v = regs[insn.d];
670 |   PERFORM_SUBO_NE_960(v, co, r1, r2, ci);
671 |   break;
672 | case SUBO_LE_960:
673 |   v = regs[insn.d];
674 |   PERFORM_SUBO_LE_960(v, co, r1, r2, ci);
675 |   break;
676 | case SUBO_O_960:
677 |   v = regs[insn.d];
678 |   PERFORM_SUBO_O_960(v, co, r1, r2, ci);
679 |   break;
680 | 
681 | case ALTERBIT: PERFORM_ALTERBIT(v, co, r1, r2, ci); break;
682 | case SETBIT: PERFORM_SETBIT(v, co, r1, r2, ci); break;
683 | case CLRBIT: PERFORM_CLRBIT(v, co, r1, r2, ci); break;
684 | case CHKBIT: PERFORM_CHKBIT(v, co, r1, r2, ci); break;
685 | case NOTBIT: PERFORM_NOTBIT(v, co, r1, r2, ci); break;
686 | 
687 | #ifdef UDIV_WITH_SDIV
688 | case SDIV: PERFORM_SDIV(v, co, r1, r2, ci); break;
689 | #endif
690 | }
691 | 
692 | #if HAS_NULLIFICATION
693 | reg_defined[insn.d] = 1;
694 | #endif
695 | /* Store result. */
696 | regs[insn.d] = v;
697 | ci = co;
698 | }
699 | 
700 | #if HAS_NULLIFICATION
701 | /* Check if the destination has become defined for the current arguments. */
702 | if (!reg_defined[insn.d])
703 |   return -2;
704 | #endif
705 | 
706 | return ci;
707 | }
708 | 
--------------------------------------------------------------------------------
/version.h:
--------------------------------------------------------------------------------
1 | char *version_string = "2.5";
2 | 
--------------------------------------------------------------------------------