├── .gitignore ├── .gitmodules ├── Binopt.sublime-project ├── LICENSE ├── README.md ├── benchmarks ├── meson.build └── stencil.c ├── examples ├── div.c ├── fastexp.c ├── indcall-spec.c ├── indcall.c ├── meson.build ├── polynom.c ├── polynom2.c └── simple.c ├── include ├── binopt-config.h ├── binopt.h └── meson.build ├── meson.build ├── meson_options.txt ├── rewriter ├── dbll │ ├── ConstMemProp.cc │ ├── ConstMemProp.h │ ├── Logging.h │ ├── LowerNativeCall.cc │ ├── LowerNativeCall.h │ ├── PtrToIntFold.cc │ ├── PtrToIntFold.h │ ├── dbll.cc │ └── meson.build ├── dbrew │ ├── dbrew.c │ └── meson.build ├── drob │ ├── drob.c │ └── meson.build └── meson.build ├── src ├── default.c └── meson.build └── tests ├── alignment.c ├── call.c ├── common.h ├── constmem.c ├── indjmp-dynamic.c ├── indjmp-static.c ├── manyparams.c ├── memory.c ├── meson.build ├── simple.c └── writablemem.c /.gitignore: -------------------------------------------------------------------------------- 1 | /build* 2 | *.sublime-workspace 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "subprojects/dbrew"] 2 | path = subprojects/dbrew 3 | url = https://github.com/caps-tum/dbrew.git 4 | [submodule "subprojects/fadec"] 5 | path = subprojects/fadec 6 | url = https://github.com/aengelke/fadec.git 7 | [submodule "subprojects/rellume"] 8 | path = subprojects/rellume 9 | url = https://github.com/aengelke/rellume.git 10 | [submodule "subprojects/drob"] 11 | path = subprojects/drob 12 | url = https://github.com/davidhildenbrand/drob.git 13 | -------------------------------------------------------------------------------- /Binopt.sublime-project: -------------------------------------------------------------------------------- 1 | { 2 | "folders": 3 | [ 4 | { 5 | "path": ".", 6 | "folder_exclude_patterns": ["build*", "install", "subprojects/*"], 7 | } 8 | ] 9 | } 10 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 
34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. 
This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. 
For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. 
(Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 
168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 
200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 
233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 
265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 
297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. 
You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. 
Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. 
Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. 
For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 
457 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BinOpt — A Library for Self-guided Runtime Binary Optimization 2 | 3 | This library enables explicit optimization of compiled (x86-64) machine code at runtime. The optimization is controlled directly by the application itself (*self-guided*): the application can specify a function to optimize, and a new function with the same interface will be generated, which can be used seamlessly instead of the original function. There are two types of specializations: (1) fixation of function parameters (including pointers), and (2) marking memory regions as constant. An optimizer may honor such configurations to derive more optimized code for the given constraints. Examples of possible optimizations are constant propagation, dead code removal, and loop unrolling. 4 | 5 | ### Example 6 | 7 | Consider the following example (see [examples/simple.c](examples/simple.c)): 8 | 9 | ```c 10 | static int func(int a, int b) { 11 | return a * b; 12 | } 13 | 14 | int main(int argc, char** argv) { 15 | // Create a new handle into the rewriter 16 | BinoptHandle boh = binopt_init(); 17 | // Create a new function configuration 18 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 19 | // Configure the parameter types (return value, 2 parameters) 20 | binopt_cfg_type(bcfg, 2, BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_INT32); 21 | // Set parameter 2 to the constant integer 42 22 | binopt_cfg_set_parami(bcfg, 1, 42); 23 | 24 | int (* new_func)(int, int); 25 | // Optimize the code; using additional knowledge of the configuration 26 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 27 | 28 | int res = new_func(8, 16); // just for demonstration, always call with 42! 29 | // If the constant was propagated, this will print 336 instead of 128. 
30 | printf("8 * 16(42) = %d\n", res); 31 | 32 | return 0; 33 | } 34 | ``` 35 | 36 | ### Rewriting Approaches 37 | 38 | In fact, this repository mostly provides a unified API for applications, allowing the actual implementation of the optimizer to be changed transparently. The default implementation just returns the original function without any modifications. Other rewriters perform significantly deeper code transformations. The rewriter can be switched using the `LD_PRELOAD` environment variable — simply preload the rewriter you want to use. The following optimizers with the unified API are currently implemented: 39 | 40 | - [DBLL](rewriter/dbll): An LLVM-based binary specializer, which lifts the original function to LLVM-IR using [Rellume](https://github.com/aengelke/rellume), performs optimizations at LLVM-IR level, and generates new code using the LLVM MCJIT compiler. This is the rewriter with the highest instruction coverage, supporting most of x86-64, excluding indirect jumps and function calls as well as the x87 FPU, MMX, SSE3+, and AVX. 41 | - [Drob](https://github.com/davidhildenbrand/drob): A low-level rewriter focusing on lower rewriting times while still doing whole function analyses and optimizations. While functions with unknown instructions are supported, optimization possibilities are limited in such cases. 42 | - [DBrew](https://github.com/caps-tum/dbrew): A tracing binary rewriter with emphasis on compile-time performance while also doing unlimited loop unrolling and inlining. This has the most limited scope and instruction coverage. Also, due to massive unrolling of loops with known bounds, code buffer sizes may be exceeded unintentionally. 43 | 44 | ### License 45 | 46 | This project was originally written and is maintained by [Alexis Engelke](https://www.in.tum.de/caps/mitarbeiter/engelke/). All code in this repository is licensed under LGPLv2.1+. 
47 | -------------------------------------------------------------------------------- /benchmarks/meson.build: -------------------------------------------------------------------------------- 1 | # Force -O3 and disable sanitizers for benchmarks 2 | benchmark_opts = ['-O3', '-fno-sanitize=all', '-D_POSIX_C_SOURCE=199309L', '-march=native', '-mno-avx'] 3 | executable('stencil', 'stencil.c', dependencies: [binopt], 4 | c_args: benchmark_opts + ['-ffast-math']) 5 | -------------------------------------------------------------------------------- /benchmarks/stencil.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | /* One stencil tap: offset (xdiff, ydiff) relative to the current cell and its weight. */ 12 | typedef struct { 13 | int64_t xdiff, ydiff; 14 | float factor; 15 | } StencilPoint; 16 | /* A stencil: `points` taps stored in the array `p`. */ 17 | typedef struct { 18 | size_t points; 19 | const StencilPoint* p; 20 | } Stencil; 21 | 22 | typedef void(*StencilFunction)(const Stencil*, float* restrict, float* restrict, size_t); 23 | /* Applies the stencil to every interior cell of a square matrix indexed as y * mat_size + x: out_mat[index] becomes the sum of factor * in_mat[index + ydiff*mat_size + xdiff] over all taps. The first and last row and column of out_mat are not written. NOTE(review): for mat_size == 0 the unsigned bound mat_size - 1 wraps around. */ 24 | inline __attribute__((always_inline)) 25 | static void stencil_generic(const Stencil* stencil_par, float* restrict in_mat, 26 | float* restrict out_mat, size_t mat_size) { 27 | for (size_t y = 1; y < mat_size - 1; ++y) { 28 | for (size_t x = 1; x < mat_size - 1; ++x) { 29 | size_t index = y * mat_size + x; 30 | float res = 0; 31 | for(size_t i = 0; i < stencil_par->points; i++) { 32 | const StencilPoint* p = &stencil_par->p[i]; 33 | res += p->factor * in_mat[index + p->ydiff*mat_size + p->xdiff]; 34 | } 35 | out_mat[index] = res; 36 | } 37 | } 38 | } 39 | /* Fixed 4-point stencil: the four direct neighbours, each weighted 0.25. */ 40 | static const StencilPoint stencil_pts[4] = {{1, 0, 0.25}, {-1, 0, 0.25}, {0, 1, 0.25}, {0, -1, 0.25}}; 41 | static const Stencil stencil = {sizeof(stencil_pts)/sizeof(StencilPoint), stencil_pts}; 42 | /* Same signature as stencil_generic, but ignores stencil_par and always applies the file-level `stencil`. */ 43 | static void stencil_special(const Stencil* stencil_par, float* restrict in_mat, 44 | float* restrict out_mat, size_t mat_size) { 45 | (void) stencil_par; 46 | 
stencil_generic(&stencil, in_mat, out_mat, mat_size); 47 | } 48 | /* Allocates two mat_size x mat_size float matrices with identical contents and returns them through matrix_in and matrix_out: border cells hold a linear ramp between 0 and 1, all other cells are 0. NOTE(review): neither malloc result is checked before it is written to. */ 49 | static void init_matrix(size_t mat_size, float** matrix_in, float** matrix_out) { 50 | float* b = malloc(sizeof(float) * mat_size * mat_size); 51 | for (size_t i = 0; i < mat_size; i++) { 52 | for (size_t j = 0; j < mat_size; j++) { 53 | size_t index = i * mat_size + j; 54 | if (i == 0) // First Row 55 | b[index] = 1.0 - (j * 1.0 / (mat_size - 1)); 56 | else if (i == (mat_size - 1)) // Last Row 57 | b[index] = j * 1.0 / (mat_size - 1); 58 | else if (j == 0) // First Column 59 | b[index] = 1.0 - (i * 1.0 / (mat_size - 1)); 60 | else if (j == (mat_size - 1)) // Last Column 61 | b[index] = i * 1.0 / (mat_size - 1); 62 | else 63 | b[index] = 0; 64 | } 65 | } 66 | *matrix_out = malloc(sizeof(float) * mat_size * mat_size); 67 | memcpy(*matrix_out, b, sizeof(float) * mat_size * mat_size); 68 | 69 | *matrix_in = b; 70 | } 71 | /* Prints a 9x9 sample of the matrix, taking every `stride`-th row and column. Only the "Matrix:" header is printed when mat_size is smaller than 9 or mat_size - 9 is not a multiple of 8. */ 72 | static void print_matrix(size_t mat_size, float* matrix) { 73 | printf("Matrix:\n"); 74 | if (mat_size < 9 || ((mat_size - 9) & 7)) 75 | return; 76 | size_t stride = ((mat_size - 9) / 8) + 1; 77 | for (size_t y = 0; y < 9; y++) { 78 | for (size_t x = 0; x < 9; x++) { 79 | printf("%7.4f", matrix[y * stride * mat_size + x * stride]); 80 | } 81 | printf("\n"); 82 | } 83 | } 84 | /* Returns the current CLOCK_MONOTONIC time; the result of clock_gettime is not checked. */ 85 | static struct timespec time_get(void) { 86 | struct timespec ret; 87 | clock_gettime(CLOCK_MONOTONIC, &ret); 88 | return ret; 89 | } 90 | /* Returns last - first in seconds. */ 91 | static double time_diff_secs(struct timespec* first, struct timespec* last) { 92 | time_t diff_secs = last->tv_sec - first->tv_sec; 93 | long diff_nsecs = last->tv_nsec - first->tv_nsec; 94 | return diff_secs + diff_nsecs * 1e-9; 95 | } 96 | /* Benchmark driver. Options: -o use the binopt rewriter instead of the compiler-specialized function, -n run count, -i interlines (the matrix edge length is 8 * interlines + 9). */ 97 | int main(int argc, char** argv) { 98 | bool use_binopt = false; 99 | size_t run_count = 10000; 100 | size_t interlines = 20; 101 | 102 | int opt; 103 | while ((opt = getopt(argc, argv, "on:i:")) != -1) { 104 | switch (opt) { 105 | case 'o': use_binopt = true; break; 106 | case 'n': run_count = strtoul(optarg, NULL, 
0); break; 107 | case 'i': interlines = strtoul(optarg, NULL, 0); break; 108 | default: 109 | fprintf(stderr, "usage: %s [-o] [-n run_count] [-i interlines]\n", argv[0]); 110 | exit(EXIT_FAILURE); 111 | } 112 | } 113 | 114 | /* This size always passes the 8*k + 9 check in print_matrix. */ 115 | size_t mat_size = 8 * interlines + 9; 116 | float* matrix_a; 117 | float* matrix_b; 118 | init_matrix(mat_size, &matrix_a, &matrix_b); 119 | 120 | BinoptHandle boh; 121 | StencilFunction stencil_fn; 122 | 123 | struct timespec time_start = time_get(); 124 | 125 | if (!use_binopt) { 126 | printf("Using compiler: " __VERSION__ "\n"); 127 | stencil_fn = stencil_special; 128 | } else { 129 | printf("Using rewriter: %s\n", binopt_driver()); 130 | /* Specialize stencil_generic for the fixed stencil (parameter 0, marked constant together with its point array) and the matrix size (parameter 3). */ 131 | boh = binopt_init(); 132 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) stencil_generic); 133 | binopt_cfg_type(bcfg, 4, BINOPT_TY_VOID, BINOPT_TY_PTR_NOALIAS, BINOPT_TY_PTR_NOALIAS, 134 | BINOPT_TY_PTR_NOALIAS, BINOPT_TY_UINT64); 135 | binopt_cfg_set(bcfg, BINOPT_F_FASTMATH, true); 136 | binopt_cfg_set_parami(bcfg, 3, mat_size); 137 | binopt_cfg_set_paramp(bcfg, 0, &stencil, sizeof(stencil), BINOPT_MEM_CONST); 138 | binopt_cfg_mem(bcfg, stencil.p, sizeof(StencilPoint)*stencil.points, BINOPT_MEM_CONST); 139 | 140 | *((BinoptFunc*) &stencil_fn) = binopt_spec_create(bcfg); 141 | } 142 | 143 | struct timespec time_exec = time_get(); 144 | /* Ping-pong between the two matrices: the output of one run is the input of the next. */ 145 | for (size_t i = 0; i < run_count; i++) { 146 | stencil_fn(&stencil, matrix_a, matrix_b, mat_size); 147 | float* tmp = matrix_a; 148 | matrix_a = matrix_b; 149 | matrix_b = tmp; 150 | } 151 | 152 | struct timespec time_end = time_get(); 153 | 154 | print_matrix(mat_size, matrix_a); 155 | 156 | printf("Time (preparation): %8.4lf\n", time_diff_secs(&time_start, &time_exec)); 157 | printf("Time (execution): %8.4lf\n", time_diff_secs(&time_exec, &time_end)); 158 | printf("Time (total): %8.4lf\n", time_diff_secs(&time_start, &time_end)); 159 | /* boh is only initialized on the binopt path. NOTE(review): the two matrices are never freed. */ 160 | if (use_binopt) 161 | binopt_fini(boh); 162 | 163 | return 0; 164 | } 165 | 166 | 
-------------------------------------------------------------------------------- /examples/div.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | /* Unsigned 64-bit division; the function that gets specialized below. A divisor of 0 is not handled. */ 7 | static uint64_t div(uint64_t dividend, uint64_t divisor) { 8 | return dividend / divisor; 9 | } 10 | /* Specializes div with the divisor (parameter 1) fixed to 7, then calls the result with a divisor of 10; the printed label names both possible divisors. */ 11 | int main(int argc, char** argv) { 12 | printf("Rewriter: %s\n", binopt_driver()); 13 | 14 | BinoptHandle boh = binopt_init(); 15 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) div); 16 | binopt_cfg_type(bcfg, 2, BINOPT_TY_UINT64, BINOPT_TY_UINT64, 17 | BINOPT_TY_UINT64); 18 | binopt_cfg_set_parami(bcfg, 1, 7); 19 | 20 | uint64_t (* new_func)(uint64_t, uint64_t); 21 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 22 | /* uint64_t is not necessarily size_t, so print through unsigned long long. */ 23 | uint64_t res = new_func(60, 10); 24 | printf("60//10 (//7) = %llu\n", (unsigned long long) res); 25 | 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /examples/fastexp.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | /* Computes val raised to the power exp by repeated squaring. */ 7 | static double fast_exp(uint64_t exp, double val) { 8 | double ans = 1; 9 | while (exp) { 10 | if (exp & 1) 11 | ans *= val; 12 | val *= val; 13 | exp = exp >> 1; 14 | } 15 | return ans; 16 | } 17 | /* Specializes fast_exp with the exponent (parameter 0) fixed to 42, then calls the result with an exponent of 16. */ 18 | int main(int argc, char** argv) { 19 | printf("Rewriter: %s\n", binopt_driver()); 20 | 21 | BinoptHandle boh = binopt_init(); 22 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) fast_exp); 23 | binopt_cfg_type(bcfg, 2, BINOPT_TY_DOUBLE, BINOPT_TY_UINT64, 24 | BINOPT_TY_DOUBLE); 25 | binopt_cfg_set_parami(bcfg, 0, 42); 26 | 27 | double (* new_func)(uint64_t, double); 28 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 29 | 30 | double res = new_func(16, 0.99); 31 | printf("0.99**16 (**42) = %6.4f\n", res); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /examples/indcall-spec.c: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | 6 | static float add_one(float a) { 7 | return a + 1.0f; 8 | } 9 | 10 | static float add_two(float a) { 11 | return a + 2.0f; 12 | } 13 | /* Calls fn(a) and multiplies the result by a. */ 14 | static float func(float(* fn)(float), float a) { 15 | return fn(a) * a; 16 | } 17 | /* Specializes func with its function-pointer parameter (index 0) fixed to add_one, then calls the result with add_two; the printed label names both possible addends. */ 18 | int main(int argc, char** argv) { 19 | printf("Rewriter: %s\n", binopt_driver()); 20 | 21 | BinoptHandle boh = binopt_init(); 22 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 23 | binopt_cfg_type(bcfg, 2, BINOPT_TY_FLOAT, BINOPT_TY_PTR, 24 | BINOPT_TY_FLOAT); 25 | binopt_cfg_set_parami(bcfg, 0, (size_t) (void*) add_one); 26 | 27 | float (* new_func)(float(*)(float), float); 28 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 29 | 30 | float res = new_func(add_two, 5); 31 | printf("5 * (5 + 2(1)) = %f\n", res); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /examples/indcall.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | 6 | static float add_one(float a) { 7 | return a + 1.0f; 8 | } 9 | 10 | static float add_two(float a) { 11 | return a + 2.0f; 12 | } 13 | /* Calls fn(a) and multiplies the result by a. */ 14 | static float func(float a, float(* fn)(float)) { 15 | return fn(a) * a; 16 | } 17 | /* Rewrites func without fixing any parameter, so the function pointer stays a run-time argument. add_one is not referenced in this file. */ 18 | int main(int argc, char** argv) { 19 | printf("Rewriter: %s\n", binopt_driver()); 20 | 21 | BinoptHandle boh = binopt_init(); 22 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 23 | binopt_cfg_type(bcfg, 2, BINOPT_TY_FLOAT, BINOPT_TY_FLOAT, 24 | BINOPT_TY_PTR); 25 | 26 | float (* new_func)(float, float(*)(float)); 27 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 28 | 29 | float res = new_func(5, add_two); 30 | printf("5 * (5 + 2) = %f\n", res); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /examples/meson.build: 
-------------------------------------------------------------------------------- 1 | executable('simple', 'simple.c', dependencies: [binopt]) 2 | executable('fastexp', 'fastexp.c', dependencies: [binopt]) 3 | executable('div', 'div.c', dependencies: [binopt]) 4 | executable('polynom', 'polynom.c', dependencies: [binopt]) 5 | executable('polynom2', 'polynom2.c', dependencies: [binopt]) 6 | executable('indcall', 'indcall.c', dependencies: [binopt]) 7 | executable('indcall-spec', 'indcall-spec.c', dependencies: [binopt]) 8 | -------------------------------------------------------------------------------- /examples/polynom.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | static int64_t func(const int64_t* poly, int64_t x) { 8 | int64_t res = 0; 9 | int64_t x_n = 1; 10 | for (int64_t i = 0; i < *poly; i++) { 11 | res += x_n * poly[i+1]; 12 | x_n *= x; 13 | } 14 | return res; 15 | } 16 | 17 | int main(int argc, char** argv) { 18 | printf("Rewriter: %s\n", binopt_driver()); 19 | 20 | BinoptHandle boh = binopt_init(); 21 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 22 | binopt_cfg_type(bcfg, 2, BINOPT_TY_INT64, BINOPT_TY_PTR, BINOPT_TY_INT64); 23 | int64_t poly[] = {3, 2, 1, 2}; 24 | binopt_cfg_set_paramp(bcfg, 0, poly, sizeof(poly), BINOPT_MEM_CONST); 25 | 26 | int64_t (* new_func)(const int64_t*, int64_t); 27 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 28 | 29 | poly[0] = 1; 30 | int64_t res = new_func(poly, 4); 31 | printf("2(2+1*4^1+2*4^2) = %ld\n", res); 32 | 33 | return 0; 34 | } 35 | -------------------------------------------------------------------------------- /examples/polynom2.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | static double func(size_t len, const double poly[len], double x) { 9 | double res = 0; 10 | double x_n = 1; 11 | for 
(size_t i = 0; i < len; i++) { 12 | res += x_n * poly[i]; 13 | x_n *= x; 14 | } 15 | return res; 16 | } 17 | 18 | int main(int argc, char** argv) { 19 | printf("Rewriter: %s\n", binopt_driver()); 20 | 21 | BinoptHandle boh = binopt_init(); 22 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 23 | binopt_cfg_type(bcfg, 3, BINOPT_TY_DOUBLE, BINOPT_TY_UINT64, BINOPT_TY_PTR, 24 | BINOPT_TY_DOUBLE); 25 | double poly[] = {.5, 1, 2, 0, 0, 0, 3}; 26 | binopt_cfg_set(bcfg, BINOPT_F_FASTMATH, true); 27 | binopt_cfg_set_parami(bcfg, 0, sizeof poly / sizeof poly[0]); 28 | binopt_cfg_set_paramp(bcfg, 1, poly, sizeof(poly), BINOPT_MEM_CONST); 29 | 30 | double (* new_func)(size_t, const double*, double); 31 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 32 | 33 | double res = new_func(1, poly, 4); 34 | printf(".5(.5+1*4^1+2*4^2) = %f\n", res); 35 | 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /examples/simple.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | 6 | static int func(int a, int b) { 7 | return a * b; 8 | } 9 | 10 | int main(int argc, char** argv) { 11 | printf("Rewriter: %s\n", binopt_driver()); 12 | 13 | BinoptHandle boh = binopt_init(); 14 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 15 | binopt_cfg_type(bcfg, 2, BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_INT32); 16 | binopt_cfg_set_parami(bcfg, 1, 42); 17 | 18 | int (* new_func)(int, int); 19 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 20 | 21 | int res = new_func(8, 16); 22 | printf("8 * 16(42) = %d\n", res); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /include/binopt-config.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef BINOPT_CONFIG_H 3 | #define BINOPT_CONFIG_H 4 | 5 | #include 6 | 7 | #include 8 | #include 
9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | /* Default implementation of configuration API. This is NOT to be used by 15 | * applications but only by rewriting tools which do not implement their own 16 | * configuration function. */ 17 | /* Configuration record for one function: the handle and function it belongs to, its return type, global flags, and three arrays (parameters, memory ranges, implementation flags). */ 18 | struct BinoptCfg { 19 | BinoptHandle handle; 20 | BinoptFunc func; 21 | BinoptType ret_ty; 22 | 23 | // Allow unsafe floating-point optimizations. 0 = none, 1 = all. 24 | // Upper bits may define individual optimizations in future. 25 | uint8_t fast_math; 26 | // Log level verbosity. 0 = none/quiet 27 | uint8_t log_level; 28 | /* Parameter table. NOTE(review): *_count / *_alloc presumably hold the used and allocated entry counts of the array that follows -- confirm against the default implementation. */ 29 | size_t param_count; 30 | size_t param_alloc; 31 | struct BinoptCfgParam { 32 | BinoptType ty; 33 | void* const_val; 34 | }* params; 35 | /* Explicitly configured memory ranges with their BinoptMemFlags. */ 36 | size_t memrange_count; 37 | size_t memrange_alloc; 38 | struct BinoptCfgMemrange { 39 | void* base; 40 | size_t size; 41 | BinoptMemFlags flags; 42 | }* memranges; 43 | /* Flag/value pairs stored separately from the fast_math and log_level fields above. */ 44 | size_t implflag_count; 45 | size_t implflag_alloc; 46 | struct BinoptCfgFlag { 47 | BinoptOptFlags flag; 48 | size_t val; 49 | }* implflags; 50 | }; 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /include/binopt.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef BINOPT_H 3 | #define BINOPT_H 4 | 5 | #include 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* Maps an API name to its public symbol, e.g. BINOPT_API(init) expands to binopt_init. */ 11 | #define BINOPT_API(name) binopt_ ## name 12 | /* Untyped pointer used for both the original and the specialized function. */ 13 | typedef void* BinoptFunc; 14 | 15 | typedef struct BinoptOpaqueHandle* BinoptHandle; 16 | typedef struct BinoptCfg* BinoptCfgRef; 17 | /* How the rewriter may treat a memory region. */ 18 | typedef enum { 19 | /// The memory region flags depend on the page mapping -- read-only pages 20 | /// are assumed to be constant, while unmapped and writable regions are 21 | /// treated as dynamic. 22 | BINOPT_MEM_DEFAULT = 0, 23 | /// The memory region is treated as constant. 
Behavior if the area is 24 | /// modified between configuration and the last call of the rewritten code 25 | /// is undefined. 26 | BINOPT_MEM_CONST, 27 | /// The memory region and all regions deduced from pointers (recursively) 28 | /// loaded from that are assumed to be constant. Some rewriters do not 29 | /// support detecting nested pointers and treat such regions as regular 30 | /// constant memory. 31 | BINOPT_MEM_NESTED_CONST, 32 | /// The memory region is treated as dynamic. 33 | BINOPT_MEM_DYNAMIC, 34 | } BinoptMemFlags; 35 | /* Types accepted by binopt_cfg_type for the return value and the parameters. */ 36 | typedef enum { 37 | BINOPT_TY_VOID = 0, 38 | BINOPT_TY_INT8, 39 | BINOPT_TY_INT16, 40 | BINOPT_TY_INT32, 41 | BINOPT_TY_INT64, 42 | BINOPT_TY_UINT8, 43 | BINOPT_TY_UINT16, 44 | BINOPT_TY_UINT32, 45 | BINOPT_TY_UINT64, 46 | BINOPT_TY_FLOAT, 47 | BINOPT_TY_DOUBLE, 48 | BINOPT_TY_PTR, 49 | BINOPT_TY_PTR_NOALIAS, 50 | } BinoptType; 51 | /* Flags accepted by binopt_cfg_set. */ 52 | typedef enum { 53 | /// Undefined flag value. Do not use. 54 | BINOPT_F_UNDEF = 0, 55 | /// Maximum stack size of optimized code. 56 | BINOPT_F_STACKSZ, 57 | /// Fast-math optimizations flags. 58 | BINOPT_F_FASTMATH, 59 | /// Log level verbosity. 0 = none/quiet (default). 60 | BINOPT_F_LOGLEVEL, 61 | } BinoptOptFlags; 62 | /* Name of the active rewriter, and creation/destruction of a rewriter handle. */ 63 | const char* BINOPT_API(driver)(void); 64 | BinoptHandle BINOPT_API(init)(void); 65 | void BINOPT_API(fini)(BinoptHandle handle); 66 | 67 | /// Create a new configuration for a given function. Implementations may deduce 68 | /// type information from DWARF or CTF information encoded in the binary, or 69 | /// other sources. If such information is not available, the type must be 70 | /// configured using #binopt_cfg_type. 71 | BinoptCfgRef BINOPT_API(cfg_new)(BinoptHandle handle, 72 | BinoptFunc base_func); 73 | /// Clone a configuration. This creates a deep clone of the previous 74 | /// configuration and inherits all properties. The new configuration is entirely 75 | /// independent of the old configuration. 
Implementations may use copy-on-write 76 | /// semantics internally. 77 | BinoptCfgRef BINOPT_API(cfg_clone)(BinoptCfgRef base_cfg); 78 | /// Set function signature with a specified number of parameters. Functions with 79 | /// a variable number of arguments are not supported. 80 | void BINOPT_API(cfg_type)(BinoptCfgRef cfg, unsigned count, BinoptType ret, ...); 81 | /// Set a configuration flag. Additional flags may be implementation-defined. 82 | void BINOPT_API(cfg_set)(BinoptCfgRef cfg, BinoptOptFlags flag, size_t val); 83 | /// Set a parameter to a constant value. Note that this API takes a non-captured 84 | /// pointer to the constant value. The size of the dereferenced memory is 85 | /// inferred from the argument type. The value is copied into an internal 86 | /// configuration storage. Behavior for an out-of-range index is undefined. 87 | void BINOPT_API(cfg_set_param)(BinoptCfgRef cfg, unsigned idx, const void* val); 88 | /// Explicitly configure a memory region. Constant memory regions are not 89 | /// copied and must not be modified afterwards. 90 | void BINOPT_API(cfg_mem)(BinoptCfgRef cfg, void* base, size_t size, 91 | BinoptMemFlags flags); 92 | /// Free a configuration. 93 | void BINOPT_API(cfg_free)(BinoptCfgRef cfg); 94 | 95 | /// Create a specialized implementation for a configuration. The ABI of the 96 | /// returned function is identical to the original function. Implementations are 97 | /// not required to actually return a specialized implementation, they may also 98 | /// return a pointer to the original function. Behavior of the function for 99 | /// incorrect configuration (including subsequent modifications of constant 100 | /// memory) is undefined. 101 | BinoptFunc BINOPT_API(spec_create)(BinoptCfgRef cfg); 102 | /// Delete a specialized implementation for a configuration. 
103 | void BINOPT_API(spec_delete)(BinoptHandle handle, BinoptFunc spec_func); 104 | 105 | 106 | // Convenience functions 107 | 108 | /// Configure an integer parameter with a given value. 109 | void BINOPT_API(cfg_set_parami)(BinoptCfgRef cfg, unsigned idx, size_t val); 110 | /// Configure a pointer parameter with a given memory range. 111 | void BINOPT_API(cfg_set_paramp)(BinoptCfgRef cfg, unsigned idx, const void* ptr, 112 | size_t size, BinoptMemFlags flags); 113 | /* NOTE(review): BINOPT_API_NAME is never defined in this header, so the first #undef below has no effect. BINOPT_API itself is removed here and is therefore not available to code that includes this header. */ 114 | #undef BINOPT_API_NAME 115 | #undef BINOPT_API 116 | 117 | #ifdef __cplusplus 118 | } 119 | #endif 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /include/meson.build: -------------------------------------------------------------------------------- 1 | install_headers([ 2 | 'binopt.h', 3 | 'binopt-config.h', 4 | ]) 5 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('binopt', ['c', 'cpp'], 2 | default_options: ['buildtype=debugoptimized', 3 | 'warning_level=3', 4 | 'c_std=gnu99', 5 | 'cpp_std=c++17']) 6 | 7 | if get_option('warning_level').to_int() >= 3 8 | add_project_arguments(['-Wmissing-field-initializers', 9 | '-Wold-style-definition', 10 | '-Wmissing-declarations', 11 | '-Wmissing-prototypes', 12 | '-Wredundant-decls', 13 | '-Wmissing-noreturn', 14 | '-Wshadow', 15 | '-Wpointer-arith', 16 | '-Wwrite-strings', 17 | '-Winline', 18 | '-Wformat-nonliteral', 19 | '-Wformat-security', 20 | '-Wswitch-default', 21 | '-Winit-self', 22 | '-Wnested-externs', 23 | '-Wstrict-prototypes', 24 | '-Wmissing-include-dirs', 25 | '-Wundef', 26 | '-Wmissing-format-attribute' 27 | ], language: 'c') 28 | endif 29 | 30 | add_project_arguments(['-Wno-unused-parameter'], language: 'c') 31 | add_project_arguments(['-Wno-unused-parameter'], language: 'cpp') 32 | 33 | binopt_inc = include_directories('include') 34 | 
subdir('include') 35 | subdir('src') 36 | binopt = declare_dependency(include_directories: binopt_inc, 37 | link_with: binopt_lib) 38 | 39 | subdir('rewriter') 40 | 41 | subdir('benchmarks') 42 | subdir('examples') 43 | subdir('tests') 44 | 45 | pkg = import('pkgconfig') 46 | pkg.generate(binopt_lib, 47 | version: '0.1', 48 | name: 'binopt', 49 | filebase: 'binopt', 50 | description: 'Binary optimization library') 51 | -------------------------------------------------------------------------------- /meson_options.txt: -------------------------------------------------------------------------------- 1 | option('build_dbrew', type: 'boolean') 2 | option('build_dbll', type: 'boolean') 3 | option('build_drob', type: 'boolean') 4 | -------------------------------------------------------------------------------- /rewriter/dbll/ConstMemProp.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "ConstMemProp.h" 3 | 4 | #include "Logging.h" 5 | 6 | #include "binopt-config.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | namespace dbll { 19 | 20 | class ConstMemProp { 21 | llvm::ArrayRef memranges; 22 | public: 23 | ConstMemProp(llvm::ArrayRef mr) : memranges(mr) {} 24 | 25 | private: 26 | // Returns 0 when the constant could not be folded. 27 | llvm::APInt ConstantValue(const llvm::Constant* c) { 28 | // A constant is either undefined... 29 | if (llvm::isa(c)) 30 | return llvm::APInt(64, 0); 31 | 32 | // ... or a constant expression ... 33 | if (const llvm::ConstantExpr* ce = llvm::dyn_cast(c)) { 34 | switch (ce->getOpcode()) { 35 | case llvm::Instruction::IntToPtr: { 36 | llvm::APInt ptr_val = ConstantValue(ce->getOperand(0)); 37 | return ptr_val.zextOrTrunc(64); 38 | } 39 | default: 40 | // Can't handle that expression 41 | return llvm::APInt(64, 0); 42 | } 43 | } 44 | 45 | // ... or simple. 
46 | switch (c->getType()->getTypeID()) { 47 | case llvm::Type::IntegerTyID: 48 | return llvm::cast(c)->getValue(); 49 | default: 50 | // Can't handle that constant type 51 | return llvm::APInt(64, 0); 52 | } 53 | } 54 | 55 | std::pair GetConstantMem(uintptr_t addr, size_t size) { 56 | size_t size_bytes = (size + 7) / 8; 57 | for (const auto& [r_start, r_len] : memranges) { 58 | if (addr < r_start || addr + size_bytes > r_start + r_len) 59 | continue; 60 | 61 | auto const_ptr = reinterpret_cast(addr); 62 | llvm::APInt const_val(size, llvm::ArrayRef(const_ptr, size/64)); 63 | return std::make_pair(true, const_val); 64 | } 65 | return std::make_pair(false, llvm::APInt(size, 0)); 66 | } 67 | 68 | llvm::Constant* ConstantFoldLoad(llvm::LoadInst* load) { 69 | auto addr = llvm::dyn_cast(load->getPointerOperand()); 70 | if (!addr) 71 | return nullptr; 72 | 73 | uint64_t addr_val = ConstantValue(addr).trunc(64).getLimitedValue(); 74 | 75 | if (!addr_val) 76 | return nullptr; 77 | 78 | llvm::Type* target_ty = addr->getType()->getPointerElementType(); 79 | size_t target_bits = target_ty->getPrimitiveSizeInBits(); 80 | if (target_ty->isPointerTy()) 81 | target_bits = 64; 82 | 83 | auto const_mem = GetConstantMem(addr_val, target_bits); 84 | if (!const_mem.first) 85 | return nullptr; 86 | 87 | llvm::LLVMContext& ctx = target_ty->getContext(); 88 | llvm::Constant* const_int = llvm::ConstantInt::get(ctx, const_mem.second); 89 | llvm::Constant* const_val; 90 | if (target_ty->isPointerTy()) 91 | const_val = llvm::ConstantExpr::getIntToPtr(const_int, target_ty); 92 | else 93 | const_val = llvm::ConstantExpr::getBitCast(const_int, target_ty); 94 | 95 | 96 | return const_val; 97 | } 98 | 99 | public: 100 | bool run(llvm::Function& fn) { 101 | const llvm::DataLayout& dl = fn.getParent()->getDataLayout(); 102 | 103 | // strategy similar to llvm::ConstantPropagation::runOnFunction. 
104 | llvm::SmallPtrSet queue; 105 | llvm::SmallVector queue_vec; 106 | for (llvm::Instruction& inst : llvm::instructions(fn)) { 107 | llvm::LoadInst* load = llvm::dyn_cast(&inst); 108 | if (load && llvm::isa(load->getPointerOperand())) { 109 | queue.insert(&inst); 110 | queue_vec.push_back(&inst); 111 | } 112 | } 113 | 114 | bool changed = false; 115 | while (!queue.empty()) { 116 | llvm::SmallVector new_queue_vec; 117 | for (llvm::Instruction* inst : queue_vec) { 118 | queue.erase(inst); 119 | llvm::Constant* const_repl = nullptr; 120 | if (auto load_inst = llvm::dyn_cast(inst)) 121 | const_repl = ConstantFoldLoad(load_inst); 122 | if (!const_repl) 123 | const_repl = llvm::ConstantFoldInstruction(inst, dl, nullptr); 124 | if (!const_repl) 125 | continue; 126 | 127 | for (llvm::User* user : inst->users()) { 128 | // If user not in the set, then add it to the vector. 129 | if (queue.insert(llvm::cast(user)).second) 130 | new_queue_vec.push_back(llvm::cast(user)); 131 | } 132 | 133 | inst->replaceAllUsesWith(const_repl); 134 | 135 | if (llvm::isInstructionTriviallyDead(inst, nullptr)) 136 | inst->eraseFromParent(); 137 | 138 | changed = true; 139 | } 140 | 141 | queue_vec = std::move(new_queue_vec); 142 | } 143 | 144 | return changed; 145 | } 146 | }; 147 | 148 | llvm::PreservedAnalyses ConstMemPropPass::run(llvm::Function& fn, 149 | llvm::FunctionAnalysisManager& fam) { 150 | ConstMemProp cmp(memranges); 151 | 152 | if (!cmp.run(fn)) 153 | return llvm::PreservedAnalyses::all(); 154 | 155 | llvm::PreservedAnalyses pa; 156 | pa.preserveSet(); 157 | return pa; 158 | } 159 | 160 | } // namespace dbll 161 | -------------------------------------------------------------------------------- /rewriter/dbll/ConstMemProp.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DBLL_CONST_MEM_PROP 3 | #define DBLL_CONST_MEM_PROP 4 | 5 | #include "binopt-config.h" 6 | 7 | #include 8 | #include 9 | 10 | namespace dbll { 11 | 12 | class 
ConstMemPropPass : public llvm::PassInfoMixin { 13 | public: 14 | using MemRange = std::pair; 15 | 16 | private: 17 | llvm::ArrayRef memranges; 18 | 19 | public: 20 | ConstMemPropPass(llvm::ArrayRef mr) : memranges(mr) {} 21 | 22 | llvm::PreservedAnalyses run(llvm::Function& fn, llvm::FunctionAnalysisManager& fam); 23 | }; 24 | 25 | } // namespace dbll 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /rewriter/dbll/Logging.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DBLL_LOGGING 3 | #define DBLL_LOGGING 4 | 5 | namespace dbll { 6 | /* Log verbosity levels in increasing order; QUIET is 0. */ 7 | namespace LogLevel { 8 | enum { 9 | QUIET = 0, 10 | WARNING, 11 | INFO, 12 | DEBUG, 13 | DEBUG2, 14 | DEBUG3, 15 | }; 16 | } // namespace LogLevel 17 | 18 | } // namespace dbll 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /rewriter/dbll/LowerNativeCall.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "LowerNativeCall.h" 3 | 4 | #include "Logging.h" 5 | 6 | #include "binopt-config.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | 15 | namespace dbll { 16 | 17 | static const char* NATIVE_CALL_NAME = "dbll.native_call"; 18 | /* Emits, immediately before `call`, inline assembly that loads the register values from the state buffer (first call argument), transfers control with iretq, and stores the resulting register values back into the buffer. The host stack pointer is saved to and restored from `glob`; the return address is written through the second call argument. */ 19 | static void LowerNativeTail(llvm::CallInst* call, llvm::GlobalValue* glob) { 20 | // Create alloca in entry block to avoid RBP-relative addressing due to a 21 | // variable-size stack frame. RBP is set in the inline-assembly, but LLVM 22 | // apparently doesn't recognize this and generates the following: 23 | // 24 | // mov rbp,rcx 25 | // ... 26 | // => mov rdi,QWORD PTR [rbp-0x40] 27 | // mov rcx,rdx 28 | // mov r9,rbx 29 | // ... 
30 | // iretq 31 | // 32 | // TODO: maybe remove RBP from inline-assembly constraints 33 | llvm::Function* fn = call->getParent()->getParent(); 34 | llvm::IRBuilder<> irb(fn->getEntryBlock().getFirstNonPHI()); 35 | // TODO: just one alloca per function 36 | llvm::Value* asm_buf = irb.CreateAlloca(irb.getInt64Ty(), irb.getInt64(8)); 37 | 38 | irb.SetInsertPoint(call); 39 | 40 | llvm::Type* i64p = irb.getInt64Ty()->getPointerTo(); 41 | llvm::Value* sptr = irb.CreateBitCast(call->getArgOperand(0), i64p); 42 | llvm::Value* stored_rip_ptr = call->getArgOperand(1); 43 | 44 | llvm::Value* sptr_rsp = irb.CreateConstGEP1_64(sptr, 5); 45 | 46 | // Buffer area passed to inline asm. 47 | // Contains: userrip, cs, userrflags, userrsp, ss 48 | irb.CreateStore(irb.CreateLoad(sptr), irb.CreateConstGEP1_64(asm_buf, 0)); 49 | irb.CreateStore(irb.getInt64(0x33), irb.CreateConstGEP1_64(asm_buf, 1)); 50 | // TODO: actually compute rflags? 51 | irb.CreateStore(irb.getInt64(0x202), irb.CreateConstGEP1_64(asm_buf, 2)); 52 | irb.CreateStore(irb.CreateLoad(sptr_rsp), irb.CreateConstGEP1_64(asm_buf, 3)); 53 | irb.CreateStore(irb.getInt64(0x2b), irb.CreateConstGEP1_64(asm_buf, 4)); 54 | /* Load the 15 general-purpose registers (all but RSP) and the 16 XMM registers from the state buffer; sptr_geps remembers where each result is written back. */ 55 | llvm::SmallVector sptr_geps; 56 | llvm::SmallVector asm_args; 57 | 58 | for (unsigned idx = 0; idx < 16; idx++) { 59 | if (idx == 4) 60 | continue; // Skip RSP 61 | llvm::Value* ptr = irb.CreateConstGEP1_64(sptr, 1 + idx); 62 | sptr_geps.push_back(ptr); 63 | asm_args.push_back(irb.CreateLoad(irb.getInt64Ty(), ptr)); 64 | } 65 | llvm::Type* sse_ty = llvm::VectorType::get(irb.getInt64Ty(), 2); 66 | llvm::Value* sptr128 = irb.CreateBitCast(sptr, sse_ty->getPointerTo()); 67 | for (uint8_t idx = 0; idx < 16; idx++) { 68 | llvm::Value* ptr = irb.CreateConstGEP1_64(sse_ty, sptr128, 10 + idx, "xmmgep"); 69 | sptr_geps.push_back(ptr); 70 | asm_args.push_back(irb.CreateLoad(sse_ty, ptr)); 71 | } 72 | 73 | // First construct ty_list for the return type 74 | llvm::SmallVector ty_list; 75 | for (llvm::Value* arg : 
asm_args) 76 | ty_list.push_back(arg->getType()); 77 | 78 | llvm::Type* asm_ret_ty = llvm::StructType::get(call->getContext(), ty_list); 79 | 80 | // Store RAX, RCX and RDX in asm_buf 81 | irb.CreateStore(asm_args[0], irb.CreateConstGEP1_64(asm_buf, 5)); 82 | irb.CreateStore(asm_args[1], irb.CreateConstGEP1_64(asm_buf, 6)); 83 | irb.CreateStore(asm_args[2], irb.CreateConstGEP1_64(asm_buf, 7)); 84 | // RAX = asm_buf, RCX = stored_rip_ptr, RDX = glob 85 | asm_args[0] = asm_buf; 86 | asm_args[1] = stored_rip_ptr; 87 | asm_args[2] = glob; 88 | ty_list[0] = i64p; 89 | ty_list[1] = i64p; 90 | ty_list[2] = i64p; 91 | /* Extra trailing operand: glob passed as an immediate (constraint `i`). With 31 outputs and 31 tied inputs before it, the assembly refers to it as $62. */ 92 | asm_args.push_back(glob); 93 | ty_list.push_back(i64p); 94 | 95 | auto asm_ty = llvm::FunctionType::get(asm_ret_ty, ty_list, false); 96 | const auto constraints = 97 | "={ax},={cx},={dx},={bx},={bp},={si},={di},={r8},={r9},={r10},={r11}," 98 | "={r12},={r13},={r14},={r15},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4}," 99 | "={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12}," 100 | "={xmm13},={xmm14},={xmm15},0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16," 101 | "17,18,19,20,21,22,23,24,25,26,27,28,29,30,i"; 102 | const auto asm_code = 103 | "mov %rsp, (%rdx);" // store rsp in glob 104 | "lea 1f(%rip), %rdx;" 105 | "mov %rax, %rsp;" 106 | "mov 0x28(%rsp), %rax;" // setup user rax 107 | "mov %rdx, (%rcx);" // store return address in stored_rip_ptr 108 | "mov 0x30(%rsp), %rcx;" // setup user rcx 109 | "mov 0x38(%rsp), %rdx;" // setup user rdx 110 | "iretq;" // isn't this cool? -- no, it isn't. 111 | "1:" // TODO: perhaps check rsp somehow? 112 | "movabs $62, %rsp;" // throw away user rsp. 
113 | "mov (%rsp), %rsp;"; 114 | 115 | auto asm_inst = llvm::InlineAsm::get(asm_ty, asm_code, constraints, 116 | /*hasSideEffects=*/true, 117 | /*alignStack=*/true, 118 | llvm::InlineAsm::AD_ATT); 119 | llvm::Value* asm_res = irb.CreateCall(asm_inst, asm_args); 120 | 121 | // RIP and RSP are set outside of this function to allow for better 122 | // optimizations for calls/jumps with known targets. 123 | for (unsigned i = 0; i < sptr_geps.size(); i++) 124 | irb.CreateStore(irb.CreateExtractValue(asm_res, i), sptr_geps[i]); 125 | } 126 | 127 | static void LowerNativeCall(llvm::CallInst* call, llvm::GlobalValue* glob) { 128 | // See above for a rationale for having the alloca in the entry block. 129 | // TODO: maybe remove RBP from inline-assembly constraints 130 | llvm::Function* fn = call->getParent()->getParent(); 131 | llvm::IRBuilder<> irb(fn->getEntryBlock().getFirstNonPHI()); 132 | // TODO: just one alloca per function 133 | llvm::Value* asm_buf = irb.CreateAlloca(irb.getInt64Ty(), irb.getInt64(3)); 134 | 135 | irb.SetInsertPoint(call); 136 | 137 | llvm::Type* i64p = irb.getInt64Ty()->getPointerTo(); 138 | llvm::Value* sptr = irb.CreateBitCast(call->getArgOperand(0), i64p); 139 | 140 | llvm::Value* sptr_rsp = irb.CreateConstGEP1_64(sptr, 5); 141 | llvm::Value* userrsp = irb.CreateLoad(irb.getInt64Ty(), sptr_rsp); 142 | userrsp = irb.CreateAdd(userrsp, irb.getInt64(8)); // we use call ourselves 143 | llvm::Value* userrsp_ptr = irb.CreateIntToPtr(userrsp, irb.getInt64Ty()->getPointerTo()); 144 | 145 | // Store user RIP at right address 146 | irb.CreateStore(irb.CreateLoad(sptr), irb.CreateConstGEP1_64(userrsp_ptr, -1)); 147 | 148 | // Buffer area passed to inline asm. 
149 | // Contains: userrsp, userrdx, userrbx 150 | irb.CreateStore(userrsp, irb.CreateConstGEP1_64(asm_buf, 0)); 151 | 152 | llvm::SmallVector sptr_geps; 153 | llvm::SmallVector asm_args; 154 | 155 | for (unsigned idx = 0; idx < 16; idx++) { 156 | if (idx == 4) 157 | continue; // Skip RSP 158 | llvm::Value* ptr = irb.CreateConstGEP1_64(sptr, 1 + idx); 159 | sptr_geps.push_back(ptr); 160 | asm_args.push_back(irb.CreateLoad(irb.getInt64Ty(), ptr)); 161 | } 162 | llvm::Type* sse_ty = llvm::VectorType::get(irb.getInt64Ty(), 2); 163 | llvm::Value* sptr128 = irb.CreateBitCast(sptr, sse_ty->getPointerTo()); 164 | for (uint8_t idx = 0; idx < 16; idx++) { 165 | llvm::Value* ptr = irb.CreateConstGEP1_64(sse_ty, sptr128, 10 + idx, "xmmgep"); 166 | sptr_geps.push_back(ptr); 167 | asm_args.push_back(irb.CreateLoad(sse_ty, ptr)); 168 | } 169 | 170 | // First construct ty_list for the return type 171 | llvm::SmallVector ty_list; 172 | for (llvm::Value* arg : asm_args) 173 | ty_list.push_back(arg->getType()); 174 | 175 | llvm::Type* asm_ret_ty = llvm::StructType::get(irb.getContext(), ty_list); 176 | 177 | // Store RBX and RDX in asm_buf 178 | irb.CreateStore(asm_args[3], irb.CreateConstGEP1_64(asm_buf, 2)); 179 | irb.CreateStore(asm_args[2], irb.CreateConstGEP1_64(asm_buf, 1)); 180 | // RBX = asm_buf, RDX = glob 181 | asm_args[3] = asm_buf; 182 | asm_args[2] = glob; 183 | ty_list[3] = i64p; 184 | ty_list[2] = i64p; 185 | 186 | asm_args.push_back(glob); 187 | ty_list.push_back(i64p); 188 | 189 | auto asm_ty = llvm::FunctionType::get(asm_ret_ty, ty_list, false); 190 | const auto constraints = 191 | "={ax},={cx},={dx},={bx},={bp},={si},={di},={r8},={r9},={r10},={r11}," 192 | "={r12},={r13},={r14},={r15},={xmm0},={xmm1},={xmm2},={xmm3},={xmm4}," 193 | "={xmm5},={xmm6},={xmm7},={xmm8},={xmm9},={xmm10},={xmm11},={xmm12}," 194 | "={xmm13},={xmm14},={xmm15},0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16," 195 | "17,18,19,20,21,22,23,24,25,26,27,28,29,30,i"; 196 | const auto asm_code = 197 | 
"mov %rsp, (%rdx);" // store rsp in glob 198 | "mov 0x0(%rbx), %rsp;" // setup user rsp 199 | "mov 0x8(%rbx), %rdx;" // setup user rdx 200 | "mov 0x10(%rbx), %rbx;" // setup user rbx 201 | "callq *-0x8(%rsp);" 202 | "movabs $62, %rsp;" 203 | "mov (%rsp), %rsp;"; 204 | 205 | auto asm_inst = llvm::InlineAsm::get(asm_ty, asm_code, constraints, 206 | /*hasSideEffects=*/true, 207 | /*alignStack=*/true, 208 | llvm::InlineAsm::AD_ATT); 209 | llvm::Value* asm_res = irb.CreateCall(asm_inst, asm_args); 210 | 211 | // RIP and RSP are set outside of this function to allow for better 212 | // optimizations for calls/jumps with known targets. 213 | for (unsigned i = 0; i < sptr_geps.size(); i++) 214 | irb.CreateStore(irb.CreateExtractValue(asm_res, i), sptr_geps[i]); 215 | } 216 | 217 | llvm::PreservedAnalyses LowerNativeCallPass::run(llvm::Module& mod, 218 | llvm::ModuleAnalysisManager& mam) { 219 | llvm::Function* native_call_fn = mod.getFunction(NATIVE_CALL_NAME); 220 | if (!native_call_fn) 221 | return llvm::PreservedAnalyses::all(); 222 | 223 | llvm::SmallVector call_insts; 224 | for (const llvm::Use& use : native_call_fn->uses()) { 225 | llvm::CallInst* ci = llvm::dyn_cast(use.getUser()); 226 | if (!ci || ci->getCalledFunction() != native_call_fn) 227 | continue; 228 | call_insts.push_back(ci); 229 | } 230 | 231 | if (call_insts.empty()) 232 | return llvm::PreservedAnalyses::all(); 233 | 234 | // Lowering native calls may require some accessible storage space. Since 235 | // the MCJIT doesn't support thread-local storage, this is not thread-safe. 
236 | llvm::Type* i64 = llvm::Type::getInt64Ty(mod.getContext()); 237 | auto* glob = new llvm::GlobalVariable(mod, i64, false, 238 | llvm::GlobalValue::PrivateLinkage, 239 | llvm::Constant::getNullValue(i64), 240 | "dbll.native_call.glob"); 241 | 242 | for (llvm::CallInst* call : call_insts) { 243 | auto is_call = llvm::dyn_cast(call->getArgOperand(3)); 244 | if (is_call && !is_call->isZeroValue()) 245 | LowerNativeCall(call, glob); 246 | else 247 | LowerNativeTail(call, glob); 248 | 249 | call->eraseFromParent(); 250 | } 251 | 252 | llvm::PreservedAnalyses pa; 253 | return pa; 254 | } 255 | 256 | llvm::Function* LowerNativeCallPass::CreateNativeCallFn(llvm::Module& mod) { 257 | llvm::LLVMContext& ctx = mod.getContext(); 258 | 259 | llvm::Type* i1 = llvm::Type::getInt1Ty(ctx); 260 | llvm::Type* i64 = llvm::Type::getInt64Ty(ctx); 261 | llvm::Type* i8p = llvm::Type::getInt8PtrTy(ctx); 262 | llvm::Type* i64p = i64->getPointerTo(); 263 | llvm::Type* void_ty = llvm::Type::getVoidTy(ctx); 264 | // This is void(i8* sptr, i64* retaddr_ptr, i64 rip, i1 is_call) 265 | auto fn_ty = llvm::FunctionType::get(void_ty, {i8p, i64p, i64, i1}, false); 266 | auto linkage = llvm::GlobalValue::ExternalLinkage; 267 | return llvm::Function::Create(fn_ty, linkage, NATIVE_CALL_NAME, &mod); 268 | } 269 | 270 | } // namespace dbll 271 | -------------------------------------------------------------------------------- /rewriter/dbll/LowerNativeCall.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DBLL_LOWER_NATIVE_CALL 3 | #define DBLL_LOWER_NATIVE_CALL 4 | 5 | #include "binopt-config.h" 6 | 7 | #include 8 | #include 9 | 10 | namespace dbll { 11 | 12 | class LowerNativeCallPass : public llvm::PassInfoMixin { 13 | public: 14 | LowerNativeCallPass() {} 15 | 16 | llvm::PreservedAnalyses run(llvm::Module& mod, llvm::ModuleAnalysisManager& mam); 17 | 18 | static llvm::Function* CreateNativeCallFn(llvm::Module& mod); 19 | }; 20 | 21 | } // namespace 
dbll 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /rewriter/dbll/PtrToIntFold.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "PtrToIntFold.h" 3 | 4 | #include "Logging.h" 5 | 6 | #include "binopt-config.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | namespace dbll { 18 | 19 | class PtrToIntFold { 20 | llvm::Function& fn; 21 | public: 22 | PtrToIntFold(llvm::Function& fn) : fn(fn) {} 23 | 24 | private: 25 | bool PropagatePTI(llvm::PtrToIntInst* pti, llvm::SmallVectorImpl* queue) { 26 | bool changed = false; 27 | llvm::SmallVector dead_queue; 28 | for (llvm::User* user : pti->users()) { 29 | if (auto* ipt = llvm::dyn_cast(user)) { 30 | if (ipt->getAddressSpace() != pti->getPointerAddressSpace()) 31 | continue; 32 | 33 | llvm::Value* new_val = pti->getPointerOperand(); 34 | if (ipt->getDestTy() != new_val->getType()) { 35 | llvm::IRBuilder<> irb(ipt); 36 | new_val = irb.CreateBitCast(new_val, ipt->getDestTy()); 37 | } 38 | ipt->replaceAllUsesWith(new_val); 39 | dead_queue.push_back(ipt); 40 | 41 | changed = true; 42 | } else if (auto* binop = llvm::dyn_cast(user)) { 43 | if (binop->getOpcode() != llvm::Instruction::Add) 44 | continue; 45 | 46 | llvm::IRBuilder<> irb(binop); 47 | llvm::Value* ptr = pti->getPointerOperand(); 48 | ptr = irb.CreateBitCast(ptr, irb.getInt8PtrTy()); 49 | 50 | llvm::Value* other_op; 51 | if (binop->getOperand(0) == pti) 52 | other_op = binop->getOperand(1); 53 | else if (binop->getOperand(1) == pti) 54 | other_op = binop->getOperand(0); 55 | else 56 | assert(false && "binop with invalid user"); 57 | 58 | llvm::Value* gep = irb.CreateGEP(ptr, other_op, "ptigep"); 59 | llvm::Value* new_pti = irb.CreatePtrToInt(gep, binop->getType()); 60 | 61 | queue->push_back(llvm::cast(new_pti)); 62 | binop->replaceAllUsesWith(new_pti); 63 | dead_queue.push_back(binop); 64 | 65 
| changed = true; 66 | } 67 | } 68 | 69 | for (llvm::Instruction* inst : dead_queue) 70 | inst->eraseFromParent(); 71 | if (llvm::isInstructionTriviallyDead(pti, nullptr)) 72 | pti->eraseFromParent(); 73 | 74 | return changed; 75 | } 76 | 77 | public: 78 | bool run() { 79 | // strategy similar to llvm::ConstantPropagation::runOnFunction. 80 | llvm::SmallVector queue; 81 | for (llvm::Instruction& inst : llvm::instructions(fn)) { 82 | if (auto* pti = llvm::dyn_cast(&inst)) { 83 | queue.push_back(pti); 84 | } 85 | } 86 | 87 | bool changed = false; 88 | while (!queue.empty()) { 89 | llvm::SmallVector new_queue; 90 | for (llvm::PtrToIntInst* pti : queue) 91 | changed |= PropagatePTI(pti, &new_queue); 92 | 93 | queue = std::move(new_queue); 94 | } 95 | 96 | return changed; 97 | } 98 | }; 99 | 100 | llvm::PreservedAnalyses PtrToIntFoldPass::run(llvm::Function& fn, 101 | llvm::FunctionAnalysisManager& fam) { 102 | PtrToIntFold ptif(fn); 103 | 104 | if (!ptif.run()) 105 | return llvm::PreservedAnalyses::all(); 106 | 107 | llvm::PreservedAnalyses pa; 108 | pa.preserveSet(); 109 | return pa; 110 | } 111 | 112 | } // namespace dbll 113 | -------------------------------------------------------------------------------- /rewriter/dbll/PtrToIntFold.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DBLL_PTR_TO_INT_FOLD 3 | #define DBLL_PTR_TO_INT_FOLD 4 | 5 | #include "binopt-config.h" 6 | 7 | #include 8 | #include 9 | 10 | namespace dbll { 11 | 12 | class PtrToIntFoldPass : public llvm::PassInfoMixin { 13 | public: 14 | PtrToIntFoldPass() {} 15 | 16 | llvm::PreservedAnalyses run(llvm::Function& fn, llvm::FunctionAnalysisManager& fam); 17 | }; 18 | 19 | } // namespace dbll 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /rewriter/dbll/dbll.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "binopt.h" 3 | #include 
"binopt-config.h" 4 | 5 | #include "ConstMemProp.h" 6 | #include "LowerNativeCall.h" 7 | #include "PtrToIntFold.h" 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | 49 | /* DBLL based on LLVM+Rellume, using default configuration API */ 50 | 51 | namespace LogLevel { 52 | enum { 53 | QUIET = 0, 54 | WARNING, 55 | INFO, 56 | DEBUG, 57 | }; 58 | } // namespace LogLevel 59 | 60 | const char* binopt_driver(void) { 61 | return "DBLL"; 62 | } 63 | 64 | 65 | struct DbllHandle { 66 | llvm::LLVMContext ctx; 67 | 68 | DbllHandle() : ctx() {} 69 | }; 70 | 71 | BinoptHandle binopt_init(void) { 72 | llvm::InitializeNativeTarget(); 73 | llvm::InitializeNativeTargetAsmPrinter(); 74 | llvm::InitializeNativeTargetAsmParser(); 75 | return reinterpret_cast(new DbllHandle()); 76 | } 77 | void binopt_fini(BinoptHandle handle) { 78 | delete reinterpret_cast(handle); 79 | } 80 | 81 | static llvm::Type* dbll_map_type(BinoptType type, llvm::LLVMContext& ctx) { 82 | switch (type) { 83 | case BINOPT_TY_VOID: return llvm::Type::getVoidTy(ctx); 84 | case BINOPT_TY_INT8: return llvm::Type::getInt8Ty(ctx); 85 | case BINOPT_TY_INT16: return llvm::Type::getInt16Ty(ctx); 86 | case BINOPT_TY_INT32: return llvm::Type::getInt32Ty(ctx); 87 | case BINOPT_TY_INT64: return llvm::Type::getInt64Ty(ctx); 88 | case BINOPT_TY_UINT8: return llvm::Type::getInt8Ty(ctx); 89 | case BINOPT_TY_UINT16: return llvm::Type::getInt16Ty(ctx); 90 | case BINOPT_TY_UINT32: return llvm::Type::getInt32Ty(ctx); 91 | case BINOPT_TY_UINT64: return 
llvm::Type::getInt64Ty(ctx); 92 | case BINOPT_TY_FLOAT: return llvm::Type::getFloatTy(ctx); 93 | case BINOPT_TY_DOUBLE: return llvm::Type::getDoubleTy(ctx); 94 | case BINOPT_TY_PTR: return llvm::Type::getInt8PtrTy(ctx); 95 | case BINOPT_TY_PTR_NOALIAS: return llvm::Type::getInt8PtrTy(ctx); 96 | default: return nullptr; 97 | } 98 | } 99 | 100 | static llvm::FunctionType* dbll_map_function_type(BinoptCfgRef cfg) { 101 | DbllHandle* handle = reinterpret_cast(cfg->handle); 102 | llvm::Type* ret_ty = dbll_map_type(cfg->ret_ty, handle->ctx); 103 | if (ret_ty == nullptr) 104 | return nullptr; 105 | 106 | llvm::SmallVector params; 107 | for (unsigned i = 0; i < cfg->param_count; ++i) { 108 | llvm::Type* param_ty = dbll_map_type(cfg->params[i].ty, handle->ctx); 109 | if (param_ty == nullptr) 110 | return nullptr; 111 | params.push_back(param_ty); 112 | } 113 | 114 | return llvm::FunctionType::get(ret_ty, params, false); 115 | } 116 | 117 | static llvm::Type* dbll_get_cpu_type(llvm::LLVMContext& ctx) { 118 | // TODO: extract from rellume programatically 119 | llvm::SmallVector cpu_types; 120 | cpu_types.push_back(llvm::Type::getInt64Ty(ctx)); // instruction pointer 121 | cpu_types.push_back(llvm::ArrayType::get(llvm::Type::getInt64Ty(ctx), 16)); 122 | cpu_types.push_back(llvm::ArrayType::get(llvm::Type::getInt1Ty(ctx), 8)); 123 | cpu_types.push_back(llvm::ArrayType::get(llvm::Type::getInt64Ty(ctx), 2)); 124 | cpu_types.push_back(llvm::ArrayType::get(llvm::Type::getIntNTy(ctx, 128), 16)); 125 | return llvm::StructType::get(ctx, cpu_types); 126 | } 127 | 128 | static llvm::Value* dbll_gep_helper(llvm::IRBuilder<>& irb, llvm::Value* base, 129 | llvm::ArrayRef idxs) { 130 | llvm::SmallVector consts; 131 | for (auto& idx : idxs) 132 | consts.push_back(irb.getInt32(idx)); 133 | return irb.CreateGEP(base, consts); 134 | } 135 | 136 | class StrictSptrAAResult : public llvm::AAResultBase { 137 | friend llvm::AAResultBase; 138 | 139 | const llvm::DataLayout &DL; 140 | public: 141 | 
StrictSptrAAResult(const llvm::DataLayout &DL) : AAResultBase(), DL(DL) {} 142 | 143 | llvm::AliasResult alias(const llvm::MemoryLocation &LocA, const llvm::MemoryLocation &LocB, 144 | llvm::AAQueryInfo &AAQI) { 145 | if (!LocA.Ptr->getType()->isPointerTy() || !LocB.Ptr->getType()->isPointerTy()) 146 | return llvm::NoAlias; 147 | 148 | if (IsSptr(LocA.Ptr) != IsSptr(LocB.Ptr)) 149 | return llvm::NoAlias; 150 | 151 | return AAResultBase::alias(LocA, LocB, AAQI); 152 | } 153 | 154 | private: 155 | bool IsSptr(const llvm::Value* val) { 156 | const llvm::Value* underlying = llvm::GetUnderlyingObject(val, DL); 157 | if (auto* inst = llvm::dyn_cast(underlying)) 158 | return inst->getMetadata("dbll.sptr") != nullptr; 159 | if (auto* arg = llvm::dyn_cast(underlying)) { 160 | if (auto* md = arg->getParent()->getMetadata("dbll.sptr")) { 161 | llvm::Metadata* idx_md = md->getOperand(0); 162 | auto* idx_mdc = llvm::cast(idx_md); 163 | auto idx = idx_mdc->getValue()->getUniqueInteger().getLimitedValue(); 164 | return arg->getArgNo() == idx; 165 | } 166 | } 167 | return false; 168 | } 169 | }; 170 | 171 | class StrictSptrAA : public llvm::AnalysisInfoMixin { 172 | friend llvm::AnalysisInfoMixin; 173 | static llvm::AnalysisKey Key; 174 | public: 175 | using Result = StrictSptrAAResult; 176 | StrictSptrAAResult run(llvm::Function& f, llvm::FunctionAnalysisManager& fam) { 177 | return StrictSptrAAResult(f.getParent()->getDataLayout()); 178 | } 179 | }; 180 | 181 | llvm::AnalysisKey StrictSptrAA::Key; 182 | 183 | namespace { 184 | 185 | class Optimizer { 186 | BinoptCfgRef cfg; 187 | std::vector const_memranges; 188 | 189 | llvm::LLVMContext& ctx; 190 | llvm::Module* mod; 191 | llvm::TargetMachine* tm; 192 | 193 | LLConfig* rlcfg; 194 | 195 | llvm::Function* rl_func_call; 196 | llvm::Function* rl_func_tail; 197 | llvm::Function* func_helper_ext; 198 | llvm::GlobalVariable* llvm_used; 199 | 200 | llvm::DenseMap lifted_fns; 201 | 202 | Optimizer(BinoptCfgRef cfg, llvm::Module* mod) 
203 | : cfg(cfg), ctx(mod->getContext()), mod(mod) {} 204 | ~Optimizer(); 205 | 206 | void DebugPrint(int log_level, const char* name); 207 | 208 | bool Init(); 209 | llvm::Function* Lift(BinoptFunc func); 210 | bool DiscoverAndLift(void); 211 | 212 | llvm::Function* Wrap(llvm::Function* orig_fn); 213 | 214 | static void OptimizeLight(llvm::Function* fn); 215 | 216 | void OptimizeHeavy(); 217 | void PrepareForCodeGen(); 218 | 219 | public: 220 | static BinoptFunc OptimizeFromConfig(BinoptCfgRef cfg); 221 | }; 222 | 223 | Optimizer::~Optimizer() { 224 | ll_config_free(rlcfg); 225 | } 226 | 227 | void Optimizer::DebugPrint(int log_level, const char* name) { 228 | if (cfg->log_level < log_level) 229 | return; 230 | llvm::dbgs() << "==================================================\n" 231 | << "== Module dump: " << name << "\n"; 232 | mod->print(llvm::dbgs(), nullptr); 233 | } 234 | 235 | bool Optimizer::Init() { 236 | std::string error; 237 | std::string triple = llvm::sys::getProcessTriple(); 238 | auto* target = llvm::TargetRegistry::lookupTarget(triple, error); 239 | if (!target) { 240 | if (cfg->log_level >= LogLevel::WARNING) 241 | llvm::errs() << "could not select target: " << error << "\n"; 242 | 243 | return false; 244 | } 245 | 246 | llvm::TargetOptions options; 247 | options.EnableFastISel = false; 248 | tm = target->createTargetMachine(triple, 249 | /*CPU=*/llvm::sys::getHostCPUName(), 250 | /*Features=*/"-avx", options, 251 | llvm::None, // llvm::Reloc::Default, 252 | llvm::None, // llvm::CodeModel::JITDefault, 253 | llvm::CodeGenOpt::Aggressive, /*JIT*/true); 254 | 255 | mod->setTargetTriple(triple); 256 | 257 | llvm::Type* i8p = llvm::Type::getInt8PtrTy(ctx); 258 | llvm::Type* void_ty = llvm::Type::getVoidTy(ctx); 259 | auto helper_ty = llvm::FunctionType::get(void_ty, {i8p}, false); 260 | auto linkage = llvm::GlobalValue::ExternalLinkage; 261 | rl_func_call = llvm::Function::Create(helper_ty, linkage, "call_fn", mod); 262 | rl_func_tail = 
llvm::Function::Create(helper_ty, linkage, "tail_fn", mod); 263 | func_helper_ext = dbll::LowerNativeCallPass::CreateNativeCallFn(*mod); 264 | 265 | // Store references to functions in llvm.used to prevent early removal. 266 | assert(!mod->getGlobalVariable("llvm.used") && "llvm.used already defined"); 267 | llvm::LLVMContext& ctx = mod->getContext(); 268 | llvm::Type* i8p_ty = llvm::Type::getInt8PtrTy(ctx); 269 | 270 | llvm::SmallVector used_vals; 271 | used_vals.push_back(llvm::ConstantExpr::getPointerCast(rl_func_call, i8p_ty)); 272 | used_vals.push_back(llvm::ConstantExpr::getPointerCast(rl_func_tail, i8p_ty)); 273 | used_vals.push_back(llvm::ConstantExpr::getPointerCast(func_helper_ext, i8p_ty)); 274 | 275 | llvm::ArrayType* used_ty = llvm::ArrayType::get(i8p_ty, used_vals.size()); 276 | llvm_used = new llvm::GlobalVariable( 277 | *mod, used_ty, /*const=*/false, llvm::GlobalValue::AppendingLinkage, 278 | llvm::ConstantArray::get(used_ty, used_vals), "llvm.used"); 279 | llvm_used->setSection("llvm.metadata"); 280 | 281 | // Create Rellume config 282 | rlcfg = ll_config_new(); 283 | ll_config_enable_fast_math(rlcfg, !!(cfg->fast_math & 1)); 284 | ll_config_set_call_ret_clobber_flags(rlcfg, true); 285 | ll_config_set_use_native_segment_base(rlcfg, true); 286 | ll_config_enable_full_facets(rlcfg, true); 287 | ll_config_set_tail_func(rlcfg, llvm::wrap(rl_func_tail)); 288 | ll_config_set_call_func(rlcfg, llvm::wrap(rl_func_call)); 289 | 290 | for (size_t i = 0; i < cfg->memrange_count; i++) { 291 | const auto* range = &cfg->memranges[i]; 292 | if (range->flags == BINOPT_MEM_CONST) { 293 | uintptr_t base = reinterpret_cast(range->base); 294 | const_memranges.push_back({base, range->size}); 295 | } 296 | } 297 | 298 | std::ifstream proc_maps; 299 | proc_maps.open("/proc/self/maps"); 300 | if (proc_maps.is_open()) { 301 | std::string map; 302 | while (std::getline(proc_maps, map)) { 303 | char *endptr; 304 | uintptr_t start = strtoull(map.c_str(), &endptr, 16); 305 | 
uintptr_t end = strtoull(++endptr, &endptr, 16); 306 | bool prot_r = *(++endptr) == 'r'; 307 | bool prot_w = *(++endptr) == 'w'; 308 | // bool prot_x = *(++endptr) == 'x'; 309 | 310 | if (prot_r && !prot_w) 311 | const_memranges.push_back({start, end-start}); 312 | } 313 | } 314 | 315 | return true; 316 | } 317 | 318 | llvm::Function* Optimizer::Lift(BinoptFunc func) { 319 | const auto& lifted_fns_iter = lifted_fns.find(func); 320 | if (lifted_fns_iter != lifted_fns.end()) 321 | return lifted_fns_iter->second; 322 | 323 | // Do not lift PLT entries. This is "jmp [rip + ]". 324 | if (*((uint8_t*) func) == 0xff && *((uint8_t*) func + 1) == 0x25) 325 | return nullptr; 326 | 327 | if (cfg->log_level >= LogLevel::DEBUG) 328 | llvm::dbgs() << "Lifting " << (void*)func << "\n"; 329 | 330 | // Note: rl_func_call/rl_func_tail must have no uses before this function. 331 | assert(rl_func_tail->hasOneUse() && "rl_func_tail has uses (before)"); 332 | assert(rl_func_call->hasOneUse() && "rl_func_call has uses (before)"); 333 | 334 | LLFunc* rlfn = ll_func_new(llvm::wrap(mod), rlcfg); 335 | bool fail = ll_func_decode_cfg(rlfn, reinterpret_cast(func), 336 | nullptr, nullptr); 337 | if (fail) { 338 | if (cfg->log_level >= LogLevel::DEBUG) 339 | llvm::dbgs() << "Lifting " << (void*)func << " FAILED (decode).\n"; 340 | 341 | ll_func_dispose(rlfn); 342 | return nullptr; 343 | } 344 | 345 | llvm::Value* fn_val = llvm::unwrap(ll_func_lift(rlfn)); 346 | ll_func_dispose(rlfn); 347 | 348 | if (!fn_val) { 349 | if (cfg->log_level >= LogLevel::DEBUG) 350 | llvm::dbgs() << "Lifting " << (void*)func << " FAILED (lift).\n"; 351 | return nullptr; 352 | } 353 | 354 | llvm::Function* fn = llvm::cast(fn_val); 355 | fn->setLinkage(llvm::GlobalValue::PrivateLinkage); 356 | 357 | std::stringstream fname; 358 | fname << "lift_" << std::hex << reinterpret_cast(func); 359 | fn->setName(fname.str()); 360 | 361 | llvm::IRBuilder<> irb(fn->getEntryBlock().getFirstNonPHI()); 362 | 363 | // Argument index 364 
| llvm::Metadata* sptr_md = llvm::ConstantAsMetadata::get(irb.getInt32(0)); 365 | fn->setMetadata("dbll.sptr", llvm::MDNode::get(ctx, {sptr_md})); 366 | 367 | // fn has the signature void(i8* sptr), and may contain calls to 368 | // rl_func_call and rl_func_tail. First, we replace these helper functions 369 | // with other helper functions which additionally contain parameters for RIP 370 | // and a pointer to the (user) return address, so that constant propagation 371 | // will eventually give us a constant RIP. 372 | // 373 | // The difference between tail_func and call_func is the following: For 374 | // tail_func, we hook the return address of this function. For call_func we 375 | // must hook the return address which was just stored on the fake stack. 376 | 377 | llvm::SmallVector, 8> tmp_insts; 378 | for (const llvm::Use& use : rl_func_tail->uses()) { 379 | llvm::CallInst* call = llvm::dyn_cast(use.getUser()); 380 | if (!call || call->getCalledFunction() != rl_func_tail) 381 | continue; 382 | tmp_insts.push_back(std::make_pair(call, false)); 383 | } 384 | for (const llvm::Use& use : rl_func_call->uses()) { 385 | llvm::CallInst* call = llvm::dyn_cast(use.getUser()); 386 | if (!call || call->getCalledFunction() != rl_func_call) 387 | continue; 388 | tmp_insts.push_back(std::make_pair(call, true)); 389 | } 390 | 391 | llvm::Type* i64 = irb.getInt64Ty(); 392 | llvm::Type* i64p = i64->getPointerTo(); 393 | llvm::Value* sptr = &fn->arg_begin()[0]; 394 | llvm::Value* sptr_ip = irb.CreateBitCast(sptr, i64p); 395 | // Note: we first do the GEP and then cast. EarlyCSE is not very clever in 396 | // reasoning about where a GEP leads to. For the same reason, use ptrtoint 397 | // instead of casting to an i64**. 
398 | llvm::Value* sptr_sp_i8p = irb.CreateConstGEP1_64(sptr, 5 * 8); 399 | llvm::Value* sptr_sp = irb.CreateBitCast(sptr_sp_i8p, i64p); 400 | llvm::Value* entry_sp = irb.CreateIntToPtr(irb.CreateLoad(i64, sptr_sp), i64p); 401 | 402 | llvm::Value* args[4]; 403 | for (auto [inst, is_call] : tmp_insts) { 404 | irb.SetInsertPoint(inst); 405 | 406 | assert(inst->getArgOperand(0) == sptr && "multiple sptrs"); 407 | args[0] = sptr; 408 | if (is_call) 409 | args[1] = irb.CreateIntToPtr(irb.CreateLoad(i64, sptr_sp), i64p); 410 | else 411 | args[1] = entry_sp; 412 | args[2] = irb.CreateLoad(irb.getInt64Ty(), sptr_ip); 413 | args[3] = irb.getInt1(is_call ? 1 : 0); 414 | 415 | llvm::Value* return_rip = irb.CreateLoad(irb.getInt64Ty(), args[1]); 416 | 417 | irb.CreateCall(func_helper_ext, args); 418 | 419 | // We have some information about ext_helper regarding RIP/RSP. 420 | // Set RIP to the address which was just stored on the stack before. 421 | irb.CreateStore(return_rip, sptr_ip); 422 | // Set user RSP to stored_rip_ptr + 8 423 | llvm::Value* new_sp = irb.CreateConstGEP1_64(args[1], 1); 424 | irb.CreateStore(irb.CreatePtrToInt(new_sp, i64), sptr_sp); 425 | 426 | // Remove call to rl_func_call/rl_func_tail. 427 | inst->eraseFromParent(); 428 | } 429 | tmp_insts.clear(); 430 | 431 | // Note: rl_func_call/rl_func_tail must have no uses after this function. 432 | assert(rl_func_tail->hasOneUse() && "rl_func_tail has uses (after)"); 433 | assert(rl_func_call->hasOneUse() && "rl_func_call has uses (after)"); 434 | 435 | OptimizeLight(fn); 436 | 437 | // Check if any tail call remains after optimization. If so, don't mark the 438 | // function as inline. 
439 | bool has_tail_fn = false; 440 | for (const llvm::Use& use : func_helper_ext->uses()) { 441 | llvm::CallInst* ci = llvm::dyn_cast(use.getUser()); 442 | if (!ci || ci->getCalledFunction() != func_helper_ext || 443 | ci->getParent()->getParent() != fn) 444 | continue; 445 | auto is_call = llvm::dyn_cast(ci->getArgOperand(3)); 446 | if (!is_call || is_call->isZeroValue()) 447 | has_tail_fn = true; 448 | } 449 | if (!has_tail_fn) { 450 | fn->addFnAttr(llvm::Attribute::InlineHint); 451 | fn->addFnAttr(llvm::Attribute::AlwaysInline); 452 | } 453 | 454 | lifted_fns[func] = fn; 455 | 456 | return fn; 457 | } 458 | 459 | bool Optimizer::DiscoverAndLift() { 460 | // Try to iteratively discover called functions and lift them as well. 461 | bool new_code = false; 462 | bool changed = true; 463 | llvm::SmallVector, 8> ext_call_queue; 464 | while (changed) { 465 | changed = false; 466 | for (const llvm::Use& use : func_helper_ext->uses()) { 467 | llvm::CallInst* call = llvm::dyn_cast(use.getUser()); 468 | if (!call || call->getCalledFunction() != func_helper_ext) 469 | continue; 470 | if (auto c = llvm::dyn_cast(call->getArgOperand(2))) { 471 | uint64_t addr = c->getUniqueInteger().getLimitedValue(); 472 | ext_call_queue.push_back(std::make_pair(addr, call)); 473 | } 474 | } 475 | 476 | for (auto [addr, inst] : ext_call_queue) { 477 | llvm::Function* fn = Lift(reinterpret_cast(addr)); 478 | if (cfg->log_level >= LogLevel::DEBUG) { 479 | llvm::errs() << "selecting call: " << (void*) addr << " "; 480 | inst->print(llvm::errs()); 481 | llvm::errs() << "; got " << fn << "\n"; 482 | } 483 | if (!fn) 484 | continue; 485 | 486 | auto is_call = llvm::dyn_cast(inst->getArgOperand(3)); 487 | auto* new_inst = llvm::CallInst::Create(fn, {inst->getArgOperand(0)}); 488 | llvm::ReplaceInstWithInst(inst, new_inst); 489 | 490 | // Directly inline tail functions 491 | if (is_call && is_call->isZeroValue()) { 492 | llvm::InlineFunctionInfo ifi; 493 | #if DBLL_LLVM_MAJOR < 11 494 | 
llvm::InlineFunction(llvm::CallSite(new_inst), ifi); 495 | #else 496 | llvm::InlineFunction(*new_inst, ifi); 497 | #endif 498 | } 499 | new_code = true; 500 | changed = true; 501 | } 502 | 503 | ext_call_queue.clear(); 504 | } 505 | 506 | return new_code; 507 | } 508 | 509 | llvm::Function* Optimizer::Wrap(llvm::Function* orig_fn) { 510 | llvm::FunctionType* fnty = dbll_map_function_type(cfg); 511 | if (fnty == nullptr) {// if we don't support the type 512 | DebugPrint(LogLevel::WARNING, "unsupported function type"); 513 | return nullptr; 514 | } 515 | 516 | // Create new function 517 | llvm::LLVMContext& ctx = orig_fn->getContext(); 518 | auto linkage = llvm::GlobalValue::ExternalLinkage; 519 | llvm::Function* fn = llvm::Function::Create(fnty, linkage, "glob", 520 | orig_fn->getParent()); 521 | llvm::BasicBlock* bb = llvm::BasicBlock::Create(ctx, "", fn); 522 | llvm::IRBuilder<> irb(bb); 523 | 524 | // Allocate CPU struct 525 | llvm::Type* cpu_type = dbll_get_cpu_type(ctx); 526 | llvm::AllocaInst* alloca = irb.CreateAlloca(cpu_type, int{0}); 527 | alloca->setMetadata("dbll.sptr", llvm::MDNode::get(ctx, {})); 528 | #if DBLL_LLVM_MAJOR < 10 529 | alloca->setAlignment(16); 530 | #else 531 | alloca->setAlignment(llvm::Align(16)); 532 | #endif 533 | 534 | // Set direction flag to zero 535 | irb.CreateStore(irb.getFalse(), dbll_gep_helper(irb, alloca, {0, 2, 6})); 536 | 537 | unsigned gp_regs[6] = { 7, 6, 2, 1, 8, 9 }; 538 | unsigned gpRegOffset = 0; 539 | unsigned fpRegOffset = 0; 540 | unsigned stackOffset = 8; // return address 541 | llvm::SmallVector, 4> stack_slots; 542 | llvm::Value* target; 543 | unsigned arg_idx = 0; 544 | for (auto arg = fn->arg_begin(); arg != fn->arg_end(); ++arg, ++arg_idx) { 545 | llvm::Type* arg_type = arg->getType(); 546 | llvm::Value* arg_val = arg; 547 | 548 | if (cfg->params[arg_idx].ty == BINOPT_TY_PTR_NOALIAS) { 549 | fn->addParamAttr(arg_idx, llvm::Attribute::NoAlias); 550 | } 551 | 552 | // Fix known parameters 553 | if (const 
void* const_vptr = cfg->params[arg_idx].const_val) { 554 | auto const_ptr = reinterpret_cast(const_vptr); 555 | size_t const_sz = arg_type->getPrimitiveSizeInBits(); 556 | if (arg_type->isPointerTy()) 557 | const_sz = sizeof(void*) * 8; 558 | llvm::APInt const_val(const_sz, llvm::ArrayRef(const_ptr, const_sz/64)); 559 | arg_val = llvm::ConstantInt::get(ctx, const_val); 560 | } 561 | 562 | if (arg_type->isIntOrPtrTy()) { 563 | if (gpRegOffset < 6) { 564 | if (arg_type->isPointerTy()) 565 | arg_val = irb.CreatePtrToInt(arg_val, irb.getInt64Ty()); 566 | else // arg_type->isIntegerTy() 567 | arg_val = irb.CreateZExtOrTrunc(arg_val, irb.getInt64Ty()); 568 | 569 | target = dbll_gep_helper(irb, alloca, {0, 1, gp_regs[gpRegOffset]}); 570 | irb.CreateStore(arg_val, target); 571 | gpRegOffset++; 572 | } else { 573 | stack_slots.push_back(std::make_pair(stackOffset, arg_val)); 574 | stackOffset += 8; 575 | } 576 | } else if (arg_type->isFloatTy() || arg_type->isDoubleTy()) { 577 | if (fpRegOffset < 8) { 578 | llvm::Type* int_type = irb.getIntNTy(arg_type->getPrimitiveSizeInBits()); 579 | llvm::Value* int_val = irb.CreateBitCast(arg_val, int_type); 580 | 581 | target = dbll_gep_helper(irb, alloca, {0, 4, fpRegOffset}); 582 | llvm::Type* vec_type = target->getType()->getPointerElementType(); 583 | irb.CreateStore(irb.CreateZExt(int_val, vec_type), target); 584 | fpRegOffset++; 585 | } else { 586 | stack_slots.push_back(std::make_pair(stackOffset, arg_val)); 587 | stackOffset += 8; 588 | } 589 | } else { 590 | DebugPrint(LogLevel::WARNING, "unsupported parameter type"); 591 | return nullptr; 592 | } 593 | } 594 | 595 | std::size_t stack_frame_size = 4096 - 8; 596 | std::size_t stack_size = stack_frame_size + stackOffset; 597 | 598 | llvm::Value* stack_sz_val = irb.getInt64(stack_size); 599 | llvm::AllocaInst* stack = irb.CreateAlloca(irb.getInt8Ty(), stack_sz_val); 600 | #if DBLL_LLVM_MAJOR < 10 601 | stack->setAlignment(16); 602 | #else 603 | 
stack->setAlignment(llvm::Align(16)); 604 | #endif 605 | llvm::Value* sp_ptr = irb.CreateGEP(stack, irb.getInt64(stack_frame_size)); 606 | llvm::Value* sp = irb.CreatePtrToInt(sp_ptr, irb.getInt64Ty()); 607 | irb.CreateStore(sp, dbll_gep_helper(irb, alloca, {0, 1, 4})); 608 | 609 | for (const auto& [offset, value] : stack_slots) { 610 | llvm::Value* ptr = irb.CreateGEP(sp_ptr, irb.getInt64(offset)); 611 | irb.CreateStore(value, irb.CreateBitCast(ptr, value->getType()->getPointerTo())); 612 | } 613 | 614 | llvm::Value* call_arg = irb.CreatePointerCast(alloca, irb.getInt8PtrTy()); 615 | llvm::CallInst* call = irb.CreateCall(orig_fn, {call_arg}); 616 | 617 | llvm::Type* ret_type = fn->getReturnType(); 618 | switch (ret_type->getTypeID()) 619 | { 620 | llvm::Value* ret; 621 | 622 | case llvm::Type::TypeID::VoidTyID: 623 | irb.CreateRetVoid(); 624 | break; 625 | case llvm::Type::TypeID::IntegerTyID: 626 | ret = irb.CreateLoad(dbll_gep_helper(irb, alloca, {0, 1, 0})); 627 | ret = irb.CreateTruncOrBitCast(ret, ret_type); 628 | irb.CreateRet(ret); 629 | break; 630 | case llvm::Type::TypeID::PointerTyID: 631 | ret = irb.CreateLoad(dbll_gep_helper(irb, alloca, {0, 1, 0})); 632 | ret = irb.CreateIntToPtr(ret, ret_type); 633 | irb.CreateRet(ret); 634 | break; 635 | case llvm::Type::TypeID::FloatTyID: 636 | case llvm::Type::TypeID::DoubleTyID: 637 | ret = irb.CreateLoad(dbll_gep_helper(irb, alloca, {0, 4, 0})); 638 | ret = irb.CreateTrunc(ret, irb.getIntNTy(ret_type->getPrimitiveSizeInBits())); 639 | ret = irb.CreateBitCast(ret, ret_type); 640 | irb.CreateRet(ret); 641 | break; 642 | default: 643 | assert(false); 644 | break; 645 | } 646 | 647 | llvm::InlineFunctionInfo ifi; 648 | #if DBLL_LLVM_MAJOR < 11 649 | llvm::InlineFunction(llvm::CallSite(call), ifi); 650 | #else 651 | llvm::InlineFunction(*call, ifi); 652 | #endif 653 | 654 | OptimizeLight(fn); 655 | 656 | return fn; 657 | } 658 | 659 | void Optimizer::OptimizeLight(llvm::Function* fn) { 660 | // Do some very simple 
optimizations, so that calls to ext_helper are 661 | // simplified that a constant target RIP is propagated and subsequent 662 | // branches based on the RIP value are eliminated. 663 | 664 | llvm::PassBuilder pb; 665 | llvm::FunctionPassManager fpm(false); 666 | 667 | llvm::LoopAnalysisManager lam(false); 668 | llvm::FunctionAnalysisManager fam(false); 669 | llvm::CGSCCAnalysisManager cgam(false); 670 | llvm::ModuleAnalysisManager mam(false); 671 | 672 | fam.registerPass([&] { 673 | llvm::AAManager aa; 674 | aa.registerFunctionAnalysis(); 675 | aa.registerFunctionAnalysis(); 676 | return aa; 677 | }); 678 | fam.registerPass([&] { return StrictSptrAA(); }); 679 | 680 | // Register analysis passes... 681 | pb.registerModuleAnalyses(mam); 682 | pb.registerCGSCCAnalyses(cgam); 683 | pb.registerFunctionAnalyses(fam); 684 | pb.registerLoopAnalyses(lam); 685 | pb.crossRegisterProxies(lam, fam, cgam, mam); 686 | 687 | fpm.addPass(llvm::EarlyCSEPass(true)); 688 | fpm.addPass(llvm::InstCombinePass(false)); 689 | fpm.addPass(llvm::SimplifyCFGPass()); 690 | fpm.addPass(dbll::PtrToIntFoldPass()); 691 | // fpm.addPass(llvm::AAEvaluator()); 692 | fpm.run(*fn, fam); 693 | } 694 | 695 | void Optimizer::OptimizeHeavy() { 696 | llvm::PassInstrumentationCallbacks pic; 697 | llvm::StandardInstrumentations si; 698 | si.registerCallbacks(pic); 699 | 700 | llvm::PassBuilder pb(tm, llvm::PipelineTuningOptions(), llvm::None, &pic); 701 | 702 | llvm::LoopAnalysisManager lam(false); 703 | llvm::FunctionAnalysisManager fam(false); 704 | llvm::CGSCCAnalysisManager cgam(false); 705 | llvm::ModuleAnalysisManager mam(false); 706 | 707 | fam.registerPass([&] { 708 | llvm::AAManager aa; 709 | aa.registerFunctionAnalysis(); 710 | aa.registerFunctionAnalysis(); 711 | return aa; 712 | }); 713 | fam.registerPass([&] { return StrictSptrAA(); }); 714 | 715 | // Register analysis passes... 
716 | pb.registerModuleAnalyses(mam); 717 | pb.registerCGSCCAnalyses(cgam); 718 | pb.registerFunctionAnalyses(fam); 719 | pb.registerLoopAnalyses(lam); 720 | pb.crossRegisterProxies(lam, fam, cgam, mam); 721 | 722 | pb.registerPeepholeEPCallback([this] (llvm::FunctionPassManager& fpm, 723 | llvm::PassBuilder::OptimizationLevel ol) { 724 | fpm.addPass(dbll::PtrToIntFoldPass()); 725 | fpm.addPass(dbll::ConstMemPropPass(const_memranges)); 726 | }); 727 | auto ol = llvm::PassBuilder::OptimizationLevel::O3; 728 | auto mpm = pb.buildPerModuleDefaultPipeline(ol, false); 729 | // mpm.addPass(llvm::createModuleToFunctionPassAdaptor(llvm::AAEvaluator())); 730 | 731 | mpm.run(*mod, mam); 732 | } 733 | 734 | void Optimizer::PrepareForCodeGen() { 735 | // Now we can let the optimizer remove all the function declarations. 736 | // TODO: this apparently doesn't work. 737 | // if (llvm_used) { 738 | // llvm_used->removeFromParent(); 739 | // llvm_used = nullptr; 740 | // } 741 | 742 | // TODO: Don't run a full optimization pipeline here. 743 | llvm::PassInstrumentationCallbacks pic; 744 | llvm::StandardInstrumentations si; 745 | si.registerCallbacks(pic); 746 | 747 | llvm::PassBuilder pb(tm, llvm::PipelineTuningOptions(), llvm::None, &pic); 748 | 749 | llvm::LoopAnalysisManager lam(false); 750 | llvm::FunctionAnalysisManager fam(false); 751 | llvm::CGSCCAnalysisManager cgam(false); 752 | llvm::ModuleAnalysisManager mam(false); 753 | 754 | // Register the AA manager 755 | // fam.registerPass([&] { return pb.buildDefaultAAPipeline(); }); 756 | fam.registerPass([&] { 757 | llvm::AAManager aa; 758 | aa.registerFunctionAnalysis(); 759 | aa.registerFunctionAnalysis(); 760 | return aa; 761 | }); 762 | fam.registerPass([&] { return StrictSptrAA(); }); 763 | 764 | // Register analysis passes... 
765 | pb.registerModuleAnalyses(mam); 766 | pb.registerCGSCCAnalyses(cgam); 767 | pb.registerFunctionAnalyses(fam); 768 | pb.registerLoopAnalyses(lam); 769 | pb.crossRegisterProxies(lam, fam, cgam, mam); 770 | 771 | // Lower native calls in the beginning. 772 | pb.registerPipelineStartEPCallback([] (llvm::ModulePassManager& mpm) { 773 | mpm.addPass(dbll::LowerNativeCallPass()); 774 | }); 775 | auto ol = llvm::PassBuilder::OptimizationLevel::O3; 776 | auto mpm = pb.buildPerModuleDefaultPipeline(ol, false); 777 | 778 | mpm.run(*mod, mam); 779 | } 780 | 781 | BinoptFunc Optimizer::OptimizeFromConfig(BinoptCfgRef cfg) { 782 | DbllHandle* handle = reinterpret_cast(cfg->handle); 783 | auto mod_u = std::make_unique("binopt", handle->ctx); 784 | llvm::Module* mod = mod_u.get(); 785 | 786 | Optimizer opt(cfg, mod); 787 | if (!opt.Init()) 788 | return nullptr; 789 | 790 | llvm::Function* fn = opt.Lift(cfg->func); 791 | if (!fn) 792 | return nullptr; 793 | 794 | opt.DebugPrint(LogLevel::DEBUG, "Initially lifted"); 795 | 796 | llvm::Function* wrapped_fn = opt.Wrap(fn); 797 | if (wrapped_fn == nullptr) 798 | return nullptr; 799 | 800 | opt.DebugPrint(LogLevel::DEBUG, "After ABI wrap"); 801 | 802 | opt.DiscoverAndLift(); 803 | bool new_code; 804 | do { 805 | opt.DebugPrint(LogLevel::DEBUG, "After discovery iteration"); 806 | opt.OptimizeHeavy(); 807 | new_code = opt.DiscoverAndLift(); 808 | } while (new_code); 809 | 810 | opt.DebugPrint(LogLevel::DEBUG, "After full discovery"); 811 | 812 | opt.PrepareForCodeGen(); 813 | 814 | opt.DebugPrint(LogLevel::INFO, "Before codegen"); 815 | 816 | // This should only scream if our code has a bug. 
817 | if (llvm::verifyFunction(*wrapped_fn, &llvm::errs())) { 818 | wrapped_fn->eraseFromParent(); 819 | return nullptr; 820 | } 821 | 822 | llvm::EngineBuilder builder(std::move(mod_u)); 823 | llvm::ExecutionEngine* engine = builder.create(opt.tm); 824 | if (!engine) 825 | return cfg->func; // we could not create the JIT engine 826 | 827 | const auto& name = wrapped_fn->getName(); 828 | auto raw_ptr = engine->getFunctionAddress(name.str()); 829 | 830 | return reinterpret_cast(raw_ptr); 831 | } 832 | 833 | } 834 | 835 | BinoptFunc binopt_spec_create(BinoptCfgRef cfg) { 836 | if (BinoptFunc new_fn = Optimizer::OptimizeFromConfig(cfg)) 837 | return new_fn; 838 | 839 | if (cfg->log_level >= LogLevel::WARNING) 840 | llvm::errs() << "warning: returning old function\n"; 841 | 842 | return cfg->func; 843 | } 844 | 845 | void binopt_spec_delete(BinoptHandle handle, BinoptFunc spec_func) { 846 | // TODO: implement 847 | } 848 | 849 | __attribute__((constructor)) 850 | static void dbll_support_pass_arguments(void) { 851 | llvm::cl::ParseEnvironmentOptions("binopt-dbll", "DBLL_OPTS"); 852 | } 853 | -------------------------------------------------------------------------------- /rewriter/dbll/meson.build: -------------------------------------------------------------------------------- 1 | libllvm = dependency('llvm', version: ['>=8', '<12']) 2 | cpp_args = ['-DDBLL_LLVM_MAJOR='+libllvm.version().split('.')[0]] 3 | 4 | rellume = subproject('rellume', default_options: ['with_rv64=false']) 5 | librellume = rellume.get_variable('librellume') 6 | 7 | sources = ['dbll.cc', 'ConstMemProp.cc', 'LowerNativeCall.cc', 'PtrToIntFold.cc'] 8 | 9 | binopt_dbll_lib = shared_library('binopt-dbll', sources, 10 | dependencies: [binopt, libllvm, librellume], 11 | cpp_args: cpp_args, 12 | install: true) 13 | -------------------------------------------------------------------------------- /rewriter/dbrew/dbrew.c: -------------------------------------------------------------------------------- 1 | 
2 | #include "binopt.h" 3 | #include "binopt-config.h" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /* DBrew bindings, using default configuration API */ 13 | 14 | const char* binopt_driver(void) { 15 | return "DBrew"; 16 | } 17 | BinoptHandle binopt_init(void) { return NULL; } 18 | void binopt_fini(BinoptHandle handle) { 19 | (void) handle; 20 | } 21 | 22 | BinoptFunc binopt_spec_create(BinoptCfgRef cfg) { 23 | // We only support 6 integer-pointer arguments. 24 | if (cfg->param_count > 6) 25 | return cfg->func; 26 | 27 | Rewriter* r = dbrew_new(); 28 | bool verbose = cfg->log_level >= 3; 29 | dbrew_verbose(r, verbose, verbose, verbose); 30 | dbrew_optverbose(r, verbose); 31 | dbrew_set_decoding_capacity(r, 100000, 100); 32 | // Set instrcount, bbcount, and codebuf size. 33 | dbrew_set_capture_capacity(r, 1000000, 100, 0x100000); 34 | dbrew_set_function(r, (uintptr_t) cfg->func); 35 | for (size_t i = 0; i < cfg->implflag_count; i++) { 36 | size_t val = cfg->implflags[i].val; 37 | switch (cfg->implflags[i].flag) { 38 | case 0x10001: // force unknown 39 | dbrew_config_force_unknown(r, val); 40 | break; 41 | default: break; // ignore other flags. 
42 | } 43 | } 44 | dbrew_config_parcount(r, cfg->param_count); 45 | if (cfg->ret_ty == BINOPT_TY_FLOAT || cfg->ret_ty == BINOPT_TY_DOUBLE) 46 | dbrew_config_returnfp(r); 47 | void* args[6] = {0}; 48 | for (size_t i = 0; i < cfg->param_count; ++i) { 49 | if (!cfg->params[i].const_val) 50 | continue; 51 | // Number of bytes actually stored behind const_val for this type. 52 | size_t const_sz; 53 | switch (cfg->params[i].ty) { 54 | case BINOPT_TY_INT8: case BINOPT_TY_UINT8: const_sz = sizeof(uint8_t); break; 55 | case BINOPT_TY_INT16: case BINOPT_TY_UINT16: const_sz = sizeof(uint16_t); break; 56 | case BINOPT_TY_INT32: case BINOPT_TY_UINT32: const_sz = sizeof(uint32_t); break; 57 | case BINOPT_TY_INT64: case BINOPT_TY_UINT64: const_sz = sizeof(uint64_t); break; 58 | case BINOPT_TY_PTR: case BINOPT_TY_PTR_NOALIAS: const_sz = sizeof(void*); break; 59 | default: // floating-point and unknown types cannot be fixed here 60 | dbrew_free(r); 61 | return cfg->func; 62 | } 63 | // Copy only const_sz bytes: copying sizeof(args[i]) would read past 64 | // the constant's buffer for types narrower than a pointer. args[i] 65 | // starts zeroed, so on little-endian x86-64 the partial copy yields 66 | // the zero-extended constant. 67 | dbrew_config_staticpar(r, i); 68 | memcpy(&args[i], cfg->params[i].const_val, const_sz); 69 | } 70 | for (size_t i = 0; i < cfg->memrange_count; ++i) { 71 | switch (cfg->memranges[i].flags) { 72 | case BINOPT_MEM_CONST: 73 | case BINOPT_MEM_NESTED_CONST: 74 | dbrew_config_set_memrange(r, "", false, 75 | (uint64_t) cfg->memranges[i].base, 76 | cfg->memranges[i].size); 77 | break; 78 | default: 79 | // Do nothing. 80 | break; 81 | } 82 | } 83 | 84 | return (BinoptFunc) dbrew_rewrite(r, args[0], args[1], args[2], args[3], 85 | args[4], args[5]); 86 | } 87 | 88 | void binopt_spec_delete(BinoptHandle handle, BinoptFunc spec_func) { 89 | // TODO: free rewriter... 
90 | (void) handle; 91 | (void) spec_func; 92 | } 93 | -------------------------------------------------------------------------------- /rewriter/dbrew/meson.build: -------------------------------------------------------------------------------- 1 | dbrew_subproject = subproject('dbrew') 2 | dbrew = dbrew_subproject.get_variable('dbrew') 3 | 4 | sources = ['dbrew.c'] 5 | 6 | binopt_dbrew_lib = shared_library('binopt-dbrew', sources, 7 | dependencies: [binopt, dbrew], 8 | install: true) 9 | -------------------------------------------------------------------------------- /rewriter/drob/drob.c: -------------------------------------------------------------------------------- 1 | 2 | #include "binopt.h" 3 | #include "binopt-config.h" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | /* Drob bindings */ 14 | 15 | const char* binopt_driver(void) { 16 | return "Drob"; 17 | } 18 | BinoptHandle binopt_init(void) { 19 | drob_setup(); 20 | return NULL; 21 | } 22 | void binopt_fini(BinoptHandle handle) { 23 | // No teardown, there may be multiple handles. 
24 | (void) handle; 25 | } 26 | 27 | static drob_param_type binopt_drob_map_ty(BinoptType ty) { 28 | switch (ty) { 29 | case BINOPT_TY_VOID: return DROB_PARAM_TYPE_VOID; 30 | case BINOPT_TY_INT8: return DROB_PARAM_TYPE_INT8; 31 | case BINOPT_TY_INT16: return DROB_PARAM_TYPE_INT16; 32 | case BINOPT_TY_INT32: return DROB_PARAM_TYPE_INT32; 33 | case BINOPT_TY_INT64: return DROB_PARAM_TYPE_INT64; 34 | case BINOPT_TY_UINT8: return DROB_PARAM_TYPE_UINT8; 35 | case BINOPT_TY_UINT16: return DROB_PARAM_TYPE_UINT16; 36 | case BINOPT_TY_UINT32: return DROB_PARAM_TYPE_UINT32; 37 | case BINOPT_TY_UINT64: return DROB_PARAM_TYPE_UINT64; 38 | case BINOPT_TY_FLOAT: return DROB_PARAM_TYPE_FLOAT; 39 | case BINOPT_TY_DOUBLE: return DROB_PARAM_TYPE_DOUBLE; 40 | case BINOPT_TY_PTR: return DROB_PARAM_TYPE_PTR; 41 | case BINOPT_TY_PTR_NOALIAS: return DROB_PARAM_TYPE_PTR; 42 | default: return DROB_PARAM_TYPE_MAX; 43 | } 44 | } 45 | 46 | BinoptFunc binopt_spec_create(BinoptCfgRef cfg) { 47 | if (cfg->param_count > 6) 48 | return cfg->func; 49 | 50 | if (drob_set_logging(stderr, cfg->log_level)) 51 | fprintf(stderr, "binopt-drob: invalid loglevel %d\n", cfg->log_level); 52 | 53 | drob_param_type dty_ret = binopt_drob_map_ty(cfg->ret_ty); 54 | drob_param_type dty_args[6] = {0}; 55 | for (size_t i = 0; i < cfg->param_count; i++) 56 | dty_args[i] = binopt_drob_map_ty(cfg->params[i].ty); 57 | 58 | drob_cfg* dcfg = drob_cfg_new(dty_ret, cfg->param_count, dty_args[0], 59 | dty_args[1], dty_args[2], dty_args[3], 60 | dty_args[4], dty_args[5]); 61 | 62 | if (dcfg == NULL) 63 | return cfg->func; 64 | 65 | for (size_t i = 0; i < cfg->param_count; ++i) { 66 | struct BinoptCfgParam* param = &cfg->params[i]; 67 | if (param->ty == BINOPT_TY_PTR_NOALIAS) 68 | drob_cfg_set_ptr_flag(dcfg, i, DROB_PTR_FLAG_RESTRICT); 69 | if (!param->const_val) 70 | continue; 71 | switch (param->ty) { 72 | case BINOPT_TY_INT8: 73 | drob_cfg_set_param_int8(dcfg, i, *(int8_t*) param->const_val); 74 | break; 75 | case 
BINOPT_TY_INT16: 76 | drob_cfg_set_param_int16(dcfg, i, *(int16_t*) param->const_val); 77 | break; 78 | case BINOPT_TY_INT32: 79 | drob_cfg_set_param_int32(dcfg, i, *(int32_t*) param->const_val); 80 | break; 81 | case BINOPT_TY_INT64: 82 | drob_cfg_set_param_int64(dcfg, i, *(int64_t*) param->const_val); 83 | break; 84 | case BINOPT_TY_UINT8: 85 | drob_cfg_set_param_uint8(dcfg, i, *(uint8_t*) param->const_val); 86 | break; 87 | case BINOPT_TY_UINT16: 88 | drob_cfg_set_param_uint16(dcfg, i, *(uint16_t*) param->const_val); 89 | break; 90 | case BINOPT_TY_UINT32: 91 | drob_cfg_set_param_uint32(dcfg, i, *(uint32_t*) param->const_val); 92 | break; 93 | case BINOPT_TY_UINT64: 94 | drob_cfg_set_param_uint64(dcfg, i, *(uint64_t*) param->const_val); 95 | break; 96 | case BINOPT_TY_PTR: 97 | case BINOPT_TY_PTR_NOALIAS: 98 | drob_cfg_set_param_ptr(dcfg, i, *(const void**) param->const_val); 99 | break; 100 | default: 101 | return cfg->func; 102 | } 103 | } 104 | for (size_t i = 0; i < cfg->memrange_count; ++i) { 105 | switch (cfg->memranges[i].flags) { 106 | case BINOPT_MEM_CONST: 107 | case BINOPT_MEM_NESTED_CONST: 108 | drob_cfg_add_const_range(dcfg, cfg->memranges[i].base, 109 | cfg->memranges[i].size); 110 | break; 111 | default: 112 | // Do nothing. 113 | break; 114 | } 115 | } 116 | 117 | drob_cfg_dump(dcfg); 118 | drob_cfg_set_error_handling(dcfg, DROB_ERROR_HANDLING_RETURN_NULL); 119 | BinoptFunc res = (BinoptFunc) drob_optimize((drob_f) cfg->func, dcfg); 120 | return res ? 
res : cfg->func; 121 | } 122 | 123 | void binopt_spec_delete(BinoptHandle handle, BinoptFunc spec_func) { 124 | drob_free((drob_f) spec_func); 125 | (void) handle; 126 | } 127 | -------------------------------------------------------------------------------- /rewriter/drob/meson.build: -------------------------------------------------------------------------------- 1 | drob = subproject('drob') 2 | libdrob = drob.get_variable('drob') 3 | 4 | sources = ['drob.c'] 5 | 6 | binopt_drob_lib = shared_library('binopt-drob', sources, 7 | dependencies: [binopt, libdrob], 8 | install: true) 9 | -------------------------------------------------------------------------------- /rewriter/meson.build: -------------------------------------------------------------------------------- 1 | if get_option('build_dbrew') 2 | subdir('dbrew') 3 | endif 4 | if get_option('build_dbll') 5 | subdir('dbll') 6 | endif 7 | if get_option('build_drob') 8 | subdir('drob') 9 | endif 10 | -------------------------------------------------------------------------------- /src/default.c: -------------------------------------------------------------------------------- 1 | 2 | #include "binopt.h" 3 | #include "binopt-config.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define WEAK __attribute__((weak)) 12 | 13 | /* Dummy implementation which always returns the original function without 14 | * modifications. 
*/ 15 | 16 | WEAK const char* binopt_driver(void) { 17 | return "Default (no rewriting)"; 18 | } 19 | WEAK BinoptHandle binopt_init(void) { return NULL; } 20 | WEAK void binopt_fini(BinoptHandle handle) {} 21 | 22 | static size_t binopt_type_size(BinoptType ty) { 23 | switch (ty) { 24 | case BINOPT_TY_VOID: return 0; 25 | case BINOPT_TY_INT8: return sizeof(int8_t); 26 | case BINOPT_TY_INT16: return sizeof(int16_t); 27 | case BINOPT_TY_INT32: return sizeof(int32_t); 28 | case BINOPT_TY_INT64: return sizeof(int64_t); 29 | case BINOPT_TY_UINT8: return sizeof(uint8_t); 30 | case BINOPT_TY_UINT16: return sizeof(uint16_t); 31 | case BINOPT_TY_UINT32: return sizeof(uint32_t); 32 | case BINOPT_TY_UINT64: return sizeof(uint64_t); 33 | case BINOPT_TY_FLOAT: return sizeof(float); 34 | case BINOPT_TY_DOUBLE: return sizeof(double); 35 | case BINOPT_TY_PTR: return sizeof(void*); 36 | case BINOPT_TY_PTR_NOALIAS: return sizeof(void*); 37 | default: return 0; 38 | } 39 | } 40 | 41 | WEAK BinoptCfgRef binopt_cfg_new(BinoptHandle handle, 42 | BinoptFunc base_func) { 43 | BinoptCfgRef cfg = calloc(1, sizeof(struct BinoptCfg)); 44 | if (cfg == NULL) 45 | return NULL; 46 | cfg->handle = handle; 47 | cfg->func = base_func; 48 | cfg->fast_math = 0; 49 | cfg->log_level = 0; 50 | 51 | const char* log_level_env = getenv("BINOPT_LOGLEVEL"); 52 | if (log_level_env) { 53 | unsigned log_level = strtoul(log_level_env, NULL, 10); 54 | if (log_level < UINT8_MAX) 55 | cfg->log_level = log_level; 56 | } 57 | 58 | return cfg; 59 | } 60 | WEAK BinoptCfgRef binopt_cfg_clone(BinoptCfgRef base_cfg) { 61 | BinoptCfgRef new_cfg = malloc(sizeof(struct BinoptCfg)); 62 | if (new_cfg == NULL) 63 | return NULL; 64 | memcpy(new_cfg, base_cfg, sizeof(struct BinoptCfg)); 65 | if (new_cfg->params != NULL) { 66 | struct BinoptCfgParam* new_params = malloc(sizeof(struct BinoptCfgParam) * new_cfg->param_alloc); 67 | if (new_params == NULL) { 68 | free(new_cfg); 69 | return NULL; 70 | } 71 | for (size_t i = 0; i < 
new_cfg->param_count; ++i) { 72 | struct BinoptCfgParam* base_param = &base_cfg->params[i]; 73 | struct BinoptCfgParam* new_param = &new_params[i]; 74 | size_t const_size = binopt_type_size(base_param->ty); 75 | new_param->ty = base_param->ty; 76 | new_param->const_val = base_param->const_val ? malloc(const_size) : NULL; 77 | // Parameters without a fixed value stay unset in the clone. Silently don't clone constant values if we can't allocate memory 78 | if (new_param->const_val != NULL) 79 | memcpy(new_param->const_val, base_param->const_val, const_size); 80 | } 81 | new_cfg->params = new_params; 82 | } 83 | if (base_cfg->memranges != NULL) { 84 | struct BinoptCfgMemrange* new_memranges = malloc(sizeof(struct BinoptCfgMemrange) * new_cfg->memrange_alloc); 85 | if (new_memranges == NULL) { 86 | if (new_cfg->params) 87 | for (size_t i = 0; i < new_cfg->param_count; ++i) 88 | free(new_cfg->params[i].const_val); 89 | free(new_cfg->params); 90 | free(new_cfg); 91 | return NULL; 92 | } 93 | memcpy(new_memranges, base_cfg->memranges, sizeof(struct BinoptCfgMemrange) * base_cfg->memrange_count); 94 | new_cfg->memranges = new_memranges; 95 | } 96 | if (base_cfg->implflags != NULL) { 97 | struct BinoptCfgFlag* new_implflags = malloc(sizeof(struct BinoptCfgFlag) * new_cfg->implflag_alloc); 98 | if (new_implflags == NULL) { 99 | if (new_cfg->params) 100 | for (size_t i = 0; i < new_cfg->param_count; ++i) 101 | free(new_cfg->params[i].const_val); 102 | free(new_cfg->params); 103 | free(new_cfg->memranges); 104 | free(new_cfg); 105 | return NULL; 106 | } 107 | memcpy(new_implflags, base_cfg->implflags, sizeof(struct BinoptCfgFlag) * base_cfg->implflag_count); 108 | new_cfg->implflags = new_implflags; 109 | } 110 | return new_cfg; 111 | } 112 | // Set function signature. 113 | WEAK void binopt_cfg_type(BinoptCfgRef cfg, unsigned count, BinoptType ret, ...) 
{ 114 | va_list args; 115 | cfg->ret_ty = ret; 116 | for (size_t i = 0; cfg->params && i < cfg->param_count; ++i) 117 | free(cfg->params[i].const_val); // constants fixed for the old signature 118 | free(cfg->params); 119 | cfg->params = calloc(count, sizeof(struct BinoptCfgParam)); 120 | if (cfg->params == NULL) { 121 | cfg->param_count = 0; 122 | cfg->param_alloc = 0; 123 | return; 124 | } 125 | 126 | cfg->param_alloc = count; 127 | 128 | va_start(args, ret); 129 | for (unsigned i = 0; i < count; ++i) { 130 | cfg->params[i].ty = va_arg(args, BinoptType); 131 | } 132 | cfg->param_count = count; 133 | va_end(args); 134 | 135 | return; 136 | } 137 | WEAK void binopt_cfg_set(BinoptCfgRef cfg, BinoptOptFlags flag, size_t val) { 138 | switch (flag) { 139 | case BINOPT_F_FASTMATH: cfg->fast_math = !!val; break; 140 | case BINOPT_F_LOGLEVEL: cfg->log_level = val; break; 141 | default: 142 | if (cfg->implflag_count == cfg->implflag_alloc) { 143 | size_t new_cnt = cfg->implflag_alloc ? 2 * cfg->implflag_alloc : 8; // recorded capacity must match the allocation 144 | size_t new_size = new_cnt * sizeof(*(cfg->implflags)); 145 | void* new_flags = realloc(cfg->implflags, new_size); 146 | if (new_flags == NULL) 147 | return; // old flags are left untouched 148 | cfg->implflags = new_flags; 149 | cfg->implflag_alloc = new_cnt; 150 | } 151 | 152 | size_t flag_idx = cfg->implflag_count++; 153 | cfg->implflags[flag_idx].flag = flag; 154 | cfg->implflags[flag_idx].val = val; 155 | break; 156 | } 157 | } 158 | WEAK void binopt_cfg_set_param(BinoptCfgRef cfg, unsigned idx, const void* val) { 159 | if (idx >= cfg->param_count) 160 | return; 161 | size_t const_size = binopt_type_size(cfg->params[idx].ty); 162 | void* new_val = malloc(const_size); 163 | if (new_val != NULL) 164 | memcpy(new_val, val, const_size); 165 | free(cfg->params[idx].const_val); cfg->params[idx].const_val = new_val; // old constant released; NULL on allocation failure leaves the parameter non-constant 166 | } 167 | WEAK void binopt_cfg_mem(BinoptCfgRef cfg, void* base, size_t size, 168 | BinoptMemFlags flags) { 169 | if (cfg->memrange_alloc == 0) { 170 | cfg->memranges = malloc(8 * sizeof(struct BinoptCfgMemrange)); 171 | if (cfg->memranges == NULL) 172 | return; 173 | cfg->memrange_alloc
= 8; 174 | } 175 | if (cfg->memrange_count == cfg->memrange_alloc) { 176 | size_t new_size = 2 * cfg->memrange_alloc * sizeof(*(cfg->memranges)); 177 | void* new_ranges = realloc(cfg->memranges, new_size); 178 | if (new_ranges == NULL) 179 | return; // old range allocation is left untouched 180 | cfg->memranges = new_ranges; 181 | cfg->memrange_alloc *= 2; 182 | } 183 | 184 | size_t range_idx = cfg->memrange_count++; 185 | cfg->memranges[range_idx].base = base; 186 | cfg->memranges[range_idx].size = size; 187 | cfg->memranges[range_idx].flags = flags; 188 | } 189 | WEAK void binopt_cfg_free(BinoptCfgRef cfg) { 190 | if (cfg->params) { 191 | for (size_t i = 0; i < cfg->param_count; ++i) 192 | free(cfg->params[i].const_val); 193 | free(cfg->params); 194 | } 195 | free(cfg->implflags); // flag array grown by binopt_cfg_set; free(NULL) is a no-op 196 | free(cfg->memranges); 197 | free(cfg); 198 | } 199 | 200 | WEAK BinoptFunc binopt_spec_create(BinoptCfgRef cfg) { 201 | return cfg->func; 202 | } 203 | WEAK void binopt_spec_delete(BinoptHandle handle, BinoptFunc spec_func) {} 204 | 205 | // Convenience functions 206 | void binopt_cfg_set_parami(BinoptCfgRef cfg, unsigned idx, size_t val) { 207 | binopt_cfg_set_param(cfg, idx, &val); 208 | } 209 | void binopt_cfg_set_paramp(BinoptCfgRef cfg, unsigned idx, const void* ptr, 210 | size_t size, BinoptMemFlags flags) { 211 | binopt_cfg_set_param(cfg, idx, &ptr); 212 | binopt_cfg_mem(cfg, (void*) ptr, size, flags); // binopt_cfg_mem takes a non-const base pointer 213 | } 214 | -------------------------------------------------------------------------------- /src/meson.build: -------------------------------------------------------------------------------- 1 | 2 | sources = [ 3 | 'default.c', 4 | ] 5 | 6 | binopt_lib = shared_library('binopt', sources, 7 | include_directories: binopt_inc, 8 | install: true) 9 | -------------------------------------------------------------------------------- /tests/alignment.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | 
#include 7 | 8 | static void align_test16(void* ptr) { 9 | if ((uintptr_t) ptr & 15) { 10 | printf("Test failed (buf): %p\n", ptr); 11 | abort(); 12 | } 13 | void* faddr = __builtin_frame_address(0); 14 | if ((uintptr_t) faddr & 15) { 15 | printf("Test failed (rsp): %p\n", faddr); 16 | abort(); 17 | } 18 | } 19 | 20 | static void func(void(* align_test_fn)(void*)) { 21 | unsigned char buf[16] __attribute__((aligned(16))); 22 | align_test_fn(buf); 23 | __asm__ volatile("" ::: "memory"); 24 | } 25 | 26 | int main(int argc, char** argv) { 27 | printf("Rewriter: %s\n", binopt_driver()); 28 | 29 | BinoptHandle boh = binopt_init(); 30 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 31 | binopt_cfg_type(bcfg, 1, BINOPT_TY_VOID, BINOPT_TY_PTR); 32 | 33 | void (* new_func)(void(*)(void*)); 34 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 35 | 36 | new_func(align_test16); 37 | printf("Test passed\n"); 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /tests/call.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | __attribute__((noinline)) static int nested(int a) { 4 | return 2 * a; 5 | } 6 | 7 | static int func(int a) { 8 | return nested(a) + a; 9 | } 10 | 11 | int main(int argc, char** argv) { 12 | BinoptHandle boh = test_init(argc, argv); 13 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 14 | binopt_cfg_type(bcfg, 1, BINOPT_TY_INT32, BINOPT_TY_INT32); 15 | binopt_cfg_set_parami(bcfg, 0, 42); 16 | 17 | int (* new_func)(int); 18 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 19 | test_eq_i32(new_func(8), 3 * 8, 3 * 42); 20 | test_fini(); 21 | } 22 | -------------------------------------------------------------------------------- /tests/common.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef BINOPT_TEST_COMMON_H 3 | #define BINOPT_TEST_COMMON_H 4 | 5 
| #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | static int optimize; 13 | 14 | static BinoptHandle test_init(int argc, char** argv) { 15 | if (argc != 3) { 16 | printf("usage: %s driver opt\n", argv[0]); 17 | exit(EXIT_FAILURE); 18 | } 19 | 20 | optimize = strtol(argv[2], NULL, 0); 21 | 22 | const char* driver = binopt_driver(); 23 | if (strcmp(argv[1], driver)) { 24 | printf("error: expected driver %s got %s\n", argv[1], driver); 25 | exit(EXIT_FAILURE); 26 | } 27 | 28 | return binopt_init(); 29 | } 30 | 31 | static void test_eq_i32(int32_t a, int32_t no_opt, int32_t opt) { 32 | int32_t b = optimize ? opt : no_opt; 33 | if (a != b) { 34 | printf("error: got %d != %d\n", a, b); 35 | exit(EXIT_FAILURE); 36 | } 37 | } 38 | 39 | __attribute__((noreturn)) 40 | static void test_fini(void) { 41 | exit(EXIT_SUCCESS); 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /tests/constmem.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include 3 | 4 | static int func(int* b) { 5 | return *b; 6 | } 7 | 8 | int main(int argc, char** argv) { 9 | int* const_val = mmap(NULL, sizeof(int), PROT_READ|PROT_WRITE, 10 | MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 11 | *const_val = 42; 12 | mprotect(const_val, sizeof(int), PROT_READ); 13 | // const_val now points to read-only memory. 14 | 15 | BinoptHandle boh = test_init(argc, argv); 16 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 17 | binopt_cfg_type(bcfg, 1, BINOPT_TY_INT32, BINOPT_TY_PTR); 18 | binopt_cfg_set_parami(bcfg, 0, (uintptr_t) const_val); 19 | 20 | int (* new_func)(int*); 21 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 22 | 23 | mprotect(const_val, sizeof(int), PROT_READ|PROT_WRITE); 24 | *const_val = 3; 25 | 26 | int param2 = 16; 27 | // If nothing is propagated, the result is 16. If the pointer was propagated 28 | // but not the value, the result is 3. 
If the value from the read-only 29 | // memory is used, the result is 42. 30 | test_eq_i32(new_func(¶m2), param2, 42); 31 | test_fini(); 32 | } 33 | -------------------------------------------------------------------------------- /tests/indjmp-dynamic.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | static int func(int a) { 4 | __asm__ volatile(".section .data; 2: .8byte 1f; .previous jmp *2b(%%rip); 1:" :::); 5 | return a; 6 | } 7 | 8 | int main(int argc, char** argv) { 9 | BinoptHandle boh = test_init(argc, argv); 10 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 11 | binopt_cfg_type(bcfg, 1, BINOPT_TY_INT32, BINOPT_TY_INT32); 12 | binopt_cfg_set_parami(bcfg, 0, 42); 13 | 14 | int (* new_func)(int); 15 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 16 | test_eq_i32(new_func(8), 8, 42); 17 | test_fini(); 18 | } 19 | -------------------------------------------------------------------------------- /tests/indjmp-static.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | static int func(int a) { 4 | __asm__ volatile("mov $1f, %%rax; jmp *%%rax; 1:" ::: "rax"); 5 | return a; 6 | } 7 | 8 | int main(int argc, char** argv) { 9 | BinoptHandle boh = test_init(argc, argv); 10 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 11 | binopt_cfg_type(bcfg, 1, BINOPT_TY_INT32, BINOPT_TY_INT32); 12 | binopt_cfg_set_parami(bcfg, 0, 42); 13 | 14 | int (* new_func)(int); 15 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 16 | test_eq_i32(new_func(8), 8, 42); 17 | test_fini(); 18 | } 19 | -------------------------------------------------------------------------------- /tests/manyparams.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | static int func(int edi, int esi, int edx, int ecx, int r8d, int r9d, int sp8, 4 | int sp16) { 5 | return sp8 - 
sp16; 6 | } 7 | 8 | int main(int argc, char** argv) { 9 | BinoptHandle boh = test_init(argc, argv); 10 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 11 | binopt_cfg_type(bcfg, 8, BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_INT32, 12 | BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_INT32, 13 | BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_INT32); 14 | binopt_cfg_set_parami(bcfg, 7, 42); 15 | 16 | int (* new_func)(int, int, int, int, int, int, int, int); 17 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 18 | test_eq_i32(new_func(100, 101, 102, 103, 104, 105, 8, 16), 8 - 16, 8 - 42); 19 | test_fini(); 20 | } 21 | -------------------------------------------------------------------------------- /tests/memory.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | static int func(int a, int* b) { 4 | return a * *b; 5 | } 6 | 7 | int main(int argc, char** argv) { 8 | int param2 = 42; 9 | 10 | BinoptHandle boh = test_init(argc, argv); 11 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 12 | binopt_cfg_type(bcfg, 2, BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_PTR); 13 | binopt_cfg_set_paramp(bcfg, 1, ¶m2, sizeof(param2), BINOPT_MEM_CONST); 14 | 15 | int (* new_func)(int, int*); 16 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 17 | 18 | param2 = 16; 19 | test_eq_i32(new_func(8, ¶m2), 8 * param2, 8 * 42); 20 | test_fini(); 21 | } 22 | -------------------------------------------------------------------------------- /tests/meson.build: -------------------------------------------------------------------------------- 1 | 2 | tests = [ 3 | # test binary; expected results for [Default, DBrew, DBLL, Drob] 4 | # 0 means no optimization, 1 means optimized with values propagated 5 | [executable('test_simple', 'simple.c', dependencies: [binopt]), 0, 1, 1, 1], 6 | [executable('test_manyparams', 'manyparams.c', dependencies: [binopt]), 0, 0, 1, 0], 7 | [executable('test_memory', 
'memory.c', dependencies: [binopt]), 0, 1, 1, 1], 8 | [executable('test_constmem', 'constmem.c', dependencies: [binopt]), 0, 0, 1, 1], 9 | [executable('test_writablemem', 'writablemem.c', dependencies: [binopt]), 0, 1, 1, 1], 10 | [executable('test_alignment', 'alignment.c', dependencies: [binopt]), 0, 1, 1, 1], 11 | [executable('test_call', 'call.c', dependencies: [binopt]), 0, 1, 1, 0], 12 | [executable('test_indjmp-static', 'indjmp-static.c', dependencies: [binopt]), 0, 1, 1, 0], 13 | [executable('test_indjmp-dynamic', 'indjmp-dynamic.c', dependencies: [binopt]), 0, 1, 1, 0], 14 | ] 15 | 16 | rewriters = [[binopt_lib, 'Default (no rewriting)', 1]] 17 | 18 | if get_option('build_dbrew') 19 | rewriters += [[binopt_dbrew_lib, 'DBrew', 2]] 20 | endif 21 | if get_option('build_dbll') 22 | rewriters += [[binopt_dbll_lib, 'DBLL', 3]] 23 | endif 24 | if get_option('build_drob') 25 | rewriters += [[binopt_drob_lib, 'Drob', 4]] 26 | endif 27 | 28 | foreach case : tests 29 | foreach rewriter : rewriters 30 | env = environment() 31 | env.set('LD_PRELOAD', rewriter[0].full_path()) 32 | name = '@0@ @1@'.format(case[0].full_path().split('/')[-1], rewriter[1]) 33 | test(name, case[0], args: [rewriter[1], '@0@'.format(case[rewriter[2]])], env: env, depends: [rewriter[0]]) 34 | endforeach 35 | endforeach 36 | -------------------------------------------------------------------------------- /tests/simple.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | static int func(int a, int b) { 4 | return a * b; 5 | } 6 | 7 | int main(int argc, char** argv) { 8 | BinoptHandle boh = test_init(argc, argv); 9 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 10 | binopt_cfg_type(bcfg, 2, BINOPT_TY_INT32, BINOPT_TY_INT32, BINOPT_TY_INT32); 11 | binopt_cfg_set_parami(bcfg, 1, 42); 12 | 13 | int (* new_func)(int, int); 14 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 15 | test_eq_i32(new_func(8, 16), 8 * 16, 8 
* 42); 16 | test_fini(); 17 | } 18 | -------------------------------------------------------------------------------- /tests/writablemem.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include <stdint.h> 3 | 4 | int global = 42; 5 | 6 | static int func(int* b) { 7 | return *b; 8 | } 9 | 10 | int main(int argc, char** argv) { 11 | BinoptHandle boh = test_init(argc, argv); 12 | BinoptCfgRef bcfg = binopt_cfg_new(boh, (BinoptFunc) func); 13 | binopt_cfg_type(bcfg, 1, BINOPT_TY_INT32, BINOPT_TY_PTR); 14 | // Note that we don't specify the value itself as constant. 15 | binopt_cfg_set_parami(bcfg, 0, (uintptr_t) &global); 16 | 17 | int (* new_func)(int*); 18 | *((BinoptFunc*) &new_func) = binopt_spec_create(bcfg); 19 | 20 | global = 35; 21 | int param2 = 16; 22 | // If nothing is propagated, the result is 16. If the pointer was propagated 23 | // the result is 35. The result must never be 42. 24 | test_eq_i32(new_func(&param2), param2, global); 25 | test_fini(); 26 | } 27 | --------------------------------------------------------------------------------