├── LICENSE ├── README.md └── source ├── Makefile ├── args.c ├── bench.c ├── bitops.h ├── cpu.c ├── cuckoo.h ├── data.c ├── exploit.h ├── file.h ├── gea1.c ├── gea1.h ├── linear_alg.c ├── linear_alg.h ├── main.c ├── print.c ├── sched.c ├── sort.c ├── sort_cuckoo.c ├── stage1.c ├── stage2.c ├── stage3.c ├── test.c ├── timing.h └── transform.h /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 
30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GEA1_break 2 | 3 | This tool implements the attack against GEA-1 described in 4 | [Cryptanalysis of the GPRS Encryption Algorithms GEA-1 and GEA-2](https://eprint.iacr.org/2021/819.pdf). 5 | GEA-1 is one of the GPRS native algorithms and does not provide a 6 | sufficient level of security, being easily breakable using simple COTS 7 | computer hardware. 8 | 9 | ## Table of contents 10 | 11 | 1. [Compiling](#compiling) 12 | 2. [Usage](#usage) 13 | 3. [Howto](#howto) 14 | 4. [Performance](#performance) 15 | 5. [Optimization](#optimizing) 16 | 6. 
[Authors & contributions](#authors) 17 | 18 | ## Compiling 19 | 20 | First, install the `libm4ri` development package on your favorite Linux 21 | distribution and then type: 22 | 23 | ```bash 24 | make 25 | ``` 26 | 27 | ## Usage 28 | 29 | ``` 30 | $ ./gea1_break --help 31 | Usage: gea1_break [OPTION...] 32 | 33 | Implementation of the attack described in https://eprint.iacr.org/2021/819.pdf 34 | to recover GEA-1 keys. 35 | 36 | -a, --all prevent an early exit in stage #2 37 | -b, --bench Run the benchmarks mode 38 | -c, --core=nr_cores The number of cores to use (default is maximum 39 | available) 40 | -d, --dir=dir The directoring storing the results of the 41 | precomputation 42 | -f, --flag=dir_flag {0,1} The direction flag 43 | -i, --iv=iv (hex) The IV 44 | -k, --keystream=keystream (hex) 45 | The keystream 46 | -l, --length=keystream length (bits) 47 | The keystream length (must be >= 48 && <= 64) 48 | -p, --precomputation Run the precomputation sage (stage #1) 49 | -r, --reverse Return the key based on the IV and dir_flag (stage 50 | #3) 51 | -s, --state=recovered_state (hex) 52 | The S recovered in stage #2 53 | -t, --tests Run in test mode 54 | -v, --verbose Increase the verbosity level (default: 0) 55 | -x, --bruteforce Run the key recovery stage (stage #2) 56 | -?, --help Give this help list 57 | --usage Give a short usage message 58 | -V, --version Print program version 59 | 60 | Mandatory or optional arguments to long options are also mandatory or optional 61 | for any corresponding short options. 62 | 63 | Report bugs to roderick.asselineau@{__no_spam__}airbus.com. 64 | $ 65 | ``` 66 | 67 | ## Howto 68 | 69 | You can (and you should) test all internal algorithms using the (default) 70 | `-t` command: 71 | ``` 72 | $ ./gea1_break -t 73 | [+] Satety tests 74 | -> OK [0.01s] 75 | ``` 76 | 77 | If an `assert()` is triggered, it probably implies that you found 78 | a bug and that you won't be able to correctly run the program (for now). 
79 | 80 | To recover the secret key used to generate some known keystream, you need 81 | to first generate a set of tables using `-p`: 82 | ``` 83 | $ ./gea1_break -p --dir mytables 84 | ``` 85 | 86 | This precomputes tables within the `./mytables/` directory. This operation 87 | only needs to be done _once_ and should only take a couple of minutes on 88 | _recent_ computer hardware. Please note that there is a set of tables 89 | for each of the two backends. Selecting the backend is done at compilation time 90 | by assigning `OPTIM_LOOKUP` either to `OPTIM_LKUP_CUCKOO` (default) or to 91 | `OPTIM_LKUP_BSEARCH` (much slower in general). 92 | 93 | The second step is to recover the internal state `S` for a given bitstream 94 | using the `-x` option. `gea1_break` comes in two flavors: `single` mode 95 | and `batch` mode, depending on the compilation flag `OPTIM_BATCH` (0 or 1). 96 | 97 | In `single` mode, `gea1_break` is optimized to break a single key. However 98 | it is possible that you may actually need to recover two (or even more) 99 | keys in which case using the `batch` mode would definitely be better for 100 | you since the computation is _shared_ between the keys. 101 | 102 | ### Single mode 103 | 104 | ``` 105 | $ make clean && make EXT="-DOPTIM_BATCH=0" 106 | [...] 107 | $ ./gea1_break -V 108 | GEA1_break v0.3 - cuckoo/single/high 109 | $ ./gea1_break -x --dir ./mytables --keystream 14b36a6fb803c7bb -l 64 110 | [...] 111 | [+] State found in 24663.00s [411.05m]! 112 | UB = 38740ac2 113 | V = ac 114 | T = da2e48 115 | S = 243c504a2733bce6 116 | [...] 117 | ``` 118 | 119 | ### Batch mode 120 | 121 | ``` 122 | $ make clean && make EXT="-DOPTIM_BATCH=1" 123 | [...] 124 | $ ./gea1_break -V 125 | GEA1_break v0.3 - cuckoo/batch/high 126 | $ ./gea1_break -x --dir ./mytables --batch 14b36a6fb803c7bb:64,88a63c9dad536a11:64 127 | [+] Batch mode! 
Attempting to crack: 128 | -> [b00] 14b36a6fb803c7bb (64) [mask:ffffffffffffffff] 129 | -> [b01] 88a63c9dad536a11 (64) [mask:ffffffffffffffff] 130 | [...] 131 | [+] State found for b01 in 24210.00s [403.50m]! 132 | S = 713ed89153b804f0 133 | [...] 134 | [+] State found for b00 in 42702.00s [711.70m]! 135 | S = 243c504a2733bce6 136 | [...] 137 | ``` 138 | 139 | ### Recovering `K` 140 | 141 | Once `S` is recovered, `K` can be computed by providing the `IV` (as an 142 | `uint32_t`) and the direction flag `f` (0 or 1): 143 | 144 | ``` 145 | $ ./gea1_break -r --iv 88d64f69 --state 713ed89153b804f0 -f 1 146 | K = 78b1bfcfe3ca4b65 147 | ``` 148 | 149 | `K` is returned as an `uint64_t` integer as well. In that regard, please 150 | understand that `uint{32,64}_t` types are used as convenient storage areas. 151 | `K`'s _ith_ bit should be retrieved using the classic `(K >> i)&1`. The 152 | same logic applies to `IV` and `S`. 153 | 154 | ### A few observations 155 | 156 | In the last mode, the key recovery process accelerates each time a key is 157 | recovered and asymptotically converges toward the speed of the `single` 158 | mode. 159 | 160 | In [Cryptanalysis of the GPRS Encryption Algorithms GEA-1 and GEA-2](https://eprint.iacr.org/2021/819.pdf), 161 | the authors recover each secret key using 65 bits of keystream. According 162 | to our practical observations though, 64 bits of keystream are enough. As 163 | a result, it is convenient to store the keystream as an `uint64_t` with a 164 | maximum size of 64 bits (by definition). This is comfortable for a number 165 | of reasons including saving memory and speeding up computations. 166 | 167 | The default behavior of the program is to stop the computation whenever 168 | a key candidate has been found for each specified keystream. 
However there 169 | are two exceptions: 170 | 171 | * If one of the keystreams is smaller than 64 bits (for example 61 bits 172 | long using the `-l` option of the `single` mode) then by definition multiple 173 | candidates per keystream pop up and the program cannot tell which ones 174 | are the false positives. For this reason `gea1_break` computes all the 175 | candidates and only stops when the computation is over. 176 | * If the `--all` option is set then the early exit is disabled no matter 177 | the size of the keystream. 178 | 179 | ## Performance 180 | 181 | It is difficult to accurately measure the performance difference 182 | between the original paper and this implementation, since hardware 183 | configurations are typically different, and also some design choices are 184 | obviously different. However, you may be able to roughly estimate the 185 | running time on your computer based on the tests that we made. 186 | 187 | All the following tests were performed on a DELL server with the 188 | following characteristics: 189 | 190 | * 2x Intel(R) Xeon(R) CPU E5-2640 v2 @ 2.00GHz 191 | * cache size: 20480 KB 192 | * 8x physical cores / CPU, 16x virtual cores / CPU (HT) 193 | * 64 GB of RAM 194 | 195 | Note: The code is _not_ currently designed to run on a cluster. This may 196 | change in the future should the need occur. 
197 | 198 | ### Generating the tables 199 | 200 | Generating the `cuckoo` tables: 201 | ``` 202 | $ make clean && make EXT="-DOPTIM_LOOKUP=OPTIM_LKUP_CUCKOO" -j`nproc` 203 | $ time ./gea1_break -p --dir ./tables_cuckoo 204 | ``` 205 | 206 | Or generating the `bsearch` tables (not recommended): 207 | ``` 208 | $ make clean && make EXT="-DOPTIM_LOOKUP=OPTIM_LKUP_BSEARCH" -j`nproc` 209 | $ time ./gea1_break -p --dir ./tables_bsearch 210 | ``` 211 | 212 | ### Cracking a key (cuckoo/single) 213 | 214 | Searching through the whole key space takes us between 12h and 13h as 215 | demonstrated below: 216 | 217 | ``` 218 | $ time ./gea1_break -v -x --dir ./table24_cuck04 --keystream 14b36a6fb803c7bb -l 64 --all 219 | [+] Preparing V, B, TAC basis 220 | -> OK [0.32 ms] 221 | [+] Preparing MA, MB, MC 222 | -> OK [0.36 ms] 223 | [+] Preparing the v elements for all the cores 224 | -> OK [0.37 ms] 225 | [+] Loading hash tables [0,127] from ./table24_cuck04/ 226 | -> OK [31s] 227 | [+] Generating RegA+RegC keystreams (2^32) [Full] 228 | -> using 32 cores 229 | -> All LP have terminated 230 | -> OK [24953s] 231 | [+] Unloading hash tables from ./table24_cuck04/ 232 | -> OK [3s] 233 | [+] Loading hash tables [128,255] from ./table24_cuck04/ 234 | -> OK [54s] 235 | [+] Generating RegA+RegC keystreams (2^32) [Full] 236 | -> using 32 cores 237 | [+] State found in 26229.00s [437.15m]! 238 | UB = 38740ac2 239 | V = ac 240 | T = da2e48 241 | S = 243c504a2733bce6 242 | -> All LP have terminated 243 | -> OK [21554s] 244 | [+] Unloading hash tables from ./table24_cuck04/ 245 | -> OK [1s] 246 | 247 | real 776m36.133s 248 | user 23213m50.278s 249 | sys 286m23.211s 250 | ``` 251 | 252 | One can observe that, during this run, our first round completed within 6h 253 | when the second one took a 1 hour penalty (while doing the same amount of 254 | computation) which may be because of other jobs running on the server. 
255 | With such results, we can expect to recover a single key in half that time 256 | on average, i.e. with ~6.5h of computation. 257 | 258 | ``` 259 | $ (time stdbuf -oL ./gea1_break -v -x --dir ./table24_cuck04 --keystream 14b36a6fb803c7bb -l 64 2>&1) 2>&1 | tee single_14b36a6fb803c7bb.txt 260 | [+] Preparing V, B, TAC basis 261 | -> OK [0.29 ms] 262 | [+] Preparing MA, MB, MC 263 | -> OK [0.35 ms] 264 | [+] Preparing the v elements for all the cores 265 | -> OK [0.37 ms] 266 | [+] Loading hash tables [0,127] from ./table24_cuck04/ 267 | -> OK [17s] 268 | [+] Generating RegA+RegC keystreams (2^32) [Full] 269 | -> using 32 cores 270 | -> All LP have terminated 271 | -> OK [22232s] 272 | [+] Unloading hash tables from ./table24_cuck04/ 273 | -> OK [2s] 274 | [+] Loading hash tables [128,255] from ./table24_cuck04/ 275 | -> OK [31s] 276 | [+] Generating RegA+RegC keystreams (2^32) [Full] 277 | -> using 32 cores 278 | [+] State found in 23435.00s [390.58m]! 279 | UB = 38740ac2 280 | V = ac 281 | T = da2e48 282 | S = 243c504a2733bce6 283 | -> All LP have terminated 284 | -> OK [1154s] 285 | [+] Unloading hash tables from ./table24_cuck04/ 286 | -> OK [1s] 287 | 288 | real 390m36.541s 289 | user 11898m11.818s 290 | sys 123m3.758s 291 | ``` 292 | 293 | The RAM requirement to complete this stage is ~23 GB (`OPTIM_MEM_HIGH`) 294 | and ~12 GB (`OPTIM_MEM_LOW`). 295 | 296 | 297 | ### Cracking multiple keys (cuckoo/batch) 298 | 299 | The running time is tied to the number of keys. Attempting to break six 300 | testvectors from [this file](https://github.com/Dude100/MediaTek-HelioX10-Baseband/blob/591772a0d659ef0f7bba1953d18f8fe7c18b11de/(FDD)MT6795.MOLY.LR9.W1423.MD.LWTG.MP.V24/driver/cipher/include/gcu_ut.h) 301 | gives us: 302 | 303 | ``` 304 | $ time ./gea1_break -v -x --dir ./table24_cuck04/ --batch 8ac31421ab98a11f:64,14b36a6fb803c7bb:64,88a63c9dad536a11:64,c725804289b920d2:64,8ac45e0f6419395a:64,3ff638812ee23296:64 305 | [+] Batch mode! 
Attempting to crack: 306 | -> [b00] 8ac31421ab98a11f (64) [mask:ffffffffffffffff] 307 | -> [b01] 14b36a6fb803c7bb (64) [mask:ffffffffffffffff] 308 | -> [b02] 88a63c9dad536a11 (64) [mask:ffffffffffffffff] 309 | -> [b03] c725804289b920d2 (64) [mask:ffffffffffffffff] 310 | -> [b04] 8ac45e0f6419395a (64) [mask:ffffffffffffffff] 311 | -> [b05] 3ff638812ee23296 (64) [mask:ffffffffffffffff] 312 | [+] Preparing V, B, TAC basis 313 | -> OK [1.68 ms] 314 | [+] Preparing MA, MB, MC 315 | -> OK [1.72 ms] 316 | [+] Preparing the v elements for all the cores 317 | -> OK [1.72 ms] 318 | [+] Loading hash tables [0,127] from ./table24_cuck04// 319 | -> OK [29s] 320 | [+] Generating RegA+RegC keystreams (2^32) [Full] 321 | -> using 32 cores 322 | [+] State found for b02 in 24210.00s [403.50m]! 323 | UB = 94e9e91c 324 | V = d 325 | T = becfe1 326 | S = 713ed89153b804f0 327 | [+] State found for b03 in 37807.00s [630.12m]! 328 | UB = 67685e4e 329 | V = 6c 330 | T = 2fac34 331 | S = 43c43be610d42616 332 | -> All LP have terminated 333 | -> OK [40507s] 334 | [+] Unloading hash tables from ./table24_cuck04// 335 | -> OK [0s] 336 | [+] Loading hash tables [128,255] from ./table24_cuck04// 337 | -> OK [42s] 338 | [+] Generating RegA+RegC keystreams (2^32) [Full] 339 | -> using 32 cores 340 | [+] State found for b01 in 42702.00s [711.70m]! 341 | UB = 38740ac2 342 | V = ac 343 | T = da2e48 344 | S = 243c504a2733bce6 345 | [+] State found for b05 in 57419.00s [956.98m]! 346 | UB = a437ea66 347 | V = c5 348 | T = 250c10 349 | S = 51dc282bfb0479f3 350 | -> All LP have terminated 351 | -> OK [28720s] 352 | [+] Unloading hash tables from ./table24_cuck04// 353 | -> OK [1s] 354 | 355 | real 1154m59.552s 356 | user 34861m59.565s 357 | sys 361m49.503s 358 | ``` 359 | 360 | Observe that the state corresponding to the first testvector 361 | (which is in fact quite _special_) is not recovered, and neither is 362 | the 4th candidate (`b04`), forcing the program to continue until the very end. 
363 | 364 | Practically speaking, in 956.98m (~16h) we recovered 4 different states, and thus 365 | four different keys, so the benefit of this mode is obvious. The RAM 366 | requirement during this stage is also ~23 GB (`OPTIM_MEM_HIGH`) and ~12 GB 367 | (`OPTIM_MEM_LOW`). 368 | 369 | 370 | ## Optimization 371 | 372 | 373 | You may want to play with a couple of flags within `exploit.h`: 374 | 375 | | Optimization name | Default value | Description | 376 | | ---------------------- | ----------------- | ---------------------------------------------------------------------------------------- | 377 | | OPTIM_BATCH | 0 | If enabled, compiles a special version of the program able to handle multiple keystreams.| 378 | | OPTIM_LIN_ALG | 1 | Use a linear algebra trick to skip expensive matrix operations | 379 | | OPTIM_LOOKUP | OPTIM_LKUP_CUCKOO | Select the hash table backend, the slowest being `OPTIM_LKUP_BSEARCH` | 380 | | OPTIM_MEM | OPTIM_MEM_HIGH | Select the memory requirements policy. `OPTIM_MEM_HIGH` expects 64 GB of RAM. | 381 | | OPTIM_SCHED | 1 | Change the scheduling policy to the most interesting depending on the current task | 382 | | OPTIM_SKIP_COLLISIONS | 1 | Skip handling collisions within the hash table (some keys may not be broken as a result).| 383 | 384 | Generally speaking, unless you know what you are doing, we recommend keeping the 385 | default flag values for the best performance. 386 | 387 | Note: `OPTIM_SKIP_COLLISIONS` is the default behavior, and handling collisions 388 | (and thus breaking 100% of the keys) is currently not implemented. 389 | 390 | ## FAQ 391 | 392 | 393 | #### Q: What will be the next features? 394 | 395 | Handling the collisions is likely to appear within a couple of days. 396 | 397 | A major modification of the CLI is planned in order to integrate `bitmasks` and arbitrarily 398 | long keystreams. While we have not tested it, it seems likely that, in 399 | some cases, you may have difficulty extracting 64 consecutive bits of 400 | keystream.
As a result it makes sense to provide a mask and to extend the 401 | bitstream size since otherwise this would increase the number of false 402 | positives. This is meant to be addressed in the short term as well. 403 | 404 | A full memory version with extended precomputed tables is an option and 405 | likely to be one of our priorities (when we get time though). 406 | 407 | Modifying the program to allow it to run on a (heterogeneous) cluster is 408 | another option. 409 | 410 | None of these options takes time to implement but testing does. 411 | 412 | #### Q: Will there be a `gea2_break`? 413 | 414 | Perhaps! If enough people sign the petition ;> 415 | 416 | #### Q: Can you give me some advice on estimating the average/worst running time on my machine? 417 | 418 | Edit `exploit.h` and set `NR_BITS_UB` to `24` then recompile the binary. 419 | Note the `demo` tag appearing in the version: 420 | 421 | ``` 422 | $ ./gea1_break -V 423 | GEA1_break v0.3 - cuckoo/batch/high/demo 424 | ``` 425 | 426 | Now run: 427 | ``` 428 | $ time ./gea1_break -v -x --dir ./tables25_round_cuckoo4 --keystream d93922ae6ccba015 -l 64 --all 429 | [+] Preparing V, B, TAC basis 430 | -> OK [0.24 ms] 431 | [+] Preparing MA, MB, MC 432 | -> OK [0.28 ms] 433 | [+] Preparing the v elements for all the cores 434 | -> OK [0.29 ms] 435 | [+] Loading hash tables [0,127] from ./tables25_round_cuckoo4/ 436 | -> OK [17s] 437 | [+] Generating RegA+RegC keystreams (2^24) to crack 0xd93922ae6ccba015 [Demo] 438 | -> using 32 cores 439 | [+] State found in 17.00s [0.28m]!
440 | UB = e 441 | V = 5d 442 | T = 15 443 | S = 3807cf4fdb121506 444 | -> All LP have terminated 445 | -> OK [106s] 446 | [+] Unloading hash tables from ./tables25_round_cuckoo4/ 447 | -> OK [2s] 448 | [+] Loading hash tables [128,255] from ./tables25_round_cuckoo4/ 449 | -> OK [45s] 450 | [+] Generating RegA+RegC keystreams (2^24) to crack 0xd93922ae6ccba015 [Demo] 451 | -> using 32 cores 452 | -> All LP have terminated 453 | -> OK [92s] 454 | [+] Unloading hash tables from ./tables25_round_cuckoo4/ 455 | -> OK [1s] 456 | 457 | real 4m23.091s 458 | user 89m31.224s 459 | sys 5m24.429s 460 | ``` 461 | 462 | So, basically, what does that tell us? Loading the memory takes a couple 463 | of seconds. Since it is only done twice and is independent of the complexity 464 | of the attack, it is negligible. 465 | 466 | On the other hand, the keystream generation took, respectively, 106s and 92s 467 | to perform 2^23 (similar) operations each. 468 | 469 | Therefore, since the full attack covers 2^32 instead of 2^24 values (a factor of 2^8) over two passes: 470 | * The full run should take less than 15.08h (2 x 106s x 2^8) in the worst case. In fact 471 | since the measurement is polluted by the creation/destruction of child 472 | processes, 13.08h (2 x 92s x 2^8) is a much better approximation and generally speaking you 473 | may consider the lowest measure, unless you intend to have your cores 474 | busy with other loads. 475 | * On average a key should break within half that time, thus 6.5h. 476 | 477 | #### Q: The memory requirements are way above what I have currently, is there any hope? 478 | 479 | Well yes, you could recompile with `-DOPTIM_MEM=OPTIM_MEM_LOW` which was 480 | meant for configurations with 16 GB of RAM. 481 | 482 | #### Q: Using all the cores is too much load on my laptop, how can I use a subset of the available cores? 483 | 484 | Use the `-c` option to specify the maximum number of cores that you want 485 | to use. Running the program without that option is obviously equivalent 486 | to ``-c`nproc` ``.
487 | 488 | 489 | ## Authors & contributions 490 | 491 | 492 | Roderick ASSELINEAU (main author), Erik-Oliver BLASS (cuckoo backend) 493 | 494 | The authors would like to thank: 495 | * Gaëtan LEURENT for gently taking the time to answer to our questions; 496 | * Luc ROUDE and Alexandre GAZET for helping with tests and suggestions; 497 | * Guillaume SYLVAND for sharing a bit of his HP computing knowledge; 498 | * The Airbus' VCE/VCX teams (#rollercoaster). 499 | 500 | Pull requests are welcome. For major changes, please open an issue first 501 | to discuss what you would like to change. 502 | 503 | Please make sure to update tests as appropriate. 504 | -------------------------------------------------------------------------------- /source/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -Wextra -O9 -fomit-frame-pointer -I. -fanalyzer $(EXT) 2 | 3 | OBJS = main.o gea1.o sort_cuckoo.o sched.o test.o print.o cpu.o args.o bench.o sort.o linear_alg.o stage1.o stage2.o stage3.o data.o 4 | 5 | PROG = gea1_break 6 | 7 | all: $(PROG) 8 | $(PROG): $(OBJS) 9 | $(CC) $(OBJS) -o $(PROG) -lm4ri 10 | 11 | clean: 12 | rm -f $(PROG) $(OBJS) 13 | -------------------------------------------------------------------------------- /source/args.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "exploit.h" 7 | 8 | #if OPTIM_LOOKUP == OPTIM_LKUP_CUCKOO 9 | #define BACKEND_STR "cuckoo" 10 | #else 11 | #define BACKEND_STR "bsearch" 12 | #endif 13 | 14 | #if OPTIM_BATCH 15 | #define SINGLE_BATCH_STR "batch" 16 | #else 17 | #define SINGLE_BATCH_STR "single" 18 | #endif 19 | 20 | #if OPTIM_MEM == OPTIM_MEM_HIGH 21 | #define MEM_STR "high" 22 | #else 23 | #define MEM_STR "low" 24 | #endif 25 | 26 | #if NR_BITS_UB == 32 27 | #define DEMO_STR "" 28 | #else 29 | #define DEMO_STR "/demo" 30 | #endif 31 | 32 | const char 
*argp_program_version = "GEA1_break v0.3 - "BACKEND_STR"/"SINGLE_BATCH_STR"/"MEM_STR""DEMO_STR; 33 | 34 | const char *argp_program_bug_address = "roderick.asselineau@airbus.com"; 35 | static char doc[] = "\nImplementation of the attack described in https://eprint.iacr.org/2021/819.pdf to recover GEA-1 keys."; 36 | static char args_doc[] = ""; 37 | 38 | // Adds verbosity 39 | 40 | static struct argp_option options[] = { 41 | { "tests", 't', 0, 0, "Run in test mode", 0}, 42 | { "bench", 'b', 0, 0, "Run the benchmarks mode", 0}, 43 | { "precomputation", 'p', 0, 0, "Run the precomputation sage (stage #1)", 0}, 44 | { "bruteforce", 'x', 0, 0, "Run the key recovery stage (stage #2)", 0}, 45 | { "reverse", 'r', 0, 0, "Return the key based on the IV and dir_flag (stage #3)", 0}, 46 | { "dir", 'd', "dir", 0, "The directoring storing the results of the precomputation", 0}, 47 | #if OPTIM_BATCH 48 | { "batch", 'k', "k1(hex):b1len[,k2(hex):b2len][,...]", 0, "The keystreams", 0}, 49 | #else 50 | { "keystream", 'k', "keystream (hex)", 0, "The keystream", 0}, 51 | { "length", 'l', "keystream length (bits)", 0, "The keystream length (must be >= 56 && <= 64)", 0}, 52 | #endif 53 | { "all", 'a', 0, 0, "prevent an early exit in stage #2", 0}, 54 | { "state", 's', "recovered_state (hex)", 0, "The S recovered in stage #2", 0}, 55 | { "iv", 'i', "iv (hex)", 0, "The IV", 0}, 56 | { "flag", 'f', "dir_flag {0,1}", 0, "The direction flag", 0}, 57 | { "core", 'c', "nr_cores", 0, "The number of cores to use (default is maximum available)", 0}, 58 | { "verbose", 'v', 0, 0, "Increase the verbosity level (default: 0)", 0}, 59 | { 0 } 60 | }; 61 | 62 | // https://helloacm.com/c-coding-exercise-number-of-1-bits-revisited/ 63 | int hamming_weight(uint64_t n) 64 | { 65 | int r = n & 1; 66 | while(n >>= 1) 67 | r += (n & 1); 68 | return r; 69 | } 70 | 71 | int handle_cpu(long *nr_cpu) 72 | { 73 | long nr_available_cores = cpu_get_nr_cores(); 74 | 75 | if(nr_available_cores < 1 || *nr_cpu < 1) 76 | 
return -1; 77 | 78 | // If we requested more CPU than available. 79 | if(*nr_cpu > nr_available_cores) { 80 | printf("[!] Requested: %ld / %ld cores, providing: %ld\n", *nr_cpu, nr_available_cores, nr_available_cores); 81 | *nr_cpu = nr_available_cores; 82 | } 83 | 84 | return 0; 85 | } 86 | 87 | #if OPTIM_BATCH 88 | // Ok this is a very simple/lazy parser but users ought to know what they 89 | // do anyway... :) 90 | int handle_batch(char *arg, struct arguments *arguments) 91 | { 92 | char *chunk = strtok(arg, ":,"); 93 | int i=0; 94 | while(chunk) { 95 | 96 | // Overflow? ;> 97 | if((i/2) >= NR_KS_MAX) { 98 | printf("[!] Too many keystreams specified, keeping the %d first.\n", NR_KS_MAX); 99 | break; 100 | } 101 | 102 | if(!(i%2)) { 103 | arguments->target.keystream[i/2].bitvector = strtoull(chunk, NULL, 16); // 64 bits value 104 | if(!arguments->target.keystream[i/2].bitvector) 105 | return -1; 106 | arguments->target.keystream[i/2].bitlength = 64; 107 | arguments->target.keystream[i/2].bitmask = 0xffffffffffffffff; 108 | arguments->target.keystream[i/2].solved = 0; 109 | arguments->target.nr_targets += 1; 110 | } else { 111 | arguments->target.keystream[(i-1)/2].bitlength = strtoul(chunk, NULL, 10); 112 | if(arguments->target.keystream[(i-1)/2].bitlength > 64 || arguments->target.keystream[(i-1)/2].bitlength < 56) 113 | return -2; 114 | if(arguments->target.keystream[(i-1)/2].bitlength == 64) 115 | arguments->target.keystream[(i-1)/2].bitmask = 0xffffffffffffffff; 116 | else 117 | arguments->target.keystream[(i-1)/2].bitmask = ((1UL<<(arguments->target.keystream[(i-1)/2].bitlength))-1); 118 | } 119 | chunk = strtok(NULL, ":,"); 120 | i++; 121 | } 122 | 123 | return 0; 124 | } 125 | #endif 126 | 127 | static error_t parse_opt(int key, char *arg, struct argp_state *state) 128 | { 129 | struct arguments *arguments = state->input; 130 | long nr_cpu; 131 | 132 | switch (key) { 133 | case 't': 134 | arguments->mode = MODE_TEST; 135 | break; 136 | case 'b': 137 | 
arguments->mode = MODE_BENCH; 138 | break; 139 | case 'p': 140 | arguments->mode = MODE_PRECOMPUTATION; 141 | break; 142 | case 'x': 143 | arguments->mode = MODE_BRUTEFORCE; 144 | break; 145 | case 'a': 146 | arguments->all = 1; 147 | break; 148 | case 'r': 149 | arguments->mode = MODE_REVERSE; 150 | break; 151 | case 'd': 152 | arguments->dir_name = strdup(arg); 153 | break; 154 | case 's': 155 | arguments->S = strtoull(arg, NULL, 16); // 64 bits value 156 | arguments->S_is_set = 1; 157 | break; 158 | case 'i': 159 | arguments->IV = strtoul(arg, NULL, 16); // 32 bits value 160 | arguments->IV_is_set = 1; 161 | break; 162 | #if OPTIM_BATCH 163 | case 'k': 164 | arguments->batch = strdup(arg); 165 | if(handle_batch(arguments->batch, arguments) < 0) { 166 | free(arguments->batch); 167 | return ARGP_ERR_UNKNOWN; 168 | } 169 | free(arguments->batch); 170 | arguments->batch = NULL; 171 | break; 172 | #else 173 | // Without the batch mode, we only care about a single keystream 174 | case 'k': 175 | // Currently we do not accept 0 bitvector because errno is not 176 | // set properly to EINVAL even when the results of the conversion 177 | // is messed up therefore we have no way to distinguish errors 178 | // properly. 179 | arguments->target.keystream[0].bitvector = strtoull(arg, NULL, 16); // 64 bits value 180 | if(!arguments->target.keystream[0].bitvector) 181 | return ARGP_ERR_UNKNOWN; 182 | arguments->target.keystream[0].solved = 0; // only interesting in batch mode. 
183 | arguments->target.nr_targets += 1; 184 | break; 185 | case 'l': 186 | arguments->target.keystream[0].bitlength = strtoul(arg, NULL, 10); 187 | if(arguments->target.keystream[0].bitlength >= 64 || arguments->target.keystream[0].bitlength < 56) 188 | arguments->target.keystream[0].bitmask = 0xFFFFFFFFFFFFFFFF; 189 | else 190 | arguments->target.keystream[0].bitmask = ((1UL<<(arguments->target.keystream[0].bitlength))-1); 191 | break; 192 | #endif 193 | case 'f': 194 | arguments->flag = (strtol(arg, NULL, 10) == 1); // 1 bit value 195 | arguments->flag_is_set = 1; 196 | break; 197 | case 'c': 198 | nr_cpu = strtol(arg, NULL, 10); 199 | if(handle_cpu(&nr_cpu) < 0) 200 | return ARGP_ERR_UNKNOWN; 201 | arguments->nr_cores = nr_cpu; 202 | break; 203 | case 'v': 204 | arguments->verbosity += 1; 205 | break; 206 | case ARGP_KEY_ARG: 207 | return 0; 208 | default: 209 | return ARGP_ERR_UNKNOWN; 210 | } 211 | return 0; 212 | } 213 | 214 | struct argp argp = { options, parse_opt, args_doc, doc, 0, 0, 0 }; 215 | -------------------------------------------------------------------------------- /source/bench.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "exploit.h" 10 | #include "file.h" 11 | #include "timing.h" 12 | #include "gea1.h" 13 | 14 | // from main.c 15 | extern int verbosity; 16 | 17 | // LFSR_B 18 | 19 | void bench_LFSR_B() 20 | { 21 | uint64_t c1,c2; 22 | CLOCK_VARS(1); 23 | volatile uint64_t bitstream; 24 | uint32_t i, nr_calls = 1000; 25 | 26 | bitstream = 0; 27 | CLOCK_START(0); 28 | c1 = __rdtsc(); 29 | for(i=0; i lfsr_galois_B(bitstream:%d bits): [time = %.2f us, cycles=%ld]\n", 64, US(CLOCK_GET(0))/nr_calls, (c2-c1)/nr_calls); 35 | } 36 | 37 | // RegisterB 38 | 39 | void __bench_RegisterB(int nr_bits) 40 | { 41 | uint64_t c1,c2; 42 | CLOCK_VARS(1); 43 | volatile uint64_t bitstream; 44 | uint32_t i, nr_calls=1000; 
45 | 46 | bitstream = 0; 47 | CLOCK_START(0); 48 | c1 = __rdtsc(); 49 | for(i=0; i RegisterB:\n"); 62 | for(i=20; i RegisterB2:\n"); 94 | for(i=20; i RegisterA:\n"); 126 | for(i=20; i RegisterA2:\n"); 156 | for(i=20; i RegisterC:\n"); 188 | for(i=20; i RegisterC2:\n"); 217 | for(i=20; i Seq Search on average over %d (unsorted) elements: [time=%.4f ms, cycles=%ld]\n", nr_elements, MS(CLOCK_GET(0))/nr_calls, (c2-c1)/nr_calls); 250 | 251 | if(verbosity) 252 | print_uint64_array_as_hex(p, 3); 253 | 254 | CLOCK_START(0); 255 | c1 = __rdtsc(); 256 | radix_sort(p, nr_elements); 257 | c2 = __rdtsc(); 258 | CLOCK_STOP(0); 259 | printf("\t-> Radix Sort over %d (unsorted) elements took: [time=%.4f ms, cycles=%ld]\n", nr_elements, MS(CLOCK_GET(0))/nr_calls, (c2-c1)); 260 | 261 | if(verbosity) 262 | print_uint64_array_as_hex(p, 3); 263 | 264 | CLOCK_START(0); 265 | c1 = __rdtsc(); 266 | for(i=0; i Seq Search on average over %d (sorted) elements: [time=%.4f ms, cycles=%ld]\n", nr_elements, MS(CLOCK_GET(0))/nr_calls, (c2-c1)/nr_calls); 272 | 273 | CLOCK_START(0); 274 | c1 = __rdtsc(); 275 | for(i=0; i Bin Search on average over %d (sorted) elements: [time=%.4f ms, cycles=%ld]\n", nr_elements, MS(CLOCK_GET(0))/nr_calls, (c2-c1)/nr_calls); 281 | } 282 | 283 | void __bench_sort2_cuckoo(char *directory) 284 | { 285 | char fname[512]; 286 | uint64_t nr_success = 0; 287 | uint64_t nr_failures = 0; 288 | uint64_t nr_collisions = 0; 289 | uint64_t nr_insert_errors = 0; 290 | uint32_t key, value; 291 | int64_t sz, *p = NULL; 292 | CLOCK_VARS(1); 293 | uint64_t nr_elements = (1UL<<32); 294 | uint64_t j; 295 | BUCKET *ht = NULL; 296 | int i, ret; 297 | 298 | if(!is_directory_created(directory)) { 299 | printf("[-] Wrong directory, cannot run bench_sort2()\n"); 300 | return; 301 | } 302 | 303 | i = random() % 256; 304 | 305 | printf("\t-> Cuckoo benchmarking (can take a bit of time)\n"); 306 | 307 | memset(fname, 0, sizeof(fname)); 308 | snprintf(fname, sizeof(fname)-1, 
"%s/"SORTED_TABLE_FMT, directory, i); 309 | sz = get_file_size(fname); 310 | if(sz < 0) { 311 | return; 312 | } 313 | 314 | assert(sz == 8*N); 315 | 316 | p = malloc(sz); 317 | assert(p); 318 | 319 | ret = read_file(fname, (uint8_t *)p, sz); 320 | if(ret) { 321 | printf("[-] Error, could not read %lu bytes in %s\n", sz, fname); 322 | free(p); 323 | return; 324 | } 325 | 326 | // Step1. Prepare the table 327 | cuckoo_setup_ht(&ht); 328 | for(int j=0; j> 32); 331 | value = (uint32_t)(p[j]); 332 | 333 | if(!key) { 334 | if(verbosity > 1) 335 | printf("[!] Skipping 0 key!\n"); 336 | continue; 337 | } 338 | 339 | ret = cuckoo_put(key, value, ht); 340 | if(ret == -1) { 341 | nr_insert_errors++; 342 | } 343 | 344 | if(ret == -2) { 345 | nr_collisions++; 346 | } 347 | 348 | } 349 | free(p); 350 | printf("\t\t+ cuckoo_put(): Inserted %lu/%d elements of table %d\n", N-nr_collisions-nr_insert_errors, N, i); 351 | printf("\t\t Collisions: %lu [%.2f%%]\n", nr_collisions, (double)nr_collisions*100.0/N); 352 | 353 | // Step2. Searching elements. 354 | CLOCK_START(0); 355 | for (j=0; j < nr_elements; j++) { 356 | ret = cuckoo_lookup(j, ht); 357 | if (ret == -1) { 358 | nr_failures += 1; 359 | } else { 360 | nr_success += 1; 361 | } 362 | } 363 | CLOCK_STOP(0); 364 | 365 | printf("\t\t+ cuckoo_lookup(): Searching for %lu elements in table %d took: %.2f ms, i.e., %.2f ns per element.\n", nr_elements, 366 | i, 367 | MS(CLOCK_GET(0)), 368 | NS(CLOCK_GET(0)) / ((double)(nr_elements))); 369 | printf("\t\t Success: %lu, Failure: %lu\n", nr_success, nr_failures); 370 | 371 | cuckoo_free_ht(&ht); 372 | } 373 | 374 | extern uint32_t *sorted_Tab[NR_V_ELEMENTS_MAX]; 375 | extern uint64_t TabIndex[NR_V_ELEMENTS_MAX][(1< b-search benchmarking (can take a bit of time)\n"); 397 | 398 | // Step1. Prepare the table 399 | alloc_all_sorted_Tab(); 400 | load_sorted_Tab(directory, 16); 401 | i = random() % 16; 402 | 403 | // Step1. Searching elements. 
404 | // Since nr_elements is 1<<28, we should have nr_success close to 1<<20, 405 | // which is true practically speaking. 406 | CLOCK_START(0); 407 | for (j=0; j < (nr_elements); j++) { 408 | index_qword = TabIndex[i][(j >> (32-NR_BITS_IDX)) & MASK_IDX]; 409 | idx1 = (index_qword>>32); 410 | idx2 = (index_qword&0xFFFFFFFF); 411 | int ret = b_search32(j, &sorted_Tab[i][idx1], idx2-idx1+1); 412 | if (ret == -1) { 413 | nr_failures += 1; 414 | } else { 415 | nr_success += 1; 416 | } 417 | } 418 | CLOCK_STOP(0); 419 | 420 | printf("\t\t+ b_search32(): Searching for %lu elements in table %d took: %.2f ms, i.e., %.2f ns per element.\n", nr_elements, 421 | i, 422 | MS(CLOCK_GET(0)), 423 | NS(CLOCK_GET(0)) / ((double)(nr_elements))); 424 | printf("\t\t Success: %lu, Failure: %lu\n", nr_success, nr_failures); 425 | 426 | free(p); 427 | free_all_sorted_Tab(); 428 | } 429 | 430 | void bench_sort2(char *directory) 431 | { 432 | __bench_sort2_bsearch(directory); 433 | __bench_sort2_cuckoo(directory); 434 | } 435 | -------------------------------------------------------------------------------- /source/bitops.h: -------------------------------------------------------------------------------- 1 | #ifndef __BITOPS_H__ 2 | #define __BITOPS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // Macros 9 | #define GET_BIT(x, i) (((x)>>(i))&1) 10 | #define SET_BIT(x, i, b) ((x) |= ((b)<<(i))) 11 | 12 | // Inlined routines 13 | 14 | #if 0 15 | static __inline__ 16 | uint64_t invert_bits_qword(uint64_t a) 17 | { 18 | uint64_t b = 0; 19 | uint64_t x; 20 | int i; 21 | 22 | for(i=0; i<64; i++) { 23 | x = GET_BIT(a, i); 24 | b |= (x<<(63-i)); 25 | } 26 | return b; 27 | } 28 | 29 | static __inline__ 30 | uint64_t invert_bits_dword(uint32_t a) 31 | { 32 | uint64_t b = 0; 33 | uint32_t x; 34 | int i; 35 | 36 | for(i=0; i<32; i++) { 37 | x = GET_BIT(a, i); 38 | b |= (x<<(31-i)); 39 | } 40 | return b; 41 | } 42 | #endif 43 | 44 | static __inline__ 45 | uint64_t rotate_left(uint64_t val, int 
n, int shift) 46 | { 47 | return ((val >> (n-shift)) | ((val << shift) & ((1UL<> shift)); 54 | } 55 | 56 | #endif /* __BITOPS_H__ */ 57 | -------------------------------------------------------------------------------- /source/cpu.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | 6 | #include "exploit.h" 7 | 8 | void cpu_get_work(uint64_t *lower_bound, uint64_t *upper_bound, uint32_t cpu_id, uint32_t round_idx, uint32_t nr_cores, uint32_t nr_rounds, uint64_t nr_elements) 9 | { 10 | uint64_t lower_bound_0, upper_bound_0; 11 | uint32_t delta; 12 | 13 | lower_bound_0 = (cpu_id+0) * (uint32_t)(nr_elements / nr_cores); 14 | if((cpu_id+1) == nr_cores) 15 | upper_bound_0 = nr_elements - 1; 16 | else 17 | upper_bound_0 = (cpu_id+1) * (uint32_t)(nr_elements / nr_cores) - 1; 18 | 19 | delta = (uint32_t)((upper_bound_0-lower_bound_0+1) / nr_rounds); 20 | 21 | *lower_bound = lower_bound_0 + (round_idx)*(delta); 22 | if((round_idx+1) == nr_rounds) 23 | *upper_bound = upper_bound_0; 24 | else 25 | *upper_bound = lower_bound_0 + (round_idx+1)*(delta) - 1; 26 | } 27 | 28 | int cpu_get_nr_cores() 29 | { 30 | return sysconf(_SC_NPROCESSORS_ONLN); 31 | } 32 | 33 | int cpu_bind(int cpu_id) 34 | { 35 | cpu_set_t set; 36 | int ret; 37 | 38 | CPU_ZERO(&set); 39 | CPU_SET(cpu_id, &set); 40 | ret = sched_setaffinity(0, sizeof(set), &set); 41 | return ret; 42 | } 43 | 44 | -------------------------------------------------------------------------------- /source/cuckoo.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUCKOO_H__ 2 | #define __CUCKOO_H__ 3 | 4 | #include "exploit.h" 5 | 6 | /* Macros */ 7 | 8 | #ifndef NR_BITS_TAC 9 | #define NR_BITS_TAC 24 10 | #endif 11 | 12 | #define DOMAINSIZE 32 13 | #define LOGN NR_BITS_TAC 14 | #define N NR_TAC_ELEMENTS 15 | #define NR_BUCKETS (21307065) // aka ((size_t)ceil(1.27*N)) 16 | 17 | #define DIFF 
(DOMAINSIZE-LOGN) 18 | #define MAXDISTANCE (0x5555AAAA) 19 | 20 | #define H1(x) ((x) % NR_BUCKETS) 21 | #define H2(x) (((x)>>DIFF) % NR_BUCKETS) 22 | #define H3(x) (H1(((x) ^ MAXDISTANCE))) 23 | 24 | /* Types */ 25 | 26 | typedef struct _tt { 27 | uint32_t key; // This will be used to store 32 bits of keystream. 28 | uint32_t value; // This will be used to store a file offset. 29 | } BUCKET; 30 | 31 | // Note: int works as a return type because the value is an index which 32 | // can never be bigger than 2^24. 33 | static __inline__ 34 | int cuckoo_lookup(uint32_t key, BUCKET *ht) 35 | { 36 | uint32_t index = H1(key); 37 | 38 | if (ht[index].key == key) { 39 | return ht[index].value; 40 | } 41 | 42 | index = H2(key); 43 | 44 | if (ht[index].key == key) { 45 | return ht[index].value; 46 | } 47 | 48 | index = H3(key); 49 | 50 | if (ht[index].key == key) { 51 | return ht[index].value; 52 | } 53 | 54 | return -1; 55 | } 56 | 57 | #endif /* __CUCKOO_H__ */ 58 | -------------------------------------------------------------------------------- /source/data.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "exploit.h" 10 | #include "file.h" 11 | 12 | // from main.c 13 | extern int verbosity; 14 | 15 | // Exported globals 16 | uint32_t *unsorted_Tab[NR_V_ELEMENTS_MAX]; 17 | uint32_t *sorted_Tab[NR_V_ELEMENTS_MAX]; 18 | uint64_t TabIndex[NR_V_ELEMENTS_MAX][(1< 1) 47 | printf("\t-> Allocated: %.2f Gb\n", (float)(size_allocated*NR_V_ELEMENTS)/(1<<30)); 48 | return 0; 49 | } 50 | 51 | void free_unsorted_Tab(int lower_bound, int upper_bound) 52 | { 53 | uint64_t size_allocated = 0; 54 | int i; 55 | 56 | size_allocated = 2 * sizeof(uint32_t) * (upper_bound - lower_bound + 1); 57 | for(i=0; i Freed: %.2f Gb\n", (float)(size_allocated*NR_V_ELEMENTS)/(1<<30)); 63 | } 64 | 65 | int save_unsorted_Tab(int lower_bound, int upper_bound, char 
*dirname) 66 | { 67 | char fname[512]; 68 | int i, fd, ret; 69 | uint32_t nr_bytes_remaining; 70 | int offset; 71 | 72 | for(i=0; i 1) 127 | printf("\t-> Allocated: %.2f Gb\n", (float)(size_allocated*NR_V_ELEMENTS)/(1<<30)); 128 | 129 | return 0; 130 | } 131 | 132 | void free_all_sorted_Tab() 133 | { 134 | uint64_t size_allocated = 0; 135 | int i; 136 | 137 | size_allocated = sizeof(uint32_t) * NR_TAC_ELEMENTS; 138 | for(i=0; i Freed: %.2f Gb\n", (float)(size_allocated*NR_V_ELEMENTS)/(1<<30)); 145 | 146 | return; 147 | } 148 | 149 | int load_sorted_Tab(char *dirname, int nr_tables) 150 | { 151 | char fname[512]; 152 | int i, ret; 153 | uint32_t *p; 154 | uint64_t *q, sz = 2 * sizeof(uint32_t) * NR_TAC_ELEMENTS; 155 | 156 | ASSERT(nr_tables <= NR_V_ELEMENTS_MAX); 157 | 158 | char *tmp = malloc(sz); 159 | if(!tmp) { 160 | printf("[-] load_sorted_Tab(): Could not allocate %ld bytes [errno:%d]!\n", sz, errno); 161 | return -1; 162 | } 163 | 164 | for(i=0; i> 32); 179 | } 180 | 181 | memset(fname, 0, sizeof(fname)); 182 | snprintf(fname, sizeof(fname)-1, "%s/"SORTED_INDEX_FMT, dirname, i); 183 | ret = read_file(fname, (uint8_t *)TabIndex[i], (1< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuckoo.h" 13 | 14 | /* Macros */ 15 | 16 | // MISC 17 | 18 | #define SORTED_TABLE_FMT "sorted_%.3d.tbl" 19 | #define SORTED_INDEX_FMT "sorted_%.3d.idx" 20 | #define SORTED_CUCKOO_FMT "sorted_%.3d.cuckoo" 21 | #define UNSORTED_TABLE_FMT "unsorted_%.3d_%.10d_%.10d.tbl" 22 | 23 | #define OPTIM_LKUP_DEFAULT 0 // Slow bsearch 24 | #define OPTIM_LKUP_CUCKOO 1 25 | #define OPTIM_LKUP_BSEARCH 2 // Fast bsearch 26 | 27 | #define OPTIM_MEM_LOW 0 // At least 16 Gb of RAM 28 | #define OPTIM_MEM_HIGH 1 // At least 64 Gb of RAM // Default 29 | 30 | #ifndef __PAGE_SZ 31 | #define __PAGE_SZ 4096 32 | #endif 33 | #define ROUND_PAGESZ(x) ( ((x) & (__PAGE_SZ-1)) == 0 ? 
((x)/__PAGE_SZ) : (((x) + __PAGE_SZ - (x & (__PAGE_SZ-1))) / __PAGE_SZ) ) 34 | 35 | // optimizations 36 | 37 | #define likely(x) __builtin_expect(!!(x), 1) 38 | #define unlikely(x) __builtin_expect(!!(x), 0) 39 | 40 | #ifndef OPTIM_MEM 41 | #define OPTIM_MEM OPTIM_MEM_HIGH 42 | #endif 43 | 44 | #ifndef OPTIM_BATCH 45 | #define OPTIM_BATCH 0 46 | #endif 47 | 48 | #ifndef OPTIM_SKIP_COLLISIONS 49 | #define OPTIM_SKIP_COLLISIONS 1 50 | #endif 51 | 52 | #ifndef OPTIM_SCHED 53 | #define OPTIM_SCHED 1 // Needs a small patch before. 54 | #endif 55 | 56 | #ifndef OPTIM_LOOKUP 57 | #define OPTIM_LOOKUP OPTIM_LKUP_CUCKOO 58 | //~ #define OPTIM_LOOKUP OPTIM_LKUP_BSEARCH 59 | #endif 60 | 61 | #ifndef OPTIM_LIN_ALG 62 | #define OPTIM_LIN_ALG 1 63 | #endif 64 | 65 | // OPTIM_LKUP_BSEARCH 66 | 67 | #define NR_BITS_IDX 8 68 | #define MASK_IDX ((1< 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | static __inline__ 13 | int get_self_absolute_path(char *buffer, size_t bufsiz) 14 | { 15 | ssize_t ret = readlink("/proc/self/exe", buffer, bufsiz); 16 | if(ret < 0 || (size_t)ret == bufsiz) { 17 | printf("[-] readlink() was not provided a big enough buffer!\n"); 18 | return -1; 19 | } 20 | 21 | return 0; 22 | } 23 | 24 | static __inline__ 25 | int is_directory_created(char *dirname) 26 | { 27 | DIR *d; 28 | 29 | d = opendir(dirname); 30 | if (!d) { 31 | return 0; 32 | } 33 | closedir(d); 34 | return 1; 35 | } 36 | 37 | // Good enough for now. 38 | static __inline__ 39 | int create_directory(char *dir_name) 40 | { 41 | struct stat st = {0}; 42 | int ret; 43 | 44 | ret = stat(dir_name, &st); 45 | if (!ret) { 46 | printf("[!] Directory already existing, overwriting its content...\n"); 47 | return 0; 48 | } 49 | 50 | ret = mkdir(dir_name, 0700); 51 | if(ret < 0) { 52 | printf("[-] mkdir() failed! 
[errno:%d]\n", errno); 53 | return -1; 54 | } 55 | 56 | return 0; 57 | } 58 | 59 | static __inline__ 60 | int64_t get_file_size(char *fname) 61 | { 62 | struct stat st; 63 | int ret; 64 | 65 | memset(&st, 0, sizeof(st)); 66 | ret = lstat(fname, &st); 67 | if(ret < 0) { 68 | printf("[-] Error, lstat() failed [err:%d]\n", errno); 69 | return -1; 70 | } 71 | return st.st_size; 72 | } 73 | 74 | static __inline__ 75 | int read_file(char *fname, uint8_t *buffer, uint64_t buffer_size) 76 | { 77 | uint64_t nr_bytes_remaining, offset; 78 | int fd, ret; 79 | 80 | fd = open(fname, O_RDONLY); 81 | if(fd < 0) { 82 | printf("[-] Error open(%s) failed: [errno:%d]\n", fname, errno); 83 | return -1; 84 | } 85 | 86 | // Depending on the disk or other factors, read() "may" return earlier. 87 | nr_bytes_remaining = buffer_size; 88 | offset = 0; 89 | while(nr_bytes_remaining) { 90 | ret = read(fd, &buffer[offset], nr_bytes_remaining); 91 | if(ret < 0) { 92 | printf("[-] Error read(%lu) failed: [errno:%d]\n", nr_bytes_remaining, errno); 93 | close(fd); 94 | return -2; 95 | } 96 | nr_bytes_remaining -= ret; 97 | offset += ret; 98 | } 99 | close(fd); 100 | return 0; 101 | } 102 | 103 | static __inline__ 104 | int write_file(char *fname, uint8_t *buffer, uint32_t buffer_size) 105 | { 106 | uint32_t nr_bytes_remaining, offset; 107 | int fd, ret; 108 | 109 | fd = open(fname, O_CREAT | O_RDWR, S_IRUSR|S_IWUSR|S_IRGRP); 110 | if(fd < 0) { 111 | printf("[-] Error open() failed to create %s file [errno:%d]\n", fname, errno); 112 | return -1; 113 | } 114 | 115 | // Depending on the disk or other factors, write() "may" return earlier. 
116 | nr_bytes_remaining = buffer_size; 117 | offset = 0; 118 | while(nr_bytes_remaining) { 119 | ret = write(fd, &buffer[offset], nr_bytes_remaining); 120 | if(ret < 0) { 121 | printf("[-] Error write(%d) failed [errno:%d]\n", nr_bytes_remaining, errno); 122 | close(fd); 123 | return -2; 124 | } 125 | nr_bytes_remaining -= ret; 126 | offset += ret; 127 | } 128 | close(fd); 129 | return 0; 130 | } 131 | 132 | static __inline__ 133 | char *create_tmp_file() 134 | { 135 | uint8_t buffer[4096]; 136 | char name[512]; 137 | int ret; 138 | 139 | memset(name, 0, sizeof(name)); 140 | memset(buffer, 0, sizeof(buffer)); 141 | 142 | snprintf(name, sizeof(name)-1, "/tmp/gea1_shmem_%d_%d", getpid(), rand()); 143 | ret = write_file(name, buffer, sizeof(buffer)); 144 | if(ret < 0) 145 | return NULL; 146 | return strdup(name); 147 | } 148 | #endif /* __FILE_H__ */ 149 | -------------------------------------------------------------------------------- /source/gea1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "exploit.h" 7 | #include "bitops.h" 8 | #include "gea1.h" 9 | #include "transform.h" 10 | 11 | uint8_t f_table[128]; 12 | 13 | // Unfortunately the penalty will be the bit extraction time. 
14 | void setup_f_table(void) 15 | { 16 | int i, res; 17 | uint32_t x0, x1, x2, x3, x4, x5, x6; 18 | for(i=0; i<128; i++) { 19 | x0 = GET_BIT(i,0); 20 | x1 = GET_BIT(i,1); 21 | x2 = GET_BIT(i,2); 22 | x3 = GET_BIT(i,3); 23 | x4 = GET_BIT(i,4); 24 | x5 = GET_BIT(i,5); 25 | x6 = GET_BIT(i,6); 26 | res = f(x0, x1, x2, x3, x4, x5, x6); 27 | f_table[i] = res; 28 | } 29 | } 30 | 31 | uint32_t apply_f_2_S_fwd(uint8_t *S) 32 | { 33 | return f(S[3], S[12], S[22], S[38], S[42], S[55], S[63]); 34 | } 35 | 36 | uint32_t apply_f_2_S_bwd(uint8_t *S) 37 | { 38 | return f(S[2], S[11], S[21], S[37], S[41], S[54], S[62]); 39 | } 40 | 41 | void clock_S_backward(uint8_t *S_out, uint8_t *S_in, uint8_t *F, int F_len) 42 | { 43 | int i=0, j=0; 44 | uint32_t old_fb, old_b0; 45 | 46 | ASSERT(F_len > 0); 47 | 48 | memcpy(S_out, S_in, 64); 49 | for(i=0; i 0); 66 | 67 | memcpy(S_out, S_in, 64); 68 | for(i=0; i>= 1; 111 | lfsr &= ((1UL << REG_A_SIZE)-1); 112 | if (lsb) { 113 | res |= (1UL<>= 1; 132 | lfsr &= ((1UL << REG_B_SIZE)-1); 133 | if (lsb) { 134 | res |= (1UL<>= 1; 153 | lfsr &= ((1UL << REG_C_SIZE)-1); 154 | 155 | if (lsb) { 156 | res |= (1UL<>= 1; 185 | if (lsb) { 186 | lfsr ^= (0x5dd89b8d); // aka 0x58ec8ddd reversed 187 | } 188 | } 189 | return res; 190 | } 191 | 192 | // TODO: nr_bits vs 32 bits 193 | uint64_t RegisterB(uint32_t state, int nr_bits) 194 | { 195 | uint32_t lfsr = state; 196 | uint64_t res = 0; 197 | uint64_t b; 198 | int lsb, period = 0; 199 | 200 | ASSERT(nr_bits <= 64); 201 | 202 | for(period=0; period>= 1; 215 | if (lsb) { 216 | lfsr ^= (0xf1c0f045); // aka a20f038f reversed 217 | } 218 | } 219 | return res; 220 | } 221 | 222 | uint64_t RegisterC(uint64_t state, int nr_bits) 223 | { 224 | uint64_t lfsr = state; 225 | uint64_t res = 0; 226 | uint64_t b; 227 | int lsb, period = 0; 228 | 229 | ASSERT(nr_bits <= 64); 230 | 231 | for(period=0; period>= 1; 244 | if (lsb) { 245 | lfsr ^= (0x150e6fa24UL); // aka 0x48bece15 reversed 246 | } 247 | } 248 | return res; 249 | 
} 250 | 251 | uint64_t init_A(uint64_t S) 252 | { 253 | uint64_t lfsr = 0; 254 | uint64_t m0, m1, lsb; 255 | uint64_t state; 256 | int period = 0; 257 | 258 | state = rotate_right(S, 64, SHIFT_A); 259 | for(period=0; period<64; period++) { 260 | 261 | m0 = lfsr & 1; 262 | m1 = state & 1; 263 | lsb = m0 ^ m1; 264 | lfsr >>= 1; 265 | lfsr &= ((1UL << REG_A_SIZE)-1); 266 | if (lsb) { 267 | lfsr ^= (0x5dd89b8d); // aka 0x58ec8ddd reversed 268 | } 269 | state = rotate_right(state, 64, 1); 270 | 271 | } 272 | return lfsr; 273 | } 274 | 275 | uint64_t init_B(uint64_t S) 276 | { 277 | uint64_t lfsr = 0; 278 | uint64_t m0, m1, lsb; 279 | uint64_t state; 280 | int period = 0; 281 | 282 | state = rotate_right(S, 64, SHIFT_B); 283 | for(period=0; period<64; period++) { 284 | m0 = lfsr & 1; 285 | m1 = state & 1; 286 | lsb = m0 ^ m1; 287 | lfsr >>= 1; 288 | lfsr &= ((1UL << REG_B_SIZE)-1); 289 | if (lsb) { 290 | lfsr ^= (0xf1c0f045); // aka 0xa20f038f reversed 291 | } 292 | state = rotate_right(state, 64, 1); 293 | } 294 | return lfsr; 295 | } 296 | 297 | uint64_t init_C(uint64_t S) 298 | { 299 | uint64_t lfsr = 0; 300 | uint64_t m0, m1, lsb; 301 | uint64_t state; 302 | int period = 0; 303 | 304 | state = rotate_right(S, 64, SHIFT_C); 305 | for(period=0; period<64; period++) { 306 | m0 = lfsr & 1; 307 | m1 = state & 1; 308 | lsb = m0 ^ m1; 309 | lfsr >>= 1; 310 | lfsr &= ((1UL << REG_C_SIZE)-1); 311 | if (lsb) { 312 | lfsr ^= (0x150e6fa24UL); // aka 0x48bece15 reversed 313 | } 314 | state = rotate_right(state, 64, 1); 315 | } 316 | return lfsr; 317 | } 318 | 319 | // It is not optimized at all but we do not use it anyway. 320 | // The function is just meant to verify the correctness of the 321 | // implementation. 
322 | 323 | #define __GEA1_DBG__ 0 324 | 325 | int GEA1(uint64_t K, uint32_t IV, int dir, uint64_t *bitstream) 326 | { 327 | uint8_t S[64]; 328 | uint8_t K_array[64]; 329 | uint8_t IV_array[32]; 330 | uint64_t qSA, qSB, qSC; 331 | uint64_t qS = 0; 332 | uint64_t LA, LB, LC; 333 | 334 | ASSERT(bitstream); 335 | 336 | transform_dword_2_list(IV, IV_array, 32); 337 | transform_qword_2_list(K, K_array, 64); 338 | init_S(S, K_array, IV_array, dir); 339 | transform_list_2_qword(S, 64, &qS); 340 | 341 | #if __GEA1_DBG__ 342 | printf("K = %lx, IV = %x, dir=%d\n", K, IV, dir); 343 | printf("S = %lx\n", qS); 344 | #endif 345 | 346 | qSA = init_A(qS); 347 | qSB = init_B(qS); 348 | qSC = init_C(qS); 349 | 350 | #if __GEA1_DBG__ 351 | printf("A = %lx\n", qSA); 352 | printf("B = %lx\n", qSB); 353 | printf("C = %lx\n", qSC); 354 | #endif 355 | 356 | if(!qSA) { 357 | qSA |= 1; 358 | } 359 | 360 | if(!qSB) { 361 | qSB |= 1; 362 | } 363 | 364 | if(!qSC) { 365 | qSC |= 1; 366 | } 367 | 368 | LA = RegisterA2(qSA, 64); 369 | LB = RegisterB2(qSB, 64); 370 | LC = RegisterC2(qSC, 64); 371 | 372 | *bitstream = LA ^ LB ^ LC; 373 | #if __GEA1_DBG__ 374 | printf("Bitstream = %lx\n", *bitstream); 375 | #endif 376 | return 0; 377 | } 378 | -------------------------------------------------------------------------------- /source/gea1.h: -------------------------------------------------------------------------------- 1 | #ifndef __GEA1_H__ 2 | #define __GEA1_H__ 3 | 4 | #include "bitops.h" 5 | 6 | // The original function which is slow as hell from an assembly point of 7 | // view. Should absolutely not be used as if during bf stages. 
8 | 9 | static __inline__ 10 | uint32_t f(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6) 11 | { 12 | uint32_t r; 13 | 14 | r = x0*x2*x5*x6 + x0*x3*x5*x6 + x0*x1*x5*x6; 15 | r += x1*x2*x5*x6 + x0*x2*x3*x6 + x1*x3*x4*x6; 16 | r += x1*x3*x5*x6 + x0*x2*x4 + x0*x2*x3 + x0*x1*x3; 17 | r += x0*x2*x6 + x0*x1*x4 + x0*x1*x6; 18 | r += x1*x2*x6 + x2*x5*x6 + x0*x3*x5 + x1*x4*x6; 19 | r += x1*x2*x5 + x0*x3 + x0*x5 + x1*x3; 20 | r += x1*x5 + x1*x6 + x0*x2 + x1 + x2*x3 + x2*x5 + x2*x6 + x4*x5 + x5*x6 + x2 + x3 + x5; 21 | return r&1; 22 | } 23 | 24 | // First optimization for f computation. 25 | extern uint8_t f_table[128]; 26 | 27 | static __inline__ 28 | uint64_t RegisterA2(uint64_t state, int nr_bits) 29 | { 30 | uint64_t lfsr = state; 31 | uint64_t res = 0; 32 | uint64_t b; 33 | int lsb, period = 0; 34 | 35 | ASSERT(nr_bits <= 64); 36 | 37 | uint8_t x00; 38 | for(period=0; period>= 1; 53 | if (lsb) { 54 | lfsr ^= (0x5dd89b8d); // aka 0x58ec8ddd reversed 55 | } 56 | } 57 | return res; 58 | } 59 | 60 | // TODO: nr_bits vs 32 bits 61 | static __inline__ 62 | uint64_t RegisterB2(uint32_t state, int nr_bits) 63 | { 64 | uint32_t lfsr = state; 65 | uint64_t res = 0; 66 | uint64_t b; 67 | int lsb, period = 0; 68 | 69 | ASSERT(nr_bits <= 64); 70 | 71 | uint8_t x00; 72 | for(period=0; period>= 1; 87 | if (lsb) { 88 | lfsr ^= (0xf1c0f045); // aka a20f038f reversed 89 | } 90 | } 91 | return res; 92 | } 93 | 94 | static __inline__ 95 | uint64_t RegisterC2(uint64_t state, int nr_bits) 96 | { 97 | uint64_t lfsr = state; 98 | uint64_t res = 0; 99 | uint64_t b; 100 | int lsb, period = 0; 101 | 102 | ASSERT(nr_bits <= 64); 103 | 104 | uint8_t x00; 105 | for(period=0; period>= 1; 120 | if (lsb) { 121 | lfsr ^= (0x150e6fa24UL); // aka 0x48bece15 reversed 122 | } 123 | } 124 | return res; 125 | } 126 | 127 | #endif /* __GEA1_H__ */ 128 | -------------------------------------------------------------------------------- /source/linear_alg.h: 
-------------------------------------------------------------------------------- 1 | #ifndef __LINEAR_ALG_H__ 2 | #define __LINEAR_ALG_H__ 3 | 4 | /* 5 | * Vector Space elements generation 6 | */ 7 | 8 | // Generate an element based on its base2 representation. 9 | 10 | static __inline__ 11 | uint64_t compute_tac_element(uint32_t elt_idx) 12 | { 13 | uint64_t q_t = 0; 14 | uint32_t i; 15 | 16 | for(i=0; i>i)&1) { 19 | q_t ^= TAC[i]; 20 | } 21 | } 22 | 23 | return q_t; 24 | } 25 | 26 | static __inline__ 27 | uint64_t compute_ub_element(uint32_t elt_idx) 28 | { 29 | uint32_t i; 30 | uint64_t q_u = 0; 31 | 32 | for(i=0; i>i)&1) { 35 | q_u ^= UB[i]; 36 | } 37 | } 38 | 39 | return q_u; 40 | } 41 | 42 | static __inline__ 43 | uint64_t compute_v_element(uint32_t elt_idx) 44 | { 45 | uint32_t i; 46 | uint64_t q_v = 0; 47 | 48 | for(i=0; i>i)&1) 51 | q_v ^= V[i]; 52 | } 53 | 54 | return q_v; 55 | } 56 | 57 | #endif /* __LINEAR_ALG_H__ */ 58 | -------------------------------------------------------------------------------- /source/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "exploit.h" 10 | #include "file.h" 11 | #include "timing.h" 12 | #include "transform.h" 13 | 14 | int verbosity = 0; 15 | extern struct argp argp; 16 | int nr_provided_cores = 0; 17 | 18 | void do_tests() 19 | { 20 | CLOCK_VARS(1); 21 | 22 | printf("[+] Satety tests\n"); 23 | CLOCK_START(0); 24 | test_cpu_load(); 25 | test_hw(); 26 | test_transform_state1(); 27 | test_transform_state2(); 28 | test_init_S(); 29 | test_retrieve_K(); 30 | test_registerB(); 31 | test_registerA(); 32 | test_registerC(); 33 | test_rotate(); 34 | test_initA(); 35 | test_initB(); 36 | test_initC(); 37 | test_GEA1(); 38 | CLOCK_STOP(0); 39 | printf("\t-> OK [%.2fs]\n", CLOCK_GET(0)); 40 | } 41 | 42 | void do_bench(char *dirname) 43 | { 44 | printf("[+] Benchmarking\n"); 45 | 46 | 
#if OPTIM_SCHED 47 | select_computation_scheduling(); 48 | #endif 49 | bench_LFSR_B(); 50 | bench_RegisterA(); 51 | bench_RegisterA2(); 52 | bench_RegisterB(); 53 | bench_RegisterB2(); 54 | bench_RegisterC(); 55 | bench_RegisterC2(); 56 | #if OPTIM_SCHED 57 | select_io_scheduling(); 58 | #endif 59 | if(!dirname) 60 | bench_sort1(); 61 | else 62 | bench_sort2(dirname); 63 | } 64 | 65 | int do_precomputation(char *dirname) 66 | { 67 | time_t t1, t2; 68 | CLOCK_VARS(1); 69 | struct lp_arg *largs = NULL; 70 | pid_t *pids = NULL; 71 | int wstatus; 72 | int i; 73 | int nr_cores2; 74 | 75 | largs = calloc(nr_provided_cores, sizeof(struct lp_arg)); 76 | pids = calloc(nr_provided_cores, sizeof(pid_t)); 77 | 78 | if(!largs || !pids) { 79 | printf("[-] Running out of memory...\n"); 80 | free(largs); 81 | free(pids); 82 | return -1; 83 | } 84 | 85 | printf("[+] Preparing V, TAC basis\n"); 86 | CLOCK_START(0); 87 | setup_v_basis(); 88 | setup_tac_basis(); 89 | CLOCK_STOP(0); 90 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 91 | 92 | printf("[+] Preparing MB matrix\n"); 93 | CLOCK_START(0); 94 | setup_MB_matrix(); 95 | CLOCK_STOP(0); 96 | if(unlikely(verbosity)) 97 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 98 | 99 | printf("[+] Preparing the v elements for all the cores\n"); 100 | CLOCK_START(0); 101 | precompute_v_elements(); 102 | CLOCK_STOP(0); 103 | if(unlikely(verbosity)) 104 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 105 | 106 | // Generate the keystream 107 | if(NR_BITS_TAC != NR_BITS_TAC_MAX) 108 | printf("[+] Generating RegB keystreams O(2^%d) [Demo]\n", NR_BITS_TAC); 109 | else 110 | printf("[+] Generating RegB keystreams O(2^32) [Full]\n"); 111 | printf("\t-> using %d cores\n", nr_provided_cores); 112 | 113 | for(i=0; i All LP have terminated\n"); 135 | if(unlikely(verbosity)) 136 | printf("\t-> OK [ %lds ~ %.2fm]\n", t2-t1, (float)(t2-t1)/60); 137 | 138 | // Cleaning the pids array 139 | memset(pids, 0, nr_provided_cores * sizeof(pid_t)); 140 | 
memset(largs, 0, nr_provided_cores * sizeof(struct lp_arg)); 141 | 142 | printf("[+] Sorting the tables\n"); 143 | t1 = time(NULL); 144 | 145 | // Handling corner cases. 146 | nr_cores2 = nr_provided_cores; 147 | if(nr_cores2 > NR_V_ELEMENTS) 148 | nr_cores2 = NR_V_ELEMENTS; 149 | 150 | for(i=0; i OK [ %lds ~ %.2fm ]\n", t2-t1, (float)(t2-t1)/60); 171 | 172 | #if DEBUG_MALLOC 173 | printf("[+] Freeing everything!\n"); 174 | CLOCK_START(0); 175 | free_MB_matrix(); 176 | CLOCK_STOP(0); 177 | if(unlikely(verbosity)) 178 | printf("\t-> OK [ %.2fs ]\n", CLOCK_GET(0)); 179 | #endif 180 | free(largs); 181 | free(pids); 182 | return 0; 183 | } 184 | 185 | #if (OPTIM_LOOKUP == OPTIM_LKUP_BSEARCH) 186 | int fd_sorted_files[NR_V_ELEMENTS_MAX]; 187 | 188 | static __inline__ 189 | int __bruteforce_bsearch(struct arguments *args) 190 | { 191 | CLOCK_VARS(1); 192 | time_t start, end; 193 | int wstatus, i, ret; 194 | struct lp_arg *largs = NULL; 195 | pid_t *pids = NULL; 196 | char fname[512]; 197 | uint64_t size_allocated; 198 | time_t start2 = time(NULL); 199 | int fd; 200 | char *f_name_tmp = NULL; 201 | struct _target *t = NULL; 202 | key_t key; 203 | int shmid; 204 | 205 | largs = calloc(nr_provided_cores, sizeof(struct lp_arg)); 206 | pids = calloc(nr_provided_cores, sizeof(pid_t)); 207 | 208 | if(unlikely(!largs || !pids)) { 209 | printf("[-] Running out of memory...\n"); 210 | ret = -1; 211 | goto bye; 212 | } 213 | 214 | printf("[+] Preparing V, B, TAC basis\n"); 215 | CLOCK_START(0); 216 | setup_v_basis(); 217 | setup_ub_basis(); 218 | setup_tac_basis(); 219 | CLOCK_STOP(0); 220 | if(unlikely(verbosity)) 221 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 222 | 223 | printf("[+] Preparing MA, MB, MC\n"); 224 | CLOCK_START(0); 225 | setup_MA_matrix(); 226 | setup_MB_matrix(); 227 | setup_MC_matrix(); 228 | CLOCK_STOP(0); 229 | if(unlikely(verbosity)) 230 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 231 | 232 | printf("[+] Preparing the v elements for all the 
cores\n"); 233 | CLOCK_START(0); 234 | precompute_v_elements(); 235 | CLOCK_STOP(0) 236 | if(unlikely(verbosity)) 237 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 238 | 239 | // TODO: proper error handling. 240 | printf("[+] Preparing memory to load/store the results of the computation\n"); 241 | ret = alloc_all_sorted_Tab(); 242 | if(unlikely(ret)) { 243 | ret = -7; 244 | goto bye; 245 | } 246 | 247 | size_allocated = sizeof(uint32_t) * NR_TAC_ELEMENTS * NR_V_ELEMENTS; 248 | if(unlikely(verbosity)) 249 | printf("\t-> Allocated: %.2f Gb\n", (float)(size_allocated)/(1<<30)); 250 | 251 | printf("[+] Loading computation from %s/\n", args->dir_name); 252 | start = time(NULL); 253 | ret = load_sorted_Tab(args->dir_name, NR_V_ELEMENTS); // TODO. 254 | if(unlikely(ret < 0)) { 255 | ret = -6; 256 | goto bye; 257 | } 258 | 259 | end = time(NULL); 260 | if(unlikely(verbosity)) 261 | printf("\t-> OK [%lds]\n", end-start); 262 | 263 | // Open the sorted files so that the children inherits FD. 
264 | memset(fd_sorted_files, 0, sizeof(fd_sorted_files)); 265 | for(i=0; idir_name, i); 268 | fd = open(fname, O_RDONLY); 269 | if(unlikely(fd < 0)) { 270 | ret = -2; 271 | goto bye; 272 | } 273 | fd_sorted_files[i] = fd; 274 | } 275 | 276 | f_name_tmp = create_tmp_file(); 277 | if(!f_name_tmp) { 278 | printf("[-] Error, create_tmp_file() failed!\n"); 279 | ret = -3; 280 | goto bye; 281 | } 282 | 283 | if(verbosity > 1) { 284 | printf("[+] Created %s file\n", f_name_tmp); 285 | } 286 | 287 | key = ftok(f_name_tmp, getpid()&0xFF); 288 | shmid = shmget(key, 8192, 0666|IPC_CREAT); 289 | if(shmid < 0) { 290 | printf("[-] Error, shmget() failed [errno:%d]\n", errno); 291 | ret = -4; 292 | goto bye; 293 | } 294 | 295 | t = (struct _target *) shmat(shmid, (void*)0, 0); 296 | if(t==(void*)(-1)) { 297 | printf("[-] Error, shmat() failed [errno:%d]\n", errno); 298 | ret = -5; 299 | goto bye; 300 | } 301 | 302 | memcpy(t, &args->target, sizeof(struct _target)); 303 | 304 | if(NR_BITS_UB != NR_BITS_UB_MAX) 305 | #if OPTIM_BATCH 306 | printf("[+] Generating RegA+RegC keystreams (2^%d) to crack %d keystreams [Demo]\n", NR_BITS_UB, args->target.nr_targets); 307 | #else 308 | printf("[+] Generating RegA+RegC keystreams (2^%d) to crack 0x%lx [Demo]\n", NR_BITS_UB, args->target.keystream[0].bitvector); 309 | #endif 310 | else 311 | printf("[+] Generating RegA+RegC keystreams (2^32) [Full]\n"); 312 | printf("\t-> using %d cores\n", nr_provided_cores); 313 | 314 | start = time(NULL); 315 | 316 | for(i=0; idir_name; 320 | largs[i].key = key; 321 | largs[i].start = start2; 322 | largs[i].round_idx = 0; 323 | largs[i].early_exit = args->all ? 0 : 1; 324 | } 325 | 326 | for(i=0; i All LP have terminated\n"); 340 | if(unlikely(verbosity)) 341 | printf("\t-> OK [%lds]\n", end-start); 342 | 343 | ret = 0; 344 | 345 | // TODO. Missing things. 346 | bye: 347 | 348 | // Closing FD. 
349 | for(i=0; i OK [ %.2fs ]\n", cpu_time_used); 371 | #endif 372 | if(largs) 373 | free(largs); 374 | if(pids) 375 | free(pids); 376 | return ret; 377 | } 378 | 379 | #else 380 | 381 | static __inline__ 382 | int __bruteforce_cuckoo(struct arguments *args) 383 | { 384 | CLOCK_VARS(1); 385 | time_t start, end; 386 | int wstatus, i, round, ret; 387 | struct lp_arg *largs = NULL; 388 | pid_t *pids = NULL; 389 | time_t start2 = time(NULL); 390 | char *f_name_tmp = NULL; 391 | struct _target *t = NULL; 392 | key_t key; 393 | int shmid; 394 | 395 | largs = calloc(nr_provided_cores, sizeof(struct lp_arg)); 396 | pids = calloc(nr_provided_cores, sizeof(pid_t)); 397 | 398 | if(!largs || !pids) { 399 | printf("[-] Running out of memory...\n"); 400 | ret = -1; 401 | goto bye; 402 | } 403 | 404 | printf("[+] Preparing V, B, TAC basis\n"); 405 | CLOCK_START(0); 406 | setup_v_basis(); 407 | setup_ub_basis(); 408 | setup_tac_basis(); 409 | CLOCK_STOP(0); 410 | if(unlikely(verbosity)) 411 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 412 | 413 | printf("[+] Preparing MA, MB, MC\n"); 414 | CLOCK_START(0); 415 | setup_MA_matrix(); 416 | setup_MB_matrix(); 417 | setup_MC_matrix(); 418 | CLOCK_STOP(0); 419 | if(unlikely(verbosity)) 420 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 421 | 422 | printf("[+] Preparing the v elements for all the cores\n"); 423 | CLOCK_START(0); 424 | precompute_v_elements(); 425 | CLOCK_STOP(0) 426 | if(unlikely(verbosity)) 427 | printf("\t-> OK [%.2f ms]\n", MS(CLOCK_GET(0))); 428 | 429 | f_name_tmp = create_tmp_file(); 430 | if(!f_name_tmp) { 431 | printf("[-] Error, create_tmp_file() failed!\n"); 432 | ret = -2; 433 | goto bye; 434 | } 435 | 436 | if(verbosity > 1) { 437 | printf("[+] Created %s file\n", f_name_tmp); 438 | } 439 | 440 | key = ftok(f_name_tmp, getpid()&0xFF); 441 | shmid = shmget(key, 8192, 0666|IPC_CREAT); 442 | if(shmid < 0) { 443 | printf("[-] Error, shmget() failed [errno:%d]\n", errno); 444 | ret = -3; 445 | goto bye; 
446 | } 447 | 448 | t = (struct _target *) shmat(shmid, (void*)0, 0); 449 | if(t==(void*)(-1)) { 450 | printf("[-] Error, shmat() failed [errno:%d]\n", errno); 451 | ret = -4; 452 | goto bye; 453 | } 454 | 455 | memcpy(t, &args->target, sizeof(struct _target)); 456 | 457 | for(round=0; roundall && t->nr_state_recovered == t->nr_targets) { 461 | break; 462 | } 463 | 464 | printf("[+] Loading hash tables [%d,%d] from %s/\n", round*(NR_V_ELEMENTS/STG2_NR_ROUNDS), ((round+1)*(NR_V_ELEMENTS/STG2_NR_ROUNDS)-1), args->dir_name); 465 | start = time(NULL); 466 | ret = load_hash_Tab(args->dir_name, round*(NR_V_ELEMENTS/STG2_NR_ROUNDS), (NR_V_ELEMENTS/STG2_NR_ROUNDS)); 467 | if(ret < 0) { 468 | ret = -5; 469 | goto bye; 470 | } 471 | 472 | end = time(NULL); 473 | if(unlikely(verbosity)) 474 | printf("\t-> OK [%lds]\n", end-start); 475 | 476 | if(NR_BITS_UB != NR_BITS_UB_MAX) 477 | #if OPTIM_BATCH 478 | printf("[+] Generating RegA+RegC keystreams (2^%d) to crack %d keystreams [Demo]\n", NR_BITS_UB, args->target.nr_targets); 479 | #else 480 | printf("[+] Generating RegA+RegC keystreams (2^%d) to crack 0x%lx [Demo]\n", NR_BITS_UB, args->target.keystream[0].bitvector); 481 | #endif 482 | else 483 | printf("[+] Generating RegA+RegC keystreams (2^32) [Full]\n"); 484 | printf("\t-> using %d cores\n", nr_provided_cores); 485 | 486 | start = time(NULL); 487 | 488 | for(i=0; idir_name; 492 | largs[i].key = key; 493 | largs[i].start = start2; 494 | largs[i].round_idx = round; 495 | largs[i].early_exit = args->all ? 
0 : 1; 496 | } 497 | 498 | for(i=0; i All LP have terminated\n"); 512 | if(unlikely(verbosity)) 513 | printf("\t-> OK [%lds]\n", end-start); 514 | 515 | printf("[+] Unloading hash tables from %s/\n", args->dir_name); 516 | start = time(NULL); 517 | unload_hash_Tab((NR_V_ELEMENTS/2)); 518 | end = time(NULL); 519 | if(unlikely(verbosity)) 520 | printf("\t-> OK [%lds]\n", end-start); 521 | 522 | } 523 | 524 | ret = 0; 525 | 526 | bye: 527 | 528 | if(t) 529 | shmdt(t); 530 | if(f_name_tmp) { 531 | unlink(f_name_tmp); 532 | free(f_name_tmp); 533 | } 534 | 535 | #if DEBUG_MALLOC 536 | printf("[+] Freeing everything!\n"); 537 | CLOCK_START(0) 538 | free_MC_matrix(); 539 | free_MB_matrix(); 540 | free_MA_matrix(); 541 | CLOCK_STOP(0) 542 | if(unlikely(verbosity)) 543 | printf("\t-> OK [ %.2fs ]\n", cpu_time_used); 544 | #endif 545 | if(largs) 546 | free(largs); 547 | if(pids) 548 | free(pids); 549 | return ret; 550 | } 551 | #endif 552 | 553 | int do_bruteforce(struct arguments *args) 554 | { 555 | #if (OPTIM_LOOKUP == OPTIM_LKUP_CUCKOO) 556 | return __bruteforce_cuckoo(args); 557 | #else 558 | return __bruteforce_bsearch(args); 559 | #endif 560 | } 561 | 562 | static __inline__ 563 | void clean_arguments(struct arguments *args) 564 | { 565 | args->mode = MODE_TEST; 566 | args->dir_name = NULL; 567 | #if OPTIM_BATCH 568 | args->batch = NULL; 569 | #endif 570 | memset(&args->target, 0, sizeof(args->target)); 571 | args->IV = 0; 572 | args->IV_is_set = 0; 573 | args->S = 0; 574 | args->S_is_set = 0; 575 | args->flag = false; 576 | args->flag_is_set = 0; 577 | args->nr_cores = cpu_get_nr_cores(); 578 | args->verbosity = 0; 579 | args->all = 0; 580 | } 581 | 582 | int main(int argc, char **argv) 583 | { 584 | struct arguments args; 585 | int ret; 586 | 587 | clean_arguments(&args); 588 | 589 | // Once and for all. 590 | setup_f_table(); 591 | 592 | // Perform the parsing of the CLI arguments. 
593 | argp_parse(&argp, argc, argv, 0, 0, &args); 594 | nr_provided_cores = args.nr_cores; 595 | 596 | // Fixes the verbosity 597 | verbosity = args.verbosity; 598 | 599 | // Default mode. 600 | if(args.mode == MODE_TEST) { 601 | do_tests(); 602 | exit(EXIT_SUCCESS); 603 | } 604 | 605 | // Used to check how fast our routines are. 606 | // TODO: Create an estimation of the global time of exploitation on 607 | // given system. 608 | else if(args.mode == MODE_BENCH) { 609 | do_bench(args.dir_name); 610 | exit(EXIT_SUCCESS); 611 | } 612 | 613 | // Generate the required tables 614 | // Store these tables on disk (--db) 615 | else if(args.mode == MODE_PRECOMPUTATION) { 616 | if(!args.dir_name) { 617 | printf("[-] Please provide an output file using --dir to save the bitstream\n"); 618 | exit(EXIT_FAILURE); 619 | } 620 | 621 | ret = create_directory(args.dir_name); 622 | if(ret < 0) { 623 | printf("[-] Failed to create directory %s [ret=%d]\n", args.dir_name, ret); 624 | exit(EXIT_FAILURE); 625 | } 626 | 627 | do_precomputation(args.dir_name); 628 | exit(EXIT_SUCCESS); 629 | } 630 | 631 | else if(args.mode == MODE_BRUTEFORCE) { 632 | 633 | if(!args.dir_name || !is_directory_created(args.dir_name)) { 634 | printf("[-] Please provide a valid location using --dir to load the precomputation results\n"); 635 | exit(EXIT_FAILURE); 636 | } 637 | 638 | #if OPTIM_BATCH 639 | if(!args.target.nr_targets) { 640 | printf("[-] Please provide your keystreams in hex:length,[...] using --batch\n"); 641 | exit(EXIT_FAILURE); 642 | } 643 | 644 | if(verbosity) { 645 | printf("[+] Batch mode! 
Attempting to crack:\n"); 646 | for(int i=0; i [b%02d] %.16lx (%d) [mask:%lx]\n", i, 648 | args.target.keystream[i].bitvector, 649 | args.target.keystream[i].bitlength, 650 | args.target.keystream[i].bitmask); 651 | } 652 | } 653 | #else 654 | if(args.target.nr_targets != 1) { 655 | printf("[-] Please provide a single keystream in hex using --keystream\n"); 656 | exit(EXIT_FAILURE); 657 | } 658 | 659 | if(!args.target.keystream[0].bitlength) { 660 | printf("[-] Please provide a valid keystream bitlength using --length\n"); 661 | exit(EXIT_FAILURE); 662 | } 663 | 664 | if(args.target.keystream[0].bitlength < 56 || args.target.keystream[0].bitlength > 64) { 665 | printf("[-] Please provide a length in range [56,64]\n"); 666 | exit(EXIT_FAILURE); 667 | } 668 | #endif 669 | 670 | do_bruteforce(&args); 671 | exit(EXIT_SUCCESS); 672 | } 673 | 674 | else if(args.mode == MODE_REVERSE) { 675 | 676 | uint8_t K[64]; 677 | uint8_t S_225[64]; 678 | uint8_t IV[32]; 679 | uint64_t q_K = 0; 680 | 681 | if(!args.IV_is_set) { 682 | printf("[-] Please provide the 32 bits IV using --iv\n"); 683 | exit(EXIT_FAILURE); 684 | } 685 | 686 | if(!args.S_is_set) { 687 | printf("[-] Please provide the recovered 64 bits S using --state\n"); 688 | exit(EXIT_FAILURE); 689 | } 690 | 691 | if(!args.flag_is_set) { 692 | printf("[-] Please provide the direction flag using -f\n"); 693 | exit(EXIT_FAILURE); 694 | } 695 | 696 | if(verbosity) { 697 | printf("[+] Recovering key using state:%lx iv:%x dir:%d\n", args.S, args.IV, args.flag); 698 | } 699 | 700 | memset(K, 0, sizeof(K)); 701 | transform_qword_2_list(args.S, S_225, 64); 702 | transform_qword_2_list(args.IV, IV, 32); 703 | retrieve_K(K, S_225, IV, args.flag); 704 | transform_list_2_qword(K, 64, &q_K); 705 | 706 | printf("K = %lx\n", q_K); 707 | exit(EXIT_SUCCESS); 708 | } 709 | 710 | return 0; 711 | } 712 | -------------------------------------------------------------------------------- /source/print.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "exploit.h" 5 | 6 | // TODO: printf("%d %d\n", C->nrows, C->ncols); 7 | 8 | void print_line_vector(mzd_t *v, int length, char *prefix) 9 | { 10 | int b, i; 11 | 12 | //~ assert(v->nrows == length); 13 | //~ assert(v->ncols == 1); 14 | 15 | if(prefix) 16 | printf("%s", prefix); 17 | for(i=0; incols == length); 29 | //~ assert(v->nrows == 1); 30 | 31 | if(prefix) 32 | printf("%s", prefix); 33 | for(i=0; i 2 | #include 3 | #include 4 | #include 5 | 6 | #include "exploit.h" 7 | 8 | // from main.c 9 | extern int verbosity; 10 | 11 | #if OPTIM_SCHED 12 | void select_computation_scheduling(void) 13 | { 14 | struct sched_param param = {0}; 15 | int policy, ret; 16 | 17 | policy = sched_getscheduler(0); 18 | if(policy == SCHED_BATCH) 19 | return; 20 | 21 | ret = sched_setscheduler(0, SCHED_BATCH, ¶m); 22 | policy = sched_getscheduler(0); 23 | if(verbosity && (ret == -1 || policy != SCHED_BATCH)) { 24 | printf("[!] select_computation_scheduling() failed to set the SCHED_BATCH policy\n"); 25 | } 26 | return; 27 | } 28 | 29 | void select_io_scheduling(void) 30 | { 31 | struct sched_param param = {0}; 32 | int policy, ret; 33 | 34 | policy = sched_getscheduler(0); 35 | if(policy == SCHED_OTHER) 36 | return; 37 | 38 | ret = sched_setscheduler(0, SCHED_OTHER, ¶m); 39 | policy = sched_getscheduler(0); 40 | if(verbosity && (ret == -1 || policy != SCHED_OTHER)) { 41 | printf("[!] 
select_io_scheduling() failed to set the SCHED_OTHER policy\n"); 42 | } 43 | return; 44 | } 45 | #endif 46 | -------------------------------------------------------------------------------- /source/sort.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "exploit.h" 8 | 9 | #define GET_KEY_64(element) ((element)>>32) 10 | #define GET_KEY_32(element) (element) 11 | 12 | /* 13 | * Based on the original Radix Sort implementation from: 14 | * https://www.geeksforgeeks.org/radix-sort/ 15 | * 16 | */ 17 | 18 | uint64_t tmp[NR_TAC_ELEMENTS_MAX]; // tmp array of 128 MB (max) 19 | int count[256]; 20 | 21 | // We use radix 256 in this version. 22 | void countSort(uint64_t arr[], int nr_elements, int offset) 23 | { 24 | int i; 25 | 26 | memset(count, 0, sizeof(count)); 27 | 28 | // Store count of occurrences in count[] 29 | for(i=0; i> offset) & 0xff]++; 31 | 32 | for(i=1; i<256; i++) 33 | count[i] += count[i-1]; 34 | 35 | // Build the output array 36 | for(i=nr_elements-1; i >= 0; i--) { 37 | tmp[count[(GET_KEY_64(arr[i]) >> offset) & 0xff] - 1] = arr[i]; 38 | count[(GET_KEY_64(arr[i]) >> offset) & 0xFF]--; 39 | } 40 | 41 | for(i=0; i GET_KEY_64(p[mid])) { 84 | begin = mid +1; 85 | } 86 | else { 87 | end = mid-1; 88 | } 89 | } 90 | } 91 | return -1; 92 | } 93 | 94 | int b_search32(uint32_t key, uint32_t *p, uint32_t arr_size) 95 | { 96 | int begin, end, mid; 97 | int found = 0; 98 | 99 | ASSERT(arr_size <= NR_TAC_ELEMENTS_MAX); 100 | 101 | begin=0; 102 | end=arr_size; 103 | 104 | while(!found && begin<=end) { 105 | mid = (begin + end)/2; 106 | if (GET_KEY_32(p[mid]) == key) { 107 | return mid; 108 | } 109 | else { 110 | if(key > GET_KEY_32(p[mid])) { 111 | begin = mid +1; 112 | } 113 | else { 114 | end = mid-1; 115 | } 116 | } 117 | } 118 | return -1; 119 | } 120 | 121 | // Returns -1 in case the element is not found, its index otherwise 122 | // Note: 
NR_TAC_ELEMENTS_MAX < (1<<31) thus it works 123 | 124 | int seq_search64(uint32_t key, uint64_t *p, uint32_t arr_size) 125 | { 126 | uint32_t i; 127 | 128 | ASSERT(arr_size <= NR_TAC_ELEMENTS_MAX); 129 | for(i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "exploit.h" 8 | #include "cuckoo.h" 9 | 10 | void cuckoo_printBucket(BUCKET *current_b) 11 | { 12 | printf("Key: %x, H1: %x, H2: %x, H3: %x\n", 13 | current_b->key, 14 | H1(current_b->key), 15 | H2(current_b->key), 16 | H3(current_b->key)); 17 | } 18 | 19 | void cuckoo_setup_ht(BUCKET **ht) 20 | { 21 | *ht = (BUCKET *) calloc(1, NR_BUCKETS * sizeof(BUCKET)); 22 | } 23 | 24 | void cuckoo_free_ht(BUCKET **ht) 25 | { 26 | free(*ht); 27 | *ht = NULL; 28 | } 29 | 30 | static __inline__ 31 | void swap(BUCKET *b1, BUCKET *b2) 32 | { 33 | BUCKET b0; 34 | b0.value = b1->value; 35 | b0.key = b1->key; 36 | b1->value = b2->value; 37 | b1->key = b2->key; 38 | b2->value = b0.value; 39 | b2->key = b0.key; 40 | } 41 | 42 | int cuckoo_put(uint32_t key, uint32_t value, BUCKET *ht) 43 | { 44 | BUCKET current_b; 45 | BUCKET *b1; 46 | uint32_t index; 47 | uint32_t counter; 48 | 49 | // TODO. 
50 | if (key == 0) { 51 | printf("Can keys be all 0?!\n"); 52 | return -2; 53 | } 54 | 55 | #if OPTIM_SKIP_COLLISIONS 56 | int ret = cuckoo_lookup(key, ht); 57 | if(ret != -1) { 58 | return -2; 59 | } 60 | #endif 61 | 62 | current_b.key = key; 63 | current_b.value = value; 64 | 65 | index = H1(current_b.key); 66 | b1 = &ht[index]; 67 | swap(¤t_b, b1); 68 | 69 | counter = 0; 70 | 71 | while (current_b.key != 0) { 72 | 73 | if (index == H1(current_b.key)) { 74 | 75 | // use H2 76 | index = H2(current_b.key); 77 | swap(¤t_b, &ht[index]); 78 | 79 | } else if (index == H2(current_b.key)) { 80 | 81 | //use H3 82 | index = H3(current_b.key); 83 | swap(¤t_b, &ht[index]); 84 | 85 | } else { 86 | 87 | // use H1 88 | index = H1(current_b.key); 89 | swap(¤t_b, &ht[index]); 90 | 91 | } 92 | 93 | if (counter > 10*LOGN) { 94 | printf("Alert, stash required?\n"); 95 | return -1; 96 | } 97 | 98 | counter++; 99 | } 100 | 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /source/stage1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "exploit.h" 13 | #include "file.h" 14 | #include "timing.h" 15 | #include "gea1.h" 16 | #include "transform.h" 17 | 18 | extern uint64_t V[8]; // V basis 19 | extern uint64_t TAC[24]; // TAC basis 20 | extern uint64_t UB[32]; // UB basis 21 | 22 | #include "linear_alg.h" 23 | 24 | // from main.c 25 | extern int verbosity; 26 | 27 | // from linear_alg.c 28 | extern uint64_t V_elts[NR_V_ELEMENTS_MAX]; 29 | extern mzd_t *MB_mat; 30 | #if OPTIM_LIN_ALG 31 | extern uint64_t f_MB[NR_V_ELEMENTS_MAX]; 32 | #endif 33 | 34 | // from data.c 35 | extern uint32_t *unsorted_Tab[NR_V_ELEMENTS_MAX]; 36 | 37 | int stg1a_create_raw_data(void *arg) 38 | { 39 | struct lp_arg *larg = (struct lp_arg *)arg; 40 | uint32_t state; 41 | 
uint64_t keystream = 0; 42 | uint64_t *p_keystream = NULL; 43 | uint64_t lower_bound_r, upper_bound_r, j; 44 | int i, jj, round, ret; 45 | DBG_CLOCK_VARS(6); 46 | 47 | mzd_t *t_xor_v = NULL; 48 | uint64_t q_t = 0; 49 | 50 | #if OPTIM_LIN_ALG 51 | setup_f_MB_elements(); 52 | mzd_t *beta_0 = NULL; 53 | uint64_t q_v = 0; 54 | uint64_t q_t_xor_v = 0; 55 | uint64_t q_beta_0 = 0; 56 | uint64_t q_beta_i = 0; 57 | uint64_t q_MB_x_v0 = 0; 58 | uint64_t q_MB_x_vi = 0; 59 | uint64_t q_f_MB_x_v0_xor_beta0 = 0; 60 | #else 61 | mzd_t *v = NULL; 62 | mzd_t *t = NULL; 63 | mzd_t *beta = NULL; 64 | #endif 65 | 66 | // First of all bind! 67 | cpu_bind(larg->id); 68 | 69 | t_xor_v = mzd_init(64,1); 70 | #if OPTIM_LIN_ALG 71 | beta_0 = mzd_init(32,1); 72 | #else 73 | t = mzd_init(64,1); 74 | v = mzd_init(64,1); 75 | beta = mzd_init(32,1); 76 | #endif 77 | 78 | // Using the full range would require 32 GB allocated which is too much 79 | // these days for a lot of setup thus using 16 GB at a time is better. 80 | // Note: We could obviously go lower but then it would not make a lot 81 | // of sense since the computational power would (probably) be laking 82 | // as well. 
83 | for(round=0; roundid, round, larg->nr_cores, STG1A_NR_ROUNDS, NR_TAC_ELEMENTS); 86 | 87 | if(verbosity > 1) 88 | printf("[LP_%.4d] - [%ld, %ld]\n", larg->id, lower_bound_r, upper_bound_r); 89 | 90 | DBG_CLOCK_START(0); 91 | ret = alloc_unsorted_Tab(lower_bound_r, upper_bound_r); 92 | assert(ret == 0); // TODO 93 | DBG_CLOCK_STOP(0); 94 | 95 | for(j=lower_bound_r, jj=0; j<=upper_bound_r; j++, jj++) { 96 | 97 | DBG_CLOCK_START(1); 98 | q_t = compute_tac_element(j); 99 | #if !OPTIM_LIN_ALG 100 | transform_qword_2_vect(q_t, t); 101 | #endif 102 | DBG_CLOCK_START(1); 103 | 104 | for(i=0; idirname); 175 | DBG_CLOCK_STOP(4) 176 | ASSERT(ret == 0); // TODO 177 | 178 | if(verbosity) 179 | printf("[+] Deleting memory\n"); 180 | 181 | DBG_CLOCK_START(5); 182 | free_unsorted_Tab(lower_bound_r, upper_bound_r); 183 | DBG_CLOCK_STOP(5); 184 | } 185 | 186 | #if DEBUG_TIMING 187 | if(unlikely(verbosity)) 188 | printf("create_Tab: %.2fs, compute_tac_element: %.2fs, lin_alg: %.2fs, RegisterB2: %.2fs, save_Tab: %.2fs, delete_Tab: %.2fs\n", DBG_CLOCK_GET(0), 189 | DBG_CLOCK_GET(1), 190 | DBG_CLOCK_GET(2), 191 | DBG_CLOCK_GET(3), 192 | DBG_CLOCK_GET(4), 193 | DBG_CLOCK_GET(5)); 194 | #endif 195 | 196 | #if !OPTIM_LIN_ALG 197 | mzd_free(t); 198 | mzd_free(v); 199 | mzd_free(beta); 200 | #endif 201 | mzd_free(t_xor_v); 202 | 203 | exit(EXIT_SUCCESS); 204 | } 205 | 206 | #if (OPTIM_LOOKUP == OPTIM_LKUP_BSEARCH) 207 | 208 | // from data.c 209 | extern uint64_t TabIndex[NR_V_ELEMENTS_MAX][(1<> (64-NR_BITS_IDX)) & MASK_IDX)) { 227 | if(found == 0) { 228 | idx1 = j; 229 | idx2 = j; 230 | found = 1; 231 | } else { 232 | idx2 = j; 233 | } 234 | } else { 235 | if(found) { 236 | nxt = j; 237 | break; 238 | } 239 | } 240 | } 241 | ASSERT(found==1); 242 | ASSERT(idx2 >= idx1); 243 | // Finally update the array 244 | TabIndex[i][k] = (((uint64_t)(idx1)<<32) | idx2); 245 | } 246 | } 247 | 248 | static __inline__ 249 | int __create_lkup_tables_bsearch(void *arg) 250 | { 251 | struct lp_arg *larg = 
(struct lp_arg *)arg; 252 | struct dirent *dir; 253 | char fname_unsorted[512]; 254 | char fname[512]; 255 | struct stat st; 256 | int ret, round; 257 | DIR *d = NULL; 258 | char *mapped_file = NULL, *p_mapped_file = NULL; 259 | uint64_t lower_bound_r, upper_bound_r, i; 260 | DBG_CLOCK_VARS(5); 261 | 262 | // First of all bind! 263 | cpu_bind(larg->id); 264 | 265 | mapped_file = malloc(2 * sizeof(uint32_t) * NR_TAC_ELEMENTS); 266 | if(unlikely(!mapped_file)) { 267 | printf("[-] __create_lkup_tables_bsearch() failed: Could not allocate memory!\n"); 268 | free(mapped_file); 269 | exit(EXIT_FAILURE); 270 | } 271 | 272 | for(round=0; roundid, round, larg->nr_cores, STG1B_NR_ROUNDS, NR_V_ELEMENTS); 275 | 276 | if(verbosity > 1) 277 | printf("[LP_%.4d] - [%ld, %ld]\n", larg->id, lower_bound_r, upper_bound_r); 278 | 279 | for(i=lower_bound_r; i<=upper_bound_r; i++) { 280 | 281 | DBG_CLOCK_START(0); 282 | p_mapped_file = mapped_file; 283 | 284 | d = opendir(larg->dirname); 285 | if(unlikely(!d)) { 286 | printf("[-] __create_lkup_tables_bsearch() failed: opendir(%s) failed! 
[errno:%d]\n", larg->dirname, errno); 287 | free(mapped_file); 288 | exit(EXIT_FAILURE); 289 | } 290 | 291 | memset(fname_unsorted, 0, sizeof(fname_unsorted)); 292 | snprintf(fname_unsorted, sizeof(fname_unsorted)-1, "unsorted_%.3d_", (int)i); 293 | 294 | while((dir = readdir(d))) { 295 | 296 | // If this triggers then someone is playing us a trick ;> 297 | // Or there is an uuexpected bug o_O;; 298 | assert((uint32_t)(p_mapped_file-mapped_file) <= (2 * sizeof(uint32_t) * NR_TAC_ELEMENTS)); 299 | 300 | if(memcmp(dir->d_name, fname_unsorted, strlen(fname_unsorted))) 301 | continue; 302 | 303 | snprintf(fname, sizeof(fname)-1, "%s/%s", larg->dirname, dir->d_name); 304 | memset(&st, 0, sizeof(st)); 305 | ret = lstat(fname, &st); 306 | ASSERT(ret == 0); // TODO 307 | ret = read_file(fname, (uint8_t *)p_mapped_file, st.st_size); 308 | if(ret < 0) { 309 | printf("[-] __create_lkup_tables_bsearch() failed to open %s [err:%d]\n", fname, ret); 310 | free(mapped_file); 311 | exit(EXIT_FAILURE); 312 | } 313 | 314 | p_mapped_file += st.st_size; 315 | } 316 | closedir(d); 317 | DBG_CLOCK_STOP(0); 318 | 319 | // Sorting the file. 
320 | DBG_CLOCK_START(1); 321 | radix_sort((uint64_t *)mapped_file, NR_TAC_ELEMENTS); 322 | DBG_CLOCK_STOP(1); 323 | 324 | #if 0 325 | print_uint64_array_as_hex((uint64_t *)mapped_file, 20); 326 | #endif 327 | 328 | // Saving the file 329 | memset(fname, 0, sizeof(fname)); 330 | snprintf(fname, sizeof(fname)-1, "%s/"SORTED_TABLE_FMT, larg->dirname, (int)i); 331 | 332 | DBG_CLOCK_START(2); 333 | ret = write_file(fname, (uint8_t *)mapped_file, 2 * sizeof(uint32_t) * NR_TAC_ELEMENTS); 334 | DBG_CLOCK_STOP(2); 335 | if(unlikely(ret < 0)) { 336 | printf("[-] __create_lkup_tables_bsearch() failed to write into %s [err:%d]\n", fname, ret); 337 | free(mapped_file); 338 | exit(EXIT_FAILURE); 339 | } 340 | 341 | DBG_CLOCK_START(3); 342 | create_index(mapped_file, i); 343 | DBG_CLOCK_STOP(3); 344 | 345 | // Saving the file 346 | DBG_CLOCK_START(4); 347 | memset(fname, 0, sizeof(fname)); 348 | snprintf(fname, sizeof(fname)-1, "%s/"SORTED_INDEX_FMT, larg->dirname, (int)i); 349 | ret = write_file(fname, (uint8_t *)TabIndex[i], sizeof(uint64_t) * (1<id); 394 | 395 | mapped_file = malloc(2 * sizeof(uint32_t) * NR_TAC_ELEMENTS); 396 | if(unlikely(!mapped_file)) { 397 | printf("[-] __create_lkup_tables_cuckoo() failed: Could not allocate memory!\n"); 398 | free(mapped_file); 399 | exit(EXIT_FAILURE); 400 | } 401 | 402 | for(round=0; roundid, round, larg->nr_cores, STG1B_NR_ROUNDS, NR_V_ELEMENTS); 405 | if(verbosity > 1) 406 | printf("[LP_%.4d] - [%ld, %ld]\n", larg->id, lower_bound_r, upper_bound_r); 407 | 408 | for(i=lower_bound_r; i<=upper_bound_r; i++) { 409 | 410 | DBG_CLOCK_START(0); 411 | p_mapped_file = mapped_file; 412 | 413 | d = opendir(larg->dirname); 414 | if(unlikely(!d)) { 415 | printf("[-] __create_lkup_tables_cuckoo() failed: opendir(%s) failed! 
[errno:%d]\n", larg->dirname, errno); 416 | free(mapped_file); 417 | exit(EXIT_FAILURE); 418 | } 419 | 420 | memset(fname_unsorted, 0, sizeof(fname_unsorted)); 421 | snprintf(fname_unsorted, sizeof(fname_unsorted)-1, "unsorted_%.3d_", (int)i); 422 | 423 | while((dir = readdir(d))) { 424 | 425 | // If this triggers then someone is playing us a trick ;> 426 | // Or there is an uuexpected bug o_O;; 427 | assert((uint32_t)(p_mapped_file-mapped_file) <= (2 * sizeof(uint32_t) * NR_TAC_ELEMENTS)); 428 | 429 | if(memcmp(dir->d_name, fname_unsorted, strlen(fname_unsorted))) 430 | continue; 431 | 432 | snprintf(fname, sizeof(fname)-1, "%s/%s", larg->dirname, dir->d_name); 433 | memset(&st, 0, sizeof(st)); 434 | ret = lstat(fname, &st); 435 | ASSERT(ret == 0); // TODO 436 | ret = read_file(fname, (uint8_t *)p_mapped_file, st.st_size); 437 | if(ret < 0) { 438 | printf("[-] __create_lkup_tables_cuckoo() failed to open %s [err:%d]\n", fname, ret); 439 | free(mapped_file); 440 | exit(EXIT_FAILURE); 441 | } 442 | 443 | p_mapped_file += st.st_size; 444 | } 445 | closedir(d); 446 | DBG_CLOCK_STOP(0); 447 | 448 | ht[i] = NULL; 449 | cuckoo_setup_ht(&ht[i]); 450 | 451 | DBG_CLOCK_START(3); 452 | uint64_t *p = (uint64_t *)mapped_file; 453 | for(j=0; j> 32); 456 | uint32_t value = (uint32_t)(p[j]); 457 | 458 | if(!key) { 459 | if(verbosity > 1) 460 | printf("[!] 
Skipping 0 key!\n"); 461 | continue; 462 | } 463 | 464 | cuckoo_put(key, value, ht[i]); 465 | } 466 | DBG_CLOCK_STOP(3); 467 | 468 | // Saving the file 469 | DBG_CLOCK_START(4); 470 | memset(fname, 0, sizeof(fname)); 471 | snprintf(fname, sizeof(fname)-1, "%s/"SORTED_CUCKOO_FMT, larg->dirname, (int)i); 472 | ret = write_file(fname, (uint8_t *)ht[i], NR_BUCKETS * sizeof(BUCKET)); 473 | cuckoo_free_ht(&ht[i]); 474 | ht[i] = NULL; 475 | DBG_CLOCK_STOP(4); 476 | if(ret < 0) { 477 | printf("[-] __create_lkup_tables_cuckoo() failed to write into %s [err:%d]\n", fname, ret); 478 | free(mapped_file); 479 | exit(EXIT_FAILURE); 480 | } 481 | 482 | } 483 | 484 | } 485 | 486 | #if DEBUG_TIMING 487 | if(unlikely(verbosity)) 488 | printf("Load: %.2fs, Sort: %.2fs, Write: %.2fs, X: %.2fs, Y: %.2fs\n", DBG_CLOCK_GET(0), 489 | DBG_CLOCK_GET(1), 490 | DBG_CLOCK_GET(2), 491 | DBG_CLOCK_GET(3), 492 | DBG_CLOCK_GET(4)); 493 | #endif 494 | 495 | free(mapped_file); 496 | exit(EXIT_SUCCESS); 497 | } 498 | 499 | #endif 500 | 501 | int stg1b_create_lkup_tables(void *arg) 502 | { 503 | #if (OPTIM_LOOKUP == OPTIM_LKUP_BSEARCH) 504 | return __create_lkup_tables_bsearch(arg); 505 | #else 506 | return __create_lkup_tables_cuckoo(arg); 507 | #endif 508 | } 509 | -------------------------------------------------------------------------------- /source/stage2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "exploit.h" 14 | #include "file.h" 15 | #include "timing.h" 16 | #include "gea1.h" 17 | #include "transform.h" 18 | 19 | extern uint64_t V[8]; // V basis 20 | extern uint64_t TAC[24]; // TAC basis 21 | extern uint64_t UB[32]; // UB basis 22 | 23 | #include "linear_alg.h" 24 | 25 | // from main.c 26 | extern int verbosity; 27 | 28 | // From data.c 29 | 30 | #if (OPTIM_LOOKUP == OPTIM_LKUP_CUCKOO) 31 | 
extern BUCKET *ht[NR_V_ELEMENTS_MAX]; 32 | #else 33 | extern uint32_t *sorted_Tab[NR_V_ELEMENTS_MAX]; 34 | extern uint64_t TabIndex[NR_V_ELEMENTS_MAX][(1<id); 77 | 78 | // Now prepare the shared memory to retrieve the targets. 79 | shmid = shmget(larg->key, 8192, 0666|IPC_CREAT); 80 | if(shmid < 0) { 81 | printf("[-] shmget() failed [errno:%d], exiting...\n", errno); 82 | exit(EXIT_FAILURE); 83 | } 84 | 85 | t = (struct _target *)shmat(shmid, (void*)0, 0); 86 | if(t==(void*)(-1)) { 87 | printf("[-] shmat() failed [errno:%d], exiting...\n", errno); 88 | exit(EXIT_FAILURE); 89 | } 90 | 91 | // Prepare our work load. 92 | round_idx = larg->round_idx; 93 | cpu_get_work(&lower_bound, &upper_bound, larg->id, 0, larg->nr_cores, 1, NR_UB_ELEMENTS); 94 | 95 | if(!larg->early_exit) 96 | early_exit = 0; 97 | 98 | #if !OPTIM_BATCH 99 | uint64_t q0, key_mask; 100 | q0 = t->keystream[0].bitvector; 101 | key_mask = t->keystream[0].bitmask; 102 | if(t->keystream[0].bitlength != 64) { 103 | early_exit = 0; 104 | } 105 | #else 106 | for(int c=0; cnr_targets; c++) { 107 | if(t->keystream[c].bitlength != 64) { 108 | early_exit = 0; 109 | break; 110 | } 111 | } 112 | #endif 113 | 114 | if(verbosity > 1) { 115 | printf("LP_%.4d - [%u, %u]\n", larg->id, (uint32_t)lower_bound, (uint32_t)upper_bound); 116 | } 117 | 118 | // Initialization of the linear alg objects 119 | beta = mzd_init(32,1); 120 | u_xor_v = mzd_init(64,1); 121 | u_xor_v_xor_t = mzd_init(64,1); 122 | 123 | uint64_t q_u; 124 | uint64_t q_v; 125 | uint64_t q_t; 126 | uint64_t q_u_xor_v; 127 | uint64_t q_u_xor_v_xor_t; 128 | 129 | #if OPTIM_LIN_ALG 130 | setup_f_MA_MC_elements(); 131 | uint64_t q_alpha_0 = 0; 132 | uint64_t q_gamma_0 = 0; 133 | uint64_t q_alpha_i = 0; 134 | uint64_t q_gamma_i = 0; 135 | uint64_t q_MA_x_v0 = 0; 136 | uint64_t q_MC_x_v0 = 0; 137 | uint64_t q_MA_x_vi = 0; 138 | uint64_t q_MC_x_vi = 0; 139 | uint64_t q_f_MA_x_v0_xor_alpha0 = 0; 140 | uint64_t q_f_MC_x_v0_xor_gamma0 = 0; 141 | alpha_0 = 
mzd_init(31,1); 142 | gamma_0 = mzd_init(33,1); 143 | #else 144 | alpha = mzd_init(31,1); 145 | gamma = mzd_init(33,1); 146 | #endif 147 | 148 | #if OPTIM_SCHED 149 | select_computation_scheduling(); 150 | #endif 151 | 152 | for(j=lower_bound; j<=upper_bound; j++) { 153 | 154 | q_u = compute_ub_element(j); // Do we precompute this? 155 | 156 | for(i=round_idx*(NR_V_ELEMENTS/STG2_NR_ROUNDS), ii=0; i<(round_idx+1)*(NR_V_ELEMENTS/STG2_NR_ROUNDS); i++, ii++) { 157 | 158 | if(early_exit && (t->nr_state_recovered == t->nr_targets)) { 159 | goto bye; 160 | } 161 | 162 | // Step 1: Matrix manipulation 163 | DBG_CLOCK_START(0); 164 | 165 | #if OPTIM_LIN_ALG 166 | 167 | if(unlikely(ii==0)) { 168 | 169 | q_v = V_elts[i]; 170 | q_u_xor_v = q_u ^ q_v; 171 | 172 | transform_qword_2_vect(q_u_xor_v, u_xor_v); 173 | mzd_mul(alpha_0, MA_mat, u_xor_v, 0); // expensive 174 | mzd_mul(gamma_0, MC_mat, u_xor_v, 0); // expensive 175 | transform_vect_2_qword(alpha_0, &q_alpha_0); 176 | transform_vect_2_qword(gamma_0, &q_gamma_0); 177 | 178 | // Precomputing what comes next 179 | q_MA_x_v0 = f_MA[i]; 180 | q_MC_x_v0 = f_MC[i]; 181 | q_f_MA_x_v0_xor_alpha0 = q_MA_x_v0 ^ q_alpha_0; 182 | q_f_MC_x_v0_xor_gamma0 = q_MC_x_v0 ^ q_gamma_0; 183 | 184 | // Setting the right pointers 185 | q_alpha = q_alpha_0; 186 | q_gamma = q_gamma_0; 187 | 188 | } else { 189 | 190 | q_MA_x_vi = f_MA[i]; 191 | q_MC_x_vi = f_MC[i]; 192 | q_alpha_i = q_MA_x_vi ^ q_f_MA_x_v0_xor_alpha0; 193 | q_gamma_i = q_MC_x_vi ^ q_f_MC_x_v0_xor_gamma0; 194 | 195 | // Setting the right pointers 196 | q_alpha = q_alpha_i; 197 | q_gamma = q_gamma_i; 198 | } 199 | 200 | #else 201 | 202 | q_v = V_elts[i]; 203 | q_u_xor_v = q_u ^ q_v; 204 | 205 | transform_qword_2_vect(q_u_xor_v, u_xor_v); 206 | 207 | mzd_mul(alpha, MA_mat, u_xor_v, 0); // expensive 208 | mzd_mul(gamma, MC_mat, u_xor_v, 0); // expensive 209 | 210 | transform_vect_2_qword(alpha, &q_alpha); 211 | transform_vect_2_qword(gamma, &q_gamma); 212 | 213 | #endif 214 | 215 | 
DBG_CLOCK_STOP(0); 216 | 217 | // Step 2: bitstream generation 218 | DBG_CLOCK_START(1); 219 | bitstream_regA = RegisterA2(q_alpha, 32); 220 | bitstream_regC = RegisterC2(q_gamma, 32); 221 | DBG_CLOCK_STOP(1); 222 | 223 | #if DEBUG_TESTCASE 224 | if(verbosity && j==27 && i==0) { 225 | printf("---\n"); 226 | print_line_vector(u_xor_v, 64, "[0, 27] u + v = "); // OK 227 | printf("[0, 27] alpha = %lx\n", q_alpha); 228 | printf("[0, 27] gamma = %lx\n", q_gamma); 229 | printf("[0, 27] LA = %lx\n", bitstream_regA); // OK 230 | printf("[0, 27] LC = %lx\n", bitstream_regC); // OK 231 | if(bitstream_regA != (uint32_t)0x25cfab4eeb9bb463) { 232 | printf("[!] Error: LA is incorrect!\n"); 233 | kill(0, SIGSEGV); 234 | } 235 | if(bitstream_regC != (uint32_t)0xd5bfa43aa906154e) { 236 | printf("[!] Error: LC is incorrect!\n"); 237 | kill(0, SIGSEGV); 238 | } 239 | printf("---\n"); 240 | } 241 | #endif 242 | 243 | #if OPTIM_BATCH 244 | // Not super super clean. 245 | for(int c=0; cnr_targets; c++) { 246 | uint64_t q0, key_mask; 247 | if(t->keystream[c].solved) 248 | continue; 249 | q0 = t->keystream[c].bitvector; 250 | key_mask = t->keystream[c].bitmask; 251 | #endif 252 | 253 | candidate = bitstream_regA ^ bitstream_regC ^ (uint32_t)q0; 254 | uint32_t x = (uint32_t)(candidate); 255 | 256 | // Step3: searching the pattern. 
257 | DBG_CLOCK_START(2); 258 | int idx = cuckoo_lookup(x, ht[ii]); 259 | DBG_CLOCK_STOP(2); 260 | 261 | if(unlikely(idx > 0)) { 262 | 263 | time_t stop = time(NULL); 264 | nr_candidates++; 265 | DBG_CLOCK_START(3); 266 | 267 | q_t = compute_tac_element((uint32_t)idx); 268 | 269 | #if OPTIM_LIN_ALG 270 | q_v = V_elts[i]; 271 | q_u_xor_v = q_u ^ q_v; 272 | #endif 273 | 274 | q_u_xor_v_xor_t = q_t ^ q_u_xor_v; 275 | 276 | transform_qword_2_vect(q_u_xor_v_xor_t, u_xor_v_xor_t); 277 | mzd_mul(beta, MB_mat, u_xor_v_xor_t, 0); 278 | transform_vect_2_qword(beta, &q_beta); 279 | 280 | bitstream_regA = RegisterA2(q_alpha, 64); 281 | bitstream_regB = RegisterB2(q_beta, 64); 282 | bitstream_regC = RegisterC2(q_gamma, 64); 283 | candidate = bitstream_regA ^ bitstream_regB ^ bitstream_regC; 284 | 285 | if((candidate & key_mask) == (q0 & key_mask)) { 286 | uint64_t q_sol = 0; 287 | transform_vect_2_qword(u_xor_v_xor_t, &q_sol); 288 | #if OPTIM_BATCH 289 | printf("[+] State found for b%02d in %.2fs [%.2fm]!\n", c, (double)(stop-larg->start), (double)(stop-larg->start)/60); 290 | #else 291 | printf("[+] State found in %.2fs [%.2fm]!\n", (double)(stop-larg->start), (double)(stop-larg->start)/60); 292 | #endif 293 | if(unlikely(verbosity)) { 294 | printf("\tUB = %x\n", (uint32_t)j); 295 | printf("\tV = %x\n", i); 296 | printf("\tT = %x\n", (uint32_t)idx); 297 | } 298 | printf("\tS = %lx\n", q_sol); 299 | #if OPTIM_BATCH 300 | if(!(t->keystream[c].solved)) 301 | t->nr_state_recovered++; 302 | t->keystream[c].solved++; 303 | #else 304 | if(!(t->keystream[0].solved)) 305 | t->nr_state_recovered++; 306 | t->keystream[0].solved++; 307 | #endif 308 | } 309 | DBG_CLOCK_STOP(3); 310 | } 311 | 312 | #if OPTIM_BATCH 313 | } 314 | #endif 315 | 316 | } 317 | } 318 | 319 | bye: 320 | shmdt(t); 321 | 322 | #if OPTIM_SCHED 323 | select_io_scheduling(); 324 | #endif 325 | 326 | #if DEBUG_TIMING 327 | if(unlikely(verbosity)) 328 | printf("LinAlg: %.2fs, bitstream generation: %.2fs, b-search: %.2fs, 
false-positives: %.2fs\n", DBG_CLOCK_GET(0), 329 | DBG_CLOCK_GET(1), 330 | DBG_CLOCK_GET(2), 331 | DBG_CLOCK_GET(3)); 332 | #endif 333 | 334 | if(unlikely((verbosity > 1) && nr_candidates)) 335 | printf("\t-> %d candidates occured\n", nr_candidates); 336 | 337 | exit(EXIT_SUCCESS); 338 | } 339 | #endif 340 | 341 | 342 | #if (OPTIM_LOOKUP == OPTIM_LKUP_BSEARCH) 343 | static __inline__ 344 | int __stg2_state_recovery_bsearch(void *arg) 345 | { 346 | struct lp_arg *larg = (struct lp_arg *)arg; 347 | uint64_t q_alpha = 0, q_beta = 0, q_gamma = 0; 348 | uint64_t bitstream_regA, bitstream_regB, bitstream_regC; 349 | uint64_t candidate; 350 | uint64_t j, lower_bound, upper_bound; 351 | int nr_candidates = 0; 352 | DBG_CLOCK_VARS(4); 353 | int i, shmid; 354 | struct _target *t = NULL; 355 | int early_exit = 1; 356 | 357 | mzd_t *u_xor_v = NULL; 358 | mzd_t *u_xor_v_xor_t = NULL; 359 | mzd_t *beta = NULL; 360 | #if OPTIM_LIN_ALG 361 | mzd_t *alpha_0 = NULL; // needs an allocation 362 | mzd_t *gamma_0 = NULL; // needs an allocation 363 | #else 364 | mzd_t *alpha = NULL; // needs an allocation 365 | mzd_t *gamma = NULL; // needs an allocation 366 | #endif 367 | 368 | // First of all bind! 369 | cpu_bind(larg->id); 370 | 371 | // Now prepare the shared memory to retrieve the targets. 
372 | shmid = shmget(larg->key, 8192, 0666|IPC_CREAT); 373 | if(shmid < 0) { 374 | printf("[-] shmget() failed [errno:%d], exiting...\n", errno); 375 | exit(EXIT_FAILURE); 376 | } 377 | 378 | t = (struct _target *)shmat(shmid, (void*)0, 0); 379 | if(t==(void*)(-1)) { 380 | printf("[-] shmat() failed [errno:%d], exiting...\n", errno); 381 | exit(EXIT_FAILURE); 382 | } 383 | 384 | lower_bound = larg->id * (NR_UB_ELEMENTS / larg->nr_cores); 385 | upper_bound = (larg->id+1) * (NR_UB_ELEMENTS / larg->nr_cores) - 1; 386 | 387 | if(!larg->early_exit) 388 | early_exit = 0; 389 | 390 | #if !OPTIM_BATCH 391 | uint64_t q0, key_mask; 392 | q0 = t->keystream[0].bitvector; 393 | key_mask = t->keystream[0].bitmask; 394 | if(t->keystream[0].bitlength != 64) { 395 | early_exit = 0; 396 | } 397 | #else 398 | for(int c=0; cnr_targets; c++) { 399 | if(t->keystream[c].bitlength != 64) { 400 | early_exit = 0; 401 | break; 402 | } 403 | } 404 | #endif 405 | 406 | if(verbosity > 1) { 407 | printf("LP_%.4d - [%u, %u]\n", larg->id, (uint32_t)lower_bound, (uint32_t)upper_bound); 408 | } 409 | 410 | // Initialization of the linear alg objects 411 | beta = mzd_init(32,1); 412 | u_xor_v = mzd_init(64,1); 413 | u_xor_v_xor_t = mzd_init(64,1); 414 | 415 | uint64_t q_u; 416 | uint64_t q_v; 417 | uint64_t q_t; 418 | uint64_t q_u_xor_v; 419 | uint64_t q_u_xor_v_xor_t; 420 | 421 | #if OPTIM_LIN_ALG 422 | setup_f_MA_MC_elements(); 423 | uint64_t q_alpha_0 = 0; 424 | uint64_t q_gamma_0 = 0; 425 | uint64_t q_alpha_i = 0; 426 | uint64_t q_gamma_i = 0; 427 | uint64_t q_MA_x_v0 = 0; 428 | uint64_t q_MC_x_v0 = 0; 429 | uint64_t q_MA_x_vi = 0; 430 | uint64_t q_MC_x_vi = 0; 431 | uint64_t q_f_MA_x_v0_xor_alpha0 = 0; 432 | uint64_t q_f_MC_x_v0_xor_gamma0 = 0; 433 | alpha_0 = mzd_init(31,1); 434 | gamma_0 = mzd_init(33,1); 435 | #else 436 | alpha = mzd_init(31,1); 437 | gamma = mzd_init(33,1); 438 | #endif 439 | 440 | #if OPTIM_SCHED 441 | select_computation_scheduling(); 442 | #endif 443 | 444 | 
for(j=lower_bound; j<=upper_bound; j++) { 445 | 446 | q_u = compute_ub_element(j); // DO WE PRECOMPUTE THIS? 447 | 448 | for(i=0; inr_state_recovered == t->nr_targets)) { 451 | goto bye; 452 | } 453 | 454 | // Step 1: Matrix manipulation 455 | DBG_CLOCK_START(0); 456 | 457 | #if OPTIM_LIN_ALG 458 | 459 | if(unlikely(i==0)) { 460 | 461 | q_v = V_elts[0]; 462 | q_u_xor_v = q_u ^ q_v; 463 | 464 | transform_qword_2_vect(q_u_xor_v, u_xor_v); 465 | mzd_mul(alpha_0, MA_mat, u_xor_v, 0); // expensive 466 | mzd_mul(gamma_0, MC_mat, u_xor_v, 0); // expensive 467 | transform_vect_2_qword(alpha_0, &q_alpha_0); 468 | transform_vect_2_qword(gamma_0, &q_gamma_0); 469 | 470 | // Precomputing what comes next 471 | q_MA_x_v0 = f_MA[0]; 472 | q_MC_x_v0 = f_MC[0]; 473 | q_f_MA_x_v0_xor_alpha0 = q_MA_x_v0 ^ q_alpha_0; 474 | q_f_MC_x_v0_xor_gamma0 = q_MC_x_v0 ^ q_gamma_0; 475 | 476 | // Setting the right pointers 477 | q_alpha = q_alpha_0; 478 | q_gamma = q_gamma_0; 479 | 480 | } else { 481 | 482 | q_MA_x_vi = f_MA[i]; 483 | q_MC_x_vi = f_MC[i]; 484 | q_alpha_i = q_MA_x_vi ^ q_f_MA_x_v0_xor_alpha0; 485 | q_gamma_i = q_MC_x_vi ^ q_f_MC_x_v0_xor_gamma0; 486 | 487 | // Setting the right pointers 488 | q_alpha = q_alpha_i; 489 | q_gamma = q_gamma_i; 490 | } 491 | 492 | #else 493 | 494 | q_v = V_elts[i]; 495 | q_u_xor_v = q_u ^ q_v; 496 | 497 | transform_qword_2_vect(q_u_xor_v, u_xor_v); 498 | 499 | mzd_mul(alpha, MA_mat, u_xor_v, 0); // expensive 500 | mzd_mul(gamma, MC_mat, u_xor_v, 0); // expensive 501 | 502 | transform_vect_2_qword(alpha, &q_alpha); 503 | transform_vect_2_qword(gamma, &q_gamma); 504 | 505 | #endif 506 | 507 | DBG_CLOCK_STOP(0); 508 | 509 | // Step 2: bitstream generation 510 | DBG_CLOCK_START(1); 511 | bitstream_regA = RegisterA2(q_alpha, 32); 512 | bitstream_regC = RegisterC2(q_gamma, 32); 513 | DBG_CLOCK_STOP(1); 514 | 515 | #if DEBUG_TESTCASE 516 | if(verbosity && j==27 && i==0) { 517 | printf("---\n"); 518 | print_line_vector(u_xor_v, 64, "[0, 27] u + v = "); // OK 
519 | printf("[0, 27] alpha = %lx\n", q_alpha); 520 | printf("[0, 27] gamma = %lx\n", q_gamma); 521 | printf("[0, 27] LA = %lx\n", bitstream_regA); // OK 522 | printf("[0, 27] LC = %lx\n", bitstream_regC); // OK 523 | if(bitstream_regA != (uint32_t)0x25cfab4eeb9bb463) { 524 | printf("[!] Error: LA is incorrect!\n"); 525 | kill(0, SIGSEGV); 526 | } 527 | if(bitstream_regC != (uint32_t)0xd5bfa43aa906154e) { 528 | printf("[!] Error: LC is incorrect!\n"); 529 | kill(0, SIGSEGV); 530 | } 531 | printf("---\n"); 532 | } 533 | #endif 534 | 535 | #if OPTIM_BATCH 536 | // Not super super clean. 537 | for(int c=0; cnr_targets; c++) { 538 | uint64_t q0, key_mask; 539 | if(t->keystream[c].solved) 540 | continue; 541 | q0 = t->keystream[c].bitvector; 542 | key_mask = t->keystream[c].bitmask; 543 | #endif 544 | candidate = bitstream_regA ^ bitstream_regC ^ (uint32_t)q0; 545 | uint32_t x = (uint32_t)(candidate); 546 | 547 | // Step3: searching the pattern. 548 | DBG_CLOCK_START(2); 549 | 550 | uint64_t index_qword; 551 | uint32_t idx1, idx2; 552 | 553 | index_qword = TabIndex[i][(x >> (32-NR_BITS_IDX)) & MASK_IDX]; 554 | idx1 = (index_qword>>32); 555 | idx2 = (index_qword&0xFFFFFFFF); 556 | 557 | uint8_t *p = (uint8_t *)sorted_Tab[i]; 558 | int idx = b_search32(x, (uint32_t *)&p[4*idx1], idx2-idx1+1); 559 | if(idx >= 0) 560 | idx += idx1; 561 | 562 | DBG_CLOCK_STOP(2); 563 | 564 | if(unlikely(idx > 0)) { 565 | 566 | uint32_t recovered_t; 567 | time_t stop = time(NULL); 568 | 569 | nr_candidates++; 570 | DBG_CLOCK_START(3); 571 | 572 | uint64_t tmp64 = 0; 573 | int fd; 574 | fd = fd_sorted_files[i]; 575 | lseek(fd, idx*8, SEEK_SET); // align on 64 bits for potential cache effects. 
576 | read(fd, &tmp64, 8); 577 | recovered_t = tmp64 & 0xFFFFFFFF; 578 | 579 | q_t = compute_tac_element(recovered_t); 580 | 581 | #if OPTIM_LIN_ALG 582 | q_v = V_elts[i]; 583 | q_u_xor_v = q_u ^ q_v; 584 | #endif 585 | 586 | q_u_xor_v_xor_t = q_t ^ q_u_xor_v; 587 | 588 | transform_qword_2_vect(q_u_xor_v_xor_t, u_xor_v_xor_t); 589 | mzd_mul(beta, MB_mat, u_xor_v_xor_t, 0); 590 | transform_vect_2_qword(beta, &q_beta); 591 | 592 | bitstream_regA = RegisterA2(q_alpha, 64); 593 | bitstream_regB = RegisterB2(q_beta, 64); 594 | bitstream_regC = RegisterC2(q_gamma, 64); 595 | candidate = bitstream_regA ^ bitstream_regB ^ bitstream_regC; 596 | 597 | if((candidate & key_mask) == (q0 & key_mask)) { 598 | uint64_t q_sol = 0; 599 | transform_vect_2_qword(u_xor_v_xor_t, &q_sol); 600 | #if OPTIM_BATCH 601 | printf("[+] State found for b%02d in %.2fs [%.2fm]!\n", c, (double)(stop-larg->start), (double)(stop-larg->start)/60); 602 | #else 603 | printf("[+] State found in %.2fs [%.2fm]!\n", (double)(stop-larg->start), (double)(stop-larg->start)/60); 604 | #endif 605 | if(unlikely(verbosity)) { 606 | printf("\tUB = %x\n", (uint32_t)j); 607 | printf("\tV = %x\n", i); 608 | printf("\tT = %x\n", recovered_t); 609 | } 610 | printf("\tS = %lx\n", q_sol); 611 | #if OPTIM_BATCH 612 | if(!(t->keystream[c].solved)) 613 | t->nr_state_recovered++; 614 | t->keystream[c].solved++; 615 | #else 616 | if(!(t->keystream[0].solved)) 617 | t->nr_state_recovered++; 618 | t->keystream[0].solved++; 619 | #endif 620 | } 621 | DBG_CLOCK_STOP(3); 622 | } 623 | 624 | #if OPTIM_BATCH 625 | } 626 | #endif 627 | 628 | } 629 | } 630 | 631 | bye: 632 | shmdt(t); 633 | 634 | #if OPTIM_SCHED 635 | select_io_scheduling(); 636 | #endif 637 | 638 | #if DEBUG_TIMING 639 | if(unlikely(verbosity)) 640 | printf("LinAlg: %.2fs, bitstream generation: %.2fs, b-search: %.2fs, false-positives: %.2fs\n", DBG_CLOCK_GET(0), 641 | DBG_CLOCK_GET(1), 642 | DBG_CLOCK_GET(2), 643 | DBG_CLOCK_GET(3)); 644 | #endif 645 | 646 | 
if(unlikely((verbosity > 1) && nr_candidates)) 647 | printf("\t-> %d candidates occured\n", nr_candidates); 648 | 649 | exit(EXIT_SUCCESS); 650 | } 651 | #endif 652 | 653 | int stg2_state_recovery(void *arg) 654 | { 655 | #if (OPTIM_LOOKUP == OPTIM_LKUP_BSEARCH) 656 | return __stg2_state_recovery_bsearch(arg); 657 | #else 658 | return __stg2_state_recovery_cuckoo(arg); 659 | #endif 660 | } 661 | -------------------------------------------------------------------------------- /source/stage3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "exploit.h" 6 | 7 | uint32_t apply_f_2_S_fwd(uint8_t *); 8 | uint32_t apply_f_2_S_bwd(uint8_t *); 9 | 10 | // from main.c 11 | extern int verbosity; 12 | 13 | void retrieve_K(uint8_t *K, uint8_t *S_225, uint8_t *IV, int dir) 14 | { 15 | /* 16 | * This function retrieves K based on S_0, the IV and dir. 17 | */ 18 | 19 | uint8_t F[128]; 20 | uint8_t S_0[64]; 21 | uint8_t S_97[64]; 22 | uint8_t S_225b[64]; 23 | uint8_t S_33_p_j[64]; 24 | uint8_t dir_bit = dir&1; 25 | int j; 26 | 27 | memset(F, 0, sizeof(F)); 28 | memset(S_0, 0, sizeof(S_0)); 29 | clock_S_backward(S_97, S_225, F, 128); 30 | clock_S_forward(S_225b, S_97, F, 128); 31 | 32 | for(j=0; j<64; j++) { 33 | memcpy(&F[0], IV, 32); 34 | memcpy(&F[32], &dir_bit, 1); 35 | memcpy(&F[33], K, j); 36 | clock_S_forward(S_33_p_j, S_0, F, 33+j); 37 | K[j] = (S_97[j] ^ apply_f_2_S_fwd(S_33_p_j) ^ S_33_p_j[0])&1; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /source/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "exploit.h" 6 | #include "bitops.h" 7 | #include "gea1.h" 8 | #include "transform.h" 9 | 10 | static __inline__ 11 | void __test_cpu_load(uint32_t nr_cores, uint32_t nr_rounds, uint64_t nr_elements) 12 | { 13 | uint64_t lower_bound, 
lower_bound_0, last_upper_bound = 0; 14 | uint64_t upper_bound, upper_bound_0; 15 | 16 | lower_bound_0 = 0; 17 | upper_bound_0 = nr_elements-1; 18 | 19 | for(uint32_t i=0; i= lower_bound); 24 | if(i==0 && j==0) { 25 | last_upper_bound = upper_bound; 26 | assert(lower_bound == lower_bound_0); 27 | } else { 28 | assert(lower_bound == (last_upper_bound+1)); 29 | last_upper_bound = upper_bound; 30 | } 31 | } 32 | } 33 | assert(upper_bound == upper_bound_0); 34 | } 35 | 36 | void test_cpu_load() 37 | { 38 | uint64_t array_nr_elements[3] = { NR_V_ELEMENTS_MAX, NR_TAC_ELEMENTS_MAX, NR_UB_ELEMENTS_MAX }; 39 | 40 | for(int i=1; i<201; i++) { 41 | for(int j=2; j<=8; j+=2) { 42 | for(int k=0; k<3; k++) { 43 | if((uint64_t)(i*j) > array_nr_elements[k]) 44 | continue; 45 | __test_cpu_load(i, j, array_nr_elements[k]); 46 | } 47 | } 48 | } 49 | } 50 | 51 | void test_hw() 52 | { 53 | uint64_t i; 54 | int nr_w_1 = 0; 55 | 56 | for(i=0; i<64; i++) { 57 | int w = hamming_weight(1UL<> i)&1)) { 198 | return 1; 199 | } 200 | } 201 | return 0; 202 | } 203 | 204 | int cmp_vectors(mzd_t *a, mzd_t *b, int len) 205 | { 206 | int i, b1, b2; 207 | 208 | for(i=0; i> (8*i)) & 0xff; 637 | b1 = p[i] ^ c[i]; 638 | if(b0 != b1) 639 | return 1; 640 | } 641 | return 0; 642 | } 643 | 644 | 645 | void test_GEA1() 646 | { 647 | uint8_t gcu_plaintext_1[18] = { 0x00, 0x00, 0x00, 0x00, 648 | 0x00, 0x00, 0x00, 0x00, 649 | 0x00, 0x00, 0x00, 0x00, 650 | 0x00, 0x00, 0x00, 0x00, 651 | 0x00, 0x00 }; 652 | 653 | uint8_t gcu_ciphertext_1[18] = { 0x1F, 0xA1, 0x98, 0xAB, 654 | 0x21, 0x14, 0xC3, 0x8A, 655 | 0x9E, 0xBC, 0xCB, 0x63, 656 | 0xAD, 0x48, 0x13, 0xA7, 657 | 0x40, 0xC1 }; 658 | 659 | uint8_t gcu_plaintext_2[18] = { 0x91, 0xE1, 0xDB, 0x43, 660 | 0x0B, 0x86, 0x40, 0x18, 661 | 0xED, 0x59, 0x63, 0x9B, 662 | 0xAB, 0x9A, 0x73, 0xC3, 663 | 0xCD, 0xE6 }; 664 | 665 | uint8_t gcu_ciphertext_2[18] = { 0x2A, 0x26, 0xD8, 0xFB, 666 | 0x64, 0xEC, 0xF3, 0x0C, 667 | 0x14, 0x7F, 0x1F, 0x16, 668 | 0x5E, 0xBC, 0x8B, 0x31, 669 | 
0x9B, 0xE6 }; 670 | 671 | uint8_t gcu_plaintext_3[18] = { 0xA8, 0xCA, 0xA6, 0x70, 672 | 0x98, 0x74, 0x82, 0x4D, 673 | 0x5B, 0x80, 0x40, 0x98, 674 | 0xB7, 0x69, 0x36, 0x4F, 675 | 0xD5, 0xAC }; 676 | 677 | uint8_t gcu_ciphertext_3[18] = { 0xB9, 0xA0, 0xF5, 0xDD, 678 | 0x05, 0x48, 0x24, 0xC5, 679 | 0xD8, 0x26, 0xA8, 0xF3, 680 | 0x3D, 0x8C, 0x61, 0x6B, 681 | 0xD1, 0x07 }; 682 | 683 | uint8_t gcu_plaintext_4[18] = { 0x36, 0x20, 0xAA, 0x33, 684 | 0x00, 0x77, 0x59, 0x16, 685 | 0x41, 0xD9, 0xD6, 0xA7, 686 | 0x3B, 0xBC, 0x8C, 0xA6, 687 | 0x53, 0xE4 }; 688 | 689 | uint8_t gcu_ciphertext_4[18] = { 0xE4, 0x00, 0x13, 0xBA, 690 | 0x42, 0xF7, 0x7C, 0xD1, 691 | 0x68, 0x5E, 0xAB, 0x0F, 692 | 0xA9, 0x5B, 0x8F, 0x76, 693 | 0xDC, 0x3F }; 694 | 695 | uint8_t gcu_plaintext_6[18] = { 0x12, 0xC1, 0x11, 0x1A, 696 | 0x6C, 0xB0, 0xF8, 0xD3, 697 | 0xF1, 0x83, 0x06, 0x77, 698 | 0x97, 0xCB, 0x2E, 0xBF, 699 | 0x5B, 0x6C }; 700 | 701 | uint8_t gcu_ciphertext_6[18] = { 0x48, 0xF8, 0x08, 0x7E, 702 | 0x63, 0xEE, 0x3C, 0x59, 703 | 0x6F, 0x42, 0x02, 0xA9, 704 | 0x44, 0xF8, 0xEE, 0x25, 705 | 0xDD, 0xD0 }; 706 | 707 | uint8_t gcu_plaintext_7[18] = { 0xA6, 0x41, 0x88, 0xFB, 708 | 0xB8, 0x2B, 0xAE, 0x69, 709 | 0x41, 0x19, 0xFC, 0x45, 710 | 0x01, 0xA7, 0xB2, 0xEB, 711 | 0xCB, 0xC5 }; 712 | 713 | uint8_t gcu_ciphertext_7[18] = { 0x30, 0x73, 0x6A, 0xD5, 714 | 0x39, 0x13, 0x58, 0x56, 715 | 0x00, 0x22, 0x31, 0xEC, 716 | 0x7F, 0x18, 0x2B, 0x3D, 717 | 0x03, 0x2D }; 718 | 719 | uint16_t gcu_key_1[7] = { 0x0000, 0x0000, 0x0000, 0x0000, 720 | 0x0000, 0x0000, 721 | 0x001c }; 722 | 723 | uint16_t gcu_key_2[7] = { 0xC5F9, 0x7B00, 0x89D3, 0xE84E, 724 | 0xC582, 0xF740, 725 | 0x001E }; 726 | 727 | uint16_t gcu_key_3[7] = { 0x4B65, 0xE3CA, 0xBFCF, 0x78B1, 728 | 0x4F69, 0x88D6, 729 | 0x001E }; 730 | 731 | uint16_t gcu_key_4[7] = { 0x06CF, 0xC095, 0x2794, 0xBE2D, 732 | 0xDEE5, 0x4BE3, 733 | 0x001C }; 734 | 735 | uint16_t gcu_key_6[7] = { 0x8A50, 0x9DAA, 0xF1A7, 0xE0F8, 736 | 0x897C, 0x2CEB, 737 | 0x001C }; 738 | 739 | 
uint16_t gcu_key_7[7] = { 0xB1D3, 0x590B, 0xDE75, 0xCA23, 740 | 0x2CCC, 0x233E, 741 | 0x001E }; 742 | 743 | uint64_t key; 744 | uint64_t z0, z1, z2, z3, z4, z5; 745 | uint32_t IV; 746 | int ret, dir; 747 | 748 | extract_params(&key, &IV, &dir, gcu_key_1); 749 | z0 = 0; 750 | GEA1(key, IV, dir, &z0); 751 | ret = memcmp_bitstream(z0, gcu_ciphertext_1, gcu_plaintext_1); 752 | assert(ret == 0); 753 | 754 | extract_params(&key, &IV, &dir, gcu_key_2); 755 | z1 = 0; 756 | GEA1(key, IV, dir, &z1); 757 | ret = memcmp_bitstream(z1, gcu_ciphertext_2, gcu_plaintext_2); 758 | assert(ret == 0); 759 | 760 | extract_params(&key, &IV, &dir, gcu_key_3); 761 | z2 = 0; 762 | GEA1(key, IV, dir, &z2); 763 | ret = memcmp_bitstream(z2, gcu_ciphertext_3, gcu_plaintext_3); 764 | assert(ret == 0); 765 | 766 | extract_params(&key, &IV, &dir, gcu_key_4); 767 | z3 = 0; 768 | GEA1(key, IV, dir, &z3); 769 | ret = memcmp_bitstream(z3, gcu_ciphertext_4, gcu_plaintext_4); 770 | assert(ret == 0); 771 | 772 | extract_params(&key, &IV, &dir, gcu_key_6); 773 | z4 = 0; 774 | GEA1(key, IV, dir, &z4); 775 | ret = memcmp_bitstream(z4, gcu_ciphertext_6, gcu_plaintext_6); 776 | assert(ret == 0); 777 | 778 | extract_params(&key, &IV, &dir, gcu_key_7); 779 | z5 = 0; 780 | GEA1(key, IV, dir, &z5); 781 | ret = memcmp_bitstream(z5, gcu_ciphertext_7, gcu_plaintext_7); 782 | assert(ret == 0); 783 | } 784 | -------------------------------------------------------------------------------- /source/timing.h: -------------------------------------------------------------------------------- 1 | #ifndef __TIMING_H__ 2 | #define __TIMING_H__ 3 | 4 | #include "exploit.h" 5 | 6 | // Conversion API 7 | #define MS(x) (1000*(x)) 8 | #define US(x) (1000*1000*(x)) 9 | #define NS(x) (1000*1000*1000*(x)) 10 | 11 | // Timing API 12 | #define CLOCK_VARS(i) clock_t __t1[(i)] = {0}, __t2[(i)] = {0}; \ 13 | double __cpu_time_used[i] = {0}; 14 | #define CLOCK_START(i) __t1[(i)] = clock() 15 | #define CLOCK_STOP(i) __t2[(i)] = clock(); \ 16 
| __cpu_time_used[(i)] += ((double)(__t2[(i)] - __t1[(i)])) / CLOCKS_PER_SEC; 17 | #define CLOCK_GET(i) __cpu_time_used[(i)] 18 | 19 | #if DEBUG_TIMING 20 | #define DBG_CLOCK_VARS(i) CLOCK_VARS(i) 21 | #define DBG_CLOCK_START(i) CLOCK_START(i) 22 | #define DBG_CLOCK_STOP(i) CLOCK_STOP(i) 23 | #define DBG_CLOCK_GET(i) CLOCK_GET(i) 24 | #else 25 | #define DBG_CLOCK_VARS(i) 26 | #define DBG_CLOCK_START(i) 27 | #define DBG_CLOCK_STOP(i) 28 | #define DBG_CLOCK_GET(i) 29 | #endif 30 | 31 | // Clocking API 32 | 33 | #endif /* __TIMING_H__ */ 34 | -------------------------------------------------------------------------------- /source/transform.h: -------------------------------------------------------------------------------- 1 | #ifndef __TRANSFORM_H__ 2 | #define __TRANSFORM_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | // TODO: nrows 9 | static __inline__ 10 | void transform_vect_2_dword(mzd_t *C, uint32_t *d) 11 | { 12 | uint32_t tmp = 0; 13 | int i, nr_bits = C->nrows; 14 | 15 | ASSERT(nr_bits <= 32); 16 | 17 | for(i=0; inrows; 30 | 31 | ASSERT(nr_bits <= 64); 32 | 33 | for(i=0; inrows <= 32); 47 | 48 | for(i=0; inrows; i++) { 49 | b = (d >> i) & 0x1; 50 | mzd_write_bit(x, i, 0, b); 51 | } 52 | } 53 | 54 | static __inline__ 55 | void transform_qword_2_vect(uint64_t q, mzd_t *x) 56 | { 57 | int i, b; 58 | 59 | ASSERT(x->nrows <= 64); 60 | 61 | for(i=0; inrows; i++) { 62 | b = (q >> i) & 0x1; 63 | mzd_write_bit(x, i, 0, b); 64 | } 65 | } 66 | 67 | static __inline__ 68 | void transform_list_2_qword(uint8_t *L, int nr_bits, uint64_t *q) 69 | { 70 | uint64_t tmp = 0; 71 | int i; 72 | 73 | ASSERT(nr_bits <= 64); 74 | 75 | for(i=0; i> i) & 0x1; 92 | L[i] = b; 93 | } 94 | } 95 | 96 | static __inline__ 97 | void transform_dword_2_list(uint32_t d, uint8_t *L, int nr_bits) 98 | { 99 | int i, b; 100 | 101 | ASSERT(nr_bits <= 32); 102 | 103 | for(i=0; i> i) & 0x1; 105 | L[i] = b; 106 | } 107 | } 108 | 109 | #endif /* __TRANSFORM_H__ */ 110 | 
--------------------------------------------------------------------------------