├── LICENSE ├── README.md ├── afl-cmin.md ├── afl-cmin.py ├── tmin.md └── tmin.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 

   To apply the Apache License to your work, attach the following
   boilerplate notice, with the fields enclosed by brackets "{}"
   replaced with your own identifying information. (Don't include
   the brackets!) The text should be enclosed in the appropriate
   comment syntax for the file format. We also recommend that a
   file or class name and description of purpose be included on the
   same "printed page" as the copyright notice for easier
   identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# afl-kit

Collection of utilities for AFL and fuzzing.

- [afl-cmin.py](afl-cmin.md) A reimplementation of afl-cmin in Python that uses less memory and disk space, and runs faster.
- [tmin.py](tmin.md) Similar to afl-tmin, but minimizes by configurable conditions.

## License
Apache License 2.0. Copyright 2016 Google Inc.

This is not an official Google product.

--------------------------------------------------------------------------------
/afl-cmin.md:
--------------------------------------------------------------------------------
# afl-cmin.py
A reimplementation of afl-cmin in Python that uses less memory and disk space, and runs faster.

## Features/enhancements
- Supports the same command line flags as the original afl-cmin.
- Deduplicates inputs by content hash before processing.
- `-i DIR` can be specified multiple times. Globbing is supported, and files
  are searched recursively.
- `--crash-dir DIR` copies detected crashes (deduplicated by hash) into `DIR`.
- `-T WORKERS` specifies the number of workers.
- `--as_queue` names output files like `id:000001,hash:value`.

You can use afl-cmin.py in a workflow like this:

1. Run many instances of afl-fuzz, producing multiple queues in the `sync_dir.1` directory.
2. `afl-cmin.py -i 'sync_dir.1' -o sync_dir.2/prev/queue --as_queue ...`
3. Run another batch of afl-fuzz instances in `sync_dir.2`. They will automatically sync the queue from `sync_dir.2/prev/queue`.
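
For reference, a single run that merges several corpora, splits out crashes, and
uses 16 workers might look like this (all paths and the target are illustrative):

  `afl-cmin.py -i 'sync_dir.1/*/queue' -i seeds -o corpus.min --crash-dir crashes.uniq -T 16 -- ./target @@`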

## Non-scientific performance test

### 2025 experiment

The following table shows performance numbers (in minutes). Testing
conditions:
- 186k unique input files; 73k output files after cmin.
- Tested on a machine with 64 cores.
- The fuzzing target uses a deferred fork server. Execution speed is about 36 execs/s.

| worker | afl-cmin | afl-cmin.py (2022) | afl-cmin.py |
| -----: | -------: | -----------------: | ----------: |
|      1 |    162.2 |         est. 480.0 |        94.3 |
|      2 |          |         est. 240.0 |        48.6 |
|      4 |          |              121.0 |        24.5 |
|      8 |     94.0 |               61.8 |        12.9 |
|     16 |          |               32.0 |         6.8 |
|     32 |          |               17.2 |         3.8 |
|     64 |     76.2 |               10.6 |         2.5 |

AFL++'s current afl-cmin is much faster than the 2016 version:
- afl-showmap has supported the fork server since 2020.
- afl-cmin has executed multiple afl-showmap instances in parallel since 2023.
- However, the data processing after target execution is still single-threaded.

For comparison, afl-showmap alone takes 85 minutes to generate all traces using
the fork server.

afl-pcmin no longer works with AFL++, so it is skipped.

### Historical experiment with the 2016 version

At that time, afl-showmap didn't support the fork server and afl-cmin didn't
support parallel processing.

program     | worker | temp disk (MB) | memory  | time (min)
----------- | -----: | -------------: | ------- | ---------:
afl-cmin    |      1 |           9782 | 7.8 GB  |         27
[afl-pcmin] |      8 |           9762 | 7.8 GB  |       13.8
afl-cmin.py |      1 |            359 | <50 MB  |       11.9
afl-cmin.py |      8 |           1136 | <250 MB |        1.8

[afl-pcmin]: https://github.com/bnagy/afl-trivia

Details of this table:
- The input was 79k unique files totaling 472 MB; the output was 5k files totaling 39 MB.
- `temp disk` is the size of the `.traces` folder after a run with `AFL_KEEP_TRACES=1`.

--------------------------------------------------------------------------------
/afl-cmin.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15 | # 16 | import argparse 17 | import array 18 | import base64 19 | import collections 20 | import glob 21 | import hashlib 22 | import itertools 23 | import logging 24 | import multiprocessing 25 | import os 26 | import shutil 27 | import subprocess 28 | import sys 29 | 30 | try: 31 | from tqdm import tqdm 32 | except ImportError: 33 | print('Hint: install python module "tqdm" to show progress bar') 34 | 35 | class tqdm: 36 | 37 | def __init__(self, data=None, *args, **argd): 38 | self.data = data 39 | 40 | def __iter__(self): 41 | yield from self.data 42 | 43 | def __enter__(self): 44 | return self 45 | 46 | def __exit__(self, exc_type, exc_value, traceback): 47 | pass 48 | 49 | def update(self, *args): 50 | pass 51 | 52 | 53 | parser = argparse.ArgumentParser() 54 | 55 | cpu_count = multiprocessing.cpu_count() 56 | group = parser.add_argument_group('Required parameters') 57 | group.add_argument('-i', 58 | dest='input', 59 | action='append', 60 | metavar='dir', 61 | required=True, 62 | help='input directory with the starting corpus') 63 | group.add_argument('-o', 64 | dest='output', 65 | metavar='dir', 66 | required=True, 67 | help='output directory for minimized files') 68 | 69 | group = parser.add_argument_group('Execution control settings') 70 | group.add_argument('-f', 71 | dest='stdin_file', 72 | metavar='file', 73 | help='location read by the fuzzed program (stdin)') 74 | group.add_argument( 75 | '-m', 76 | dest='memory_limit', 77 | default='none', 78 | metavar='megs', 79 | type=lambda x: x if x == 'none' else int(x), 80 | help='memory limit for child process (default: %(default)s)') 81 | group.add_argument('-t', 82 | dest='time_limit', 83 | default=5000, 84 | metavar='msec', 85 | type=lambda x: x if x == 'none' else int(x), 86 | help='timeout for each run (default: %(default)s)') 87 | group.add_argument('-O', 88 | dest='frida_mode', 89 | action='store_true', 90 | default=False, 91 | help='use binary-only instrumentation (FRIDA mode)') 92 | group.add_argument('-Q', 93 | dest='qemu_mode', 94 | action='store_true', 95 | default=False, 96 | help='use binary-only instrumentation (QEMU mode)') 97 | group.add_argument('-U', 98 | dest='unicorn_mode', 99 | action='store_true', 100 | default=False, 101 | help='use unicorn-based instrumentation (Unicorn mode)') 102 | group.add_argument('-X', 103 | dest='nyx_mode', 104 | action='store_true', 105 | default=False, 106 | help='use Nyx mode') 107 | 108 | group = parser.add_argument_group('Minimization settings') 109 | group.add_argument('--crash-dir', 110 | dest='crash_dir', 111 | metavar='dir', 112 | default=None, 113 | help="move crashes to a separate dir, always deduplicated") 114 | group.add_argument('-A', 115 | dest='allow_any', 116 | action='store_true', 117 | help='allow crashes and timeouts (not recommended)') 118 | group.add_argument('-C', 119 | dest='crash_only', 120 | action='store_true', 121 | help='keep crashing inputs, reject everything else') 122 | group.add_argument('-e', 123 | dest='edge_mode', 124 | action='store_true', 125 | default=False, 126 | help='solve for edge coverage only, ignore hit counts') 127 | 128 | group = parser.add_argument_group('Misc') 129 | group.add_argument('-T', 130 | dest='workers', 131 | type=lambda x: cpu_count if x == 'all' else int(x), 132 | default=1, 133 | help='number of concurrent worker (default: %(default)d)') 134 | group.add_argument('--as_queue', 135 | action='store_true', 136 | help='output file name like "id:000000,hash:value"') 137 | group.add_argument('--no-dedup', 138 | 
action='store_true', 139 | help='skip deduplication step for corpus files') 140 | group.add_argument('--debug', action='store_true') 141 | 142 | parser.add_argument('exe', metavar='/path/to/target_app') 143 | parser.add_argument('args', nargs='*') 144 | 145 | args = parser.parse_args() 146 | logger = None 147 | afl_showmap_bin = None 148 | tuple_index_type_code = 'I' 149 | file_index_type_code = None 150 | 151 | 152 | def init(): 153 | global logger 154 | log_level = logging.DEBUG if args.debug else logging.INFO 155 | logging.basicConfig(level=log_level, 156 | format='%(asctime)s - %(levelname)s - %(message)s') 157 | logger = logging.getLogger(__name__) 158 | 159 | if args.stdin_file and args.workers > 1: 160 | logger.error('-f is only supported with one worker (-T 1)') 161 | sys.exit(1) 162 | 163 | if args.memory_limit != 'none' and args.memory_limit < 5: 164 | logger.error('dangerously low memory limit') 165 | sys.exit(1) 166 | 167 | if args.time_limit != 'none' and args.time_limit < 10: 168 | logger.error('dangerously low timeout') 169 | sys.exit(1) 170 | 171 | if not os.path.isfile(args.exe): 172 | logger.error('binary "%s" not found or not regular file', args.exe) 173 | sys.exit(1) 174 | 175 | if not os.environ.get('AFL_SKIP_BIN_CHECK') and not any( 176 | [args.qemu_mode, args.frida_mode, args.unicorn_mode, args.nyx_mode]): 177 | if b'__AFL_SHM_ID' not in open(args.exe, 'rb').read(): 178 | logger.error("binary '%s' doesn't appear to be instrumented", 179 | args.exe) 180 | sys.exit(1) 181 | 182 | for dn in args.input: 183 | if not os.path.isdir(dn) and not glob.glob(dn): 184 | logger.error('directory "%s" not found', dn) 185 | sys.exit(1) 186 | 187 | global afl_showmap_bin 188 | searches = [ 189 | None, 190 | os.path.dirname(__file__), 191 | os.getcwd(), 192 | ] 193 | if os.environ.get('AFL_PATH'): 194 | searches.append(os.environ['AFL_PATH']) 195 | 196 | for search in searches: 197 | afl_showmap_bin = shutil.which('afl-showmap', path=search) 198 | if afl_showmap_bin: 199 | break 200 | if not afl_showmap_bin: 201 | logger.fatal('cannot find afl-showmap, please set AFL_PATH') 202 | sys.exit(1) 203 | 204 | trace_dir = os.path.join(args.output, '.traces') 205 | shutil.rmtree(trace_dir, ignore_errors=True) 206 | try: 207 | os.rmdir(args.output) 208 | except OSError: 209 | pass 210 | if os.path.exists(args.output): 211 | logger.error( 212 | 'directory "%s" exists and is not empty - delete it first', 213 | args.output) 214 | sys.exit(1) 215 | if args.crash_dir and not os.path.exists(args.crash_dir): 216 | os.makedirs(args.crash_dir) 217 | os.makedirs(trace_dir) 218 | 219 | logger.info('use %d workers (-T)', args.workers) 220 | 221 | 222 | def detect_type_code(size): 223 | for type_code in ['B', 'H', 'I', 'L', 'Q']: 224 | if 256**array.array(type_code).itemsize > size: 225 | return type_code 226 | 227 | 228 | def afl_showmap(input_path=None, batch=None, afl_map_size=None, first=False): 229 | assert input_path or batch 230 | # yapf: disable 231 | cmd = [ 232 | afl_showmap_bin, 233 | '-m', str(args.memory_limit), 234 | '-t', str(args.time_limit), 235 | '-Z', # cmin mode 236 | ] 237 | # yapf: enable 238 | found_atat = False 239 | for arg in args.args: 240 | if '@@' in arg: 241 | found_atat = True 242 | 243 | if args.stdin_file: 244 | assert args.workers == 1 245 | input_from_file = True 246 | stdin_file = args.stdin_file 247 | cmd += ['-H', stdin_file] 248 | elif found_atat: 249 | input_from_file = True 250 | stdin_file = os.path.join(args.output, f'.input.{os.getpid()}') 251 | cmd += ['-H', 
stdin_file] 252 | else: 253 | input_from_file = False 254 | 255 | if batch: 256 | input_from_file = True 257 | filelist = os.path.join(args.output, f'.filelist.{os.getpid()}') 258 | with open(filelist, 'w') as f: 259 | for _, path in batch: 260 | f.write(path + '\n') 261 | cmd += ['-I', filelist] 262 | output_path = os.path.join(args.output, f'.showmap.{os.getpid()}') 263 | cmd += ['-o', output_path] 264 | else: 265 | if input_from_file: 266 | shutil.copy(input_path, stdin_file) 267 | cmd += ['-o', '-'] 268 | 269 | if args.frida_mode: 270 | cmd += ['-O'] 271 | if args.qemu_mode: 272 | cmd += ['-Q'] 273 | if args.unicorn_mode: 274 | cmd += ['-U'] 275 | if args.nyx_mode: 276 | cmd += ['-X'] 277 | if args.edge_mode: 278 | cmd += ['-e'] 279 | cmd += ['--', args.exe] + args.args 280 | 281 | env = os.environ.copy() 282 | env['AFL_QUIET'] = '1' 283 | env['ASAN_OPTIONS'] = 'detect_leaks=0' 284 | if first: 285 | logger.debug('run command line: %s', subprocess.list2cmdline(cmd)) 286 | env['AFL_CMIN_ALLOW_ANY'] = '1' 287 | if afl_map_size: 288 | env['AFL_MAP_SIZE'] = str(afl_map_size) 289 | if args.crash_only: 290 | env['AFL_CMIN_CRASHES_ONLY'] = '1' 291 | if args.allow_any: 292 | env['AFL_CMIN_ALLOW_ANY'] = '1' 293 | 294 | if input_from_file: 295 | p = subprocess.Popen(cmd, 296 | stdout=subprocess.PIPE, 297 | env=env, 298 | bufsize=1048576) 299 | else: 300 | p = subprocess.Popen(cmd, 301 | stdin=open(input_path, 'rb'), 302 | stdout=subprocess.PIPE, 303 | env=env, 304 | bufsize=1048576) 305 | out = p.stdout.read() 306 | p.wait() 307 | 308 | if batch: 309 | result = [] 310 | for idx, input_path in batch: 311 | basename = os.path.basename(input_path) 312 | values = [] 313 | try: 314 | trace_file = os.path.join(output_path, basename) 315 | with open(trace_file, 'r') as f: 316 | values = list(map(int, f)) 317 | crashed = len(values) == 0 318 | os.unlink(trace_file) 319 | except FileNotFoundError: 320 | a = None 321 | crashed = True 322 | values = [(t // 1000) * 9 + t % 1000 for t in values] 323 | a = array.array(tuple_index_type_code, values) 324 | result.append((idx, a, crashed)) 325 | os.unlink(filelist) 326 | os.rmdir(output_path) 327 | return result 328 | else: 329 | values = [] 330 | for line in out.split(): 331 | if not line.isdigit(): 332 | continue 333 | values.append(int(line)) 334 | values = [(t // 1000) * 9 + t % 1000 for t in values] 335 | a = array.array(tuple_index_type_code, values) 336 | crashed = p.returncode in [2, 3] 337 | if input_from_file and stdin_file != args.stdin_file: 338 | os.unlink(stdin_file) 339 | return a, crashed 340 | 341 | 342 | class JobDispatcher(multiprocessing.Process): 343 | 344 | def __init__(self, job_queue, jobs): 345 | super().__init__() 346 | self.job_queue = job_queue 347 | self.jobs = jobs 348 | 349 | def run(self): 350 | for job in self.jobs: 351 | self.job_queue.put(job) 352 | self.job_queue.close() 353 | 354 | 355 | class Worker(multiprocessing.Process): 356 | 357 | def __init__(self, idx, afl_map_size, q_in, p_out, r_out): 358 | super().__init__() 359 | self.idx = idx 360 | self.afl_map_size = afl_map_size 361 | self.q_in = q_in 362 | self.p_out = p_out 363 | self.r_out = r_out 364 | 365 | def run(self): 366 | map_size = self.afl_map_size or 65536 367 | max_tuple = map_size * 9 368 | max_file_index = 256**array.array(file_index_type_code).itemsize - 1 369 | m = array.array(file_index_type_code, [max_file_index] * max_tuple) 370 | counter = collections.Counter() 371 | crashes = [] 372 | 373 | pack_name = os.path.join(args.output, '.traces', 
                                 f'{self.idx}.pack')
        pack_pos = 0
        with open(pack_name, 'wb') as trace_pack:
            while True:
                batch = self.q_in.get()
                if batch is None:
                    break

                for idx, r, crash in afl_showmap(
                        batch=batch, afl_map_size=self.afl_map_size):
                    counter.update(r)

                    used = False

                    if crash:
                        crashes.append(idx)

                    # If we aren't saving crashes to a separate dir, handle them
                    # the same as other inputs. However, unless AFL_CMIN_ALLOW_ANY=1,
                    # afl_showmap will not return any coverage for crashes so they will
                    # never be retained.
                    if not crash or not args.crash_dir:
                        # Track the smallest file index that covers each tuple.
                        # Files are sorted by size, so a smaller index means a
                        # smaller file.
                        for t in r:
                            if idx < m[t]:
                                m[t] = idx
                                used = True

                    if used:
                        tuple_count = len(r)
                        r.tofile(trace_pack)
                        self.p_out.put((idx, self.idx, pack_pos, tuple_count))
                        pack_pos += tuple_count * r.itemsize
                    else:
                        self.p_out.put(None)

        self.r_out.put((self.idx, m, counter, crashes))


class CombineTraceWorker(multiprocessing.Process):

    def __init__(self, pack_name, jobs, r_out):
        super().__init__()
        self.pack_name = pack_name
        self.jobs = jobs
        self.r_out = r_out

    def run(self):
        already_have = set()
        with open(self.pack_name, 'rb') as f:
            for pos, tuple_count in self.jobs:
                f.seek(pos)
                result = array.array(tuple_index_type_code)
                result.fromfile(f, tuple_count)
                already_have.update(result)
        self.r_out.put(already_have)


def hash_file(path):
    m = hashlib.sha1()
    with open(path, 'rb') as f:
        m.update(f.read())
    return m.digest()


def dedup(files):
    with multiprocessing.Pool(args.workers) as pool:
        seen_hash = set()
        result = []
        hash_list = []
        # use a large chunksize to reduce multiprocessing overhead
        chunksize = max(1, min(256, len(files) // args.workers))
        for i, h in enumerate(
                tqdm(pool.imap(hash_file, files, chunksize),
                     desc='dedup',
                     total=len(files),
                     ncols=0,
                     leave=(len(files) > 100000))):
            if h in seen_hash:
                continue
            seen_hash.add(h)
            result.append(files[i])
            hash_list.append(h)
        return result, hash_list


def is_afl_dir(dirnames, filenames):
    return ('queue' in dirnames and 'hangs' in dirnames
            and 'crashes' in dirnames and 'fuzzer_setup' in filenames)


def collect_files(input_paths):
    paths = []
    for s in input_paths:
        paths += glob.glob(s)

    files = []
    with tqdm(desc='search', unit=' files', ncols=0) as pbar:
        for path in paths:
            for root, dirnames, filenames in os.walk(path, followlinks=True):
                # Prune hidden directories in place so os.walk does not
                # descend into them. (Removing entries from the list while
                # iterating over it would skip elements.)
                dirnames[:] = [d for d in dirnames if not d.startswith('.')]

                if not args.crash_only and is_afl_dir(dirnames, filenames):
                    continue

                for filename in filenames:
                    if filename.startswith('.'):
                        continue
                    pbar.update(1)
                    files.append(os.path.join(root, filename))
    return files
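

# Overall flow of main():
#   1. collect inputs (collect_files) and drop exact duplicates (dedup);
#   2. sort inputs by file size, so that for each coverage tuple the smallest
#      file covering it is preferred;
#   3. trace batches of inputs with afl-showmap in Worker processes;
#   4. select a minimal subset: first keep files that uniquely cover some
#      tuple, then greedily cover the remaining tuples, rarest first.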
def main():
    init()

    files = collect_files(args.input)
    if len(files) == 0:
        logger.error('no inputs in the target directory - nothing to be done')
        sys.exit(1)
    logger.info('Found %d input files in %d directories', len(files),
                len(args.input))

    if not args.no_dedup:
        files, hash_list = dedup(files)
        logger.info('%d files remain after dedup', len(files))
    else:
        # hash_list is only available when dedup ran; guard its uses below.
        hash_list = None
        logger.info('Skipping file deduplication.')

    global file_index_type_code
    file_index_type_code = detect_type_code(len(files))

    logger.info('Sorting files.')
    with multiprocessing.Pool(args.workers) as pool:
        chunksize = max(1, min(512, len(files) // args.workers))
        size_list = list(pool.map(os.path.getsize, files, chunksize))
        idxes = sorted(range(len(files)), key=lambda x: size_list[x])
        files = [files[idx] for idx in idxes]
        if hash_list is not None:
            hash_list = [hash_list[idx] for idx in idxes]

    afl_map_size = None
    if b'AFL_DUMP_MAP_SIZE' in open(args.exe, 'rb').read():
        output = subprocess.run([args.exe],
                                capture_output=True,
                                env={
                                    'AFL_DUMP_MAP_SIZE': '1'
                                }).stdout
        afl_map_size = int(output)
        logger.info('Setting AFL_MAP_SIZE=%d', afl_map_size)

    global tuple_index_type_code
    # Fall back to the default 64k map when the target does not report its
    # map size (this matches the fallback used in Worker.run).
    tuple_index_type_code = detect_type_code((afl_map_size or 65536) * 9)

    logger.info('Testing the target binary')
    tuples, _ = afl_showmap(files[0], afl_map_size=afl_map_size, first=True)
    if tuples:
        logger.info('ok, %d tuples recorded', len(tuples))
    else:
        logger.error('no instrumentation output detected')
        sys.exit(1)

    job_queue = multiprocessing.Queue()
    progress_queue = multiprocessing.Queue()
    result_queue = multiprocessing.Queue()

    workers = []
    for i in range(args.workers):
        p = Worker(i, afl_map_size, job_queue, progress_queue, result_queue)
        p.start()
        workers.append(p)

    # itertools.batched requires Python 3.12+.
    chunk = max(1, min(128, len(files) // args.workers))
    jobs = list(itertools.batched(enumerate(files), chunk))
    jobs += [None] * args.workers  # sentinel

    dispatcher = JobDispatcher(job_queue, jobs)
    dispatcher.start()

    logger.info('Processing traces')
    effective = 0
    trace_info = {}
    for _ in tqdm(files, ncols=0, smoothing=0.01):
        r = progress_queue.get()
        if r is not None:
            idx, worker_idx, pos, tuple_count = r
            trace_info[idx] = worker_idx, pos, tuple_count
            effective += 1
    dispatcher.join()

    logger.info('Obtaining trace results')
    ms = []
    crashes = []
    counter = collections.Counter()
    for _ in tqdm(range(args.workers), ncols=0):
        idx, m, c, crs = result_queue.get()
        ms.append(m)
        counter.update(c)
        crashes.extend(crs)
        workers[idx].join()
    best_idxes = list(map(min, zip(*ms)))

    if not args.crash_dir:
        logger.info('Found %d unique tuples across %d files (%d effective)',
                    len(counter), len(files), effective)
    else:
        logger.info(
            'Found %d unique tuples across %d files (%d effective, %d crashes)',
            len(counter), len(files), effective, len(crashes))
    all_unique = counter.most_common()

    logger.info('Processing candidates and writing output')
    already_have = set()
    count = 0

    def save_file(idx):
        input_path = files[idx]
        fn = (base64.b16encode(hash_list[idx]).decode('utf8').lower()
              if not args.no_dedup else os.path.basename(input_path))
        if args.as_queue:
            if args.no_dedup:
                fn = 'id:%06d,orig:%s' % (count, fn)
            else:
                fn = 'id:%06d,hash:%s' % (count, fn)
        output_path = os.path.join(args.output, fn)
        try:
            os.link(input_path, output_path)
        except OSError:
            shutil.copy(input_path, output_path)

    jobs = [[] for i in range(args.workers)]
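    # Selection proceeds in two passes (the classic afl-cmin strategy):
    #   pass 1: keep every file that is the only one covering some tuple
    #           (count == 1 in `counter`), and remember its trace;
    #   pass 2: walk the remaining tuples rarest-first and, for each tuple
    #           not yet covered, keep the smallest file that hits it.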
    saved = set()
    for t, c in all_unique:
        if c != 1:
            continue
        idx = best_idxes[t]
        if idx in saved:
            continue
        save_file(idx)
        saved.add(idx)
        count += 1

        worker_idx, pos, tuple_count = trace_info[idx]
        job = (pos, tuple_count)
        jobs[worker_idx].append(job)

    trace_packs = []
    workers = []
    for i in range(args.workers):
        pack_name = os.path.join(args.output, '.traces', f'{i}.pack')
        trace_f = open(pack_name, 'rb')
        trace_packs.append(trace_f)

        p = CombineTraceWorker(pack_name, jobs[i], result_queue)
        p.start()
        workers.append(p)

    for _ in range(args.workers):
        result = result_queue.get()
        already_have.update(result)

    for t, c in tqdm(list(reversed(all_unique)), ncols=0):
        if t in already_have:
            continue

        idx = best_idxes[t]
        save_file(idx)
        count += 1

        worker_idx, pos, tuple_count = trace_info[idx]
        trace_pack = trace_packs[worker_idx]
        trace_pack.seek(pos)
        result = array.array(tuple_index_type_code)
        result.fromfile(trace_pack, tuple_count)

        already_have.update(result)

    for f in trace_packs:
        f.close()

    if args.crash_dir:
        logger.info('Saving crashes to %s', args.crash_dir)
        crash_files = [files[c] for c in crashes]

        if args.no_dedup:
            # Unless we deduped previously, we have to dedup the crash files
            # now.
            crash_files, crash_hashes = dedup(crash_files)
        else:
            # hash_list is indexed by position in `files`, so map the crash
            # indices through it to keep file names aligned with contents.
            crash_hashes = [hash_list[c] for c in crashes]

        for idx, crash_path in enumerate(crash_files):
            fn = base64.b16encode(crash_hashes[idx]).decode('utf8').lower()
            output_path = os.path.join(args.crash_dir, fn)
            try:
                os.link(crash_path, output_path)
            except OSError:
                try:
                    shutil.copy(crash_path, output_path)
                except shutil.Error:
                    # This error happens when src and dest are hardlinks of the
                    # same file. We have nothing to do in this case, but handle
                    # it gracefully.
                    pass

    if count == 1:
        logger.warning('all test cases had the same traces, check syntax!')
    logger.info('narrowed down to %s files, saved in "%s"', count, args.output)
    if not os.environ.get('AFL_KEEP_TRACES'):
        logger.info('Deleting trace files')
        trace_dir = os.path.join(args.output, '.traces')
        shutil.rmtree(trace_dir, ignore_errors=True)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/tmin.md:
--------------------------------------------------------------------------------
# tmin.py
Similar to afl-tmin, but minimizes by configurable conditions.

## Features/enhancements
- Supports command line flags similar to afl-tmin's.
- Uses a minimization heuristic similar to afl-tmin's.
- Instead of classifying input by coverage, tmin.py classifies input by
  program output and termination conditions. Supported conditions:
  * `--stdout`: stdout contains the given string
  * `--stderr`: stderr contains the given string
  * `--crash`: program terminated by any signal
  * `--returncode`: program exits with the given return code
  * `--signal`: program terminated by the given signal
  * `--timeout`: program terminated due to timeout

## Examples
- Minimize the input while making sure [exploitable] still outputs `EXPLOITABLE`.

  `tmin.py -i file.in -o file.out -m none --stdout "'EXPLOITABLE'" -- ~/src/exploitable/triage.py './w3m -T text/html -dump @@'`

- Minimize the input while making sure the program is still killed by SIGABRT (i.e. an assert() failure).

  `tmin.py -i file.in -o file.out --signal 6 -- /path/to/program @@`
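
- Minimize the input while making sure the program still runs past the time
  limit (2 seconds in this hypothetical command; the target path is illustrative).

  `tmin.py -i file.in -o file.out -t 2000 --timeout -- /path/to/program @@`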

[exploitable]: https://github.com/jfoote/exploitable

--------------------------------------------------------------------------------
/tmin.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# TODO support exploitable
# TODO support valgrind ?
# TODO support showmap
# TODO ddmin: https://github.com/MarkusTeufelberger/afl-ddmin-mod
# TODO find similar variant but doesn't meet condition
# TODO use wait4() to get child resource
# TODO don't use communicate(), don't keep full stdout, stderr in memory
# TODO how to customize env properly?
# TODO detect asan/msan/ubsan automatically: __asan_default_options __msan_default_options __ubsan_default_options
import argparse
import ctypes
import logging
import os
import re
import resource
import signal
import subprocess
import sys
import tempfile
import textwrap
import time

MSAN_ERROR = 86
LSAN_ERROR = 23

g_execs = 0

logger = None


def create_argument_parser():
    parser = argparse.ArgumentParser(
        description='General test case minimizer',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent('''\
            Example command:
            %(prog)s -i file.in -o file.out -m none --stdout "'EXPLOITABLE'" -- ~/src/exploitable/triage.py './w3m -T text/html -dump @@'
            '''))
    group = parser.add_argument_group('Required parameters')
    group.add_argument(
        '-i', dest='input', metavar='file', required=True, help='Input file')
    group.add_argument(
        '-o', dest='output', metavar='file', required=True, help='Output file')

    group = parser.add_argument_group('Execution control settings')
    group.add_argument(
        '-t',
        dest='time_limit',
        type=int,
        default=1000,
        metavar='msec',
        help='timeout for each run (default: %(default)s)')
    group.add_argument(
        '-m',
        dest='memory_limit',
        default='none',
        metavar='megs',
        help='memory limit for child process (default: %(default)s)')

    group = parser.add_argument_group('Output conditions')
    group.add_argument('--stdout', metavar='STR', action='append', default=[],
                       help='If stdout contains STR; --stdout may be specified multiple times.')
    group.add_argument('--stderr', metavar='STR', action='append', default=[],
                       help='If stderr contains STR; --stderr may be specified multiple times.')

    group = parser.add_argument_group('Termination conditions')
    group.add_argument(
        '--auto',
        action='store_true',
        help='guess the termination condition from a dry run')
    group.add_argument(
        '--crash',
        action='store_true',
        help='Crash (got signal or MSAN error)')
    group.add_argument('--returncode', type=int)
    group.add_argument('--signal', type=int)
    group.add_argument(
        '-H',
        '--timeout',
        action='store_true',
        help='Execution time longer than the timeout specified by -t')
    # TODO how to detect? will be caught by --returncode now.
    #group.add_argument(
    #    '--memory_exceeded',
    #    action='store_true',
    #    help='Used more memory than size specified by -m')

    group = parser.add_argument_group('How to deal with flaky cases')
    group.add_argument(
        '--try', dest='try_', metavar='N', type=int,
        help='try up to N times (default: %(default)s)', default=1)
    group.add_argument(
        '--all',
        action='store_true',
        help='only keep if all trials meet conditions; default is any')

    group = parser.add_argument_group('Misc conditions')
    # TODO not implemented yet. For now, use afl-tmin instead.
    #group.add_argument('--aflmap', action='store_true', default=False)

    parser.add_argument(
        '--debug',
        action='store_true',
        help='Show detailed information for debugging %(prog)s')
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--no-aslr', action='store_true', help='Disable ASLR')
    parser.add_argument('--dryrun', action='store_true')

    parser.add_argument('args', nargs='+')

    return parser


def setlimits(opts):
    if opts.memory_limit.isdigit():
        m = int(opts.memory_limit) * 2**20
        resource.setrlimit(resource.RLIMIT_AS, (m, m))
        resource.setrlimit(resource.RLIMIT_DATA, (m, m))
        resource.setrlimit(resource.RLIMIT_STACK, (m, m))
        resource.setrlimit(resource.RLIMIT_RSS, (m, m))


def run_target_once(opts, data, filename=None):
    """
    Return True if the run matches the conditions.
    """
    global g_execs
    logger.debug('run %d', len(data))

    pid = os.getpid()
    try:
        if filename:
            tmp_fn = filename
        else:
            # Because we closed the fd and lost the lock, add the pid as a
            # prefix to avoid racing.
151 | tmp_fd, tmp_fn = tempfile.mkstemp(prefix='tmin-%d' % os.getpid()) 152 | os.write(tmp_fd, data) 153 | os.close(tmp_fd) 154 | 155 | found_input_file = False 156 | cmd = opts.args[:] 157 | for i in range(len(cmd)): 158 | if '@@' in cmd[i]: 159 | found_input_file = True 160 | cmd[i] = cmd[i].replace('@@', tmp_fn) 161 | 162 | env = os.environ.copy() 163 | #env = {} 164 | env.setdefault( 165 | 'ASAN_OPTIONS', 166 | ':'.join([ 167 | 'abort_on_error=1', 168 | 'detect_leaks=0', 169 | 'symbolize=0', 170 | 'allocator_may_return_null=1', 171 | 'detect_odr_violation=0', 172 | 'print_scariness=1', 173 | 'handle_segv=0', 174 | 'handle_sigbus=0', 175 | 'handle_abort=0', 176 | 'handle_sigfpe=0', 177 | 'handle_sigill=0', 178 | ])) 179 | env.setdefault('MSAN_OPTIONS', ':'.join([ 180 | 'exit_code=%d' % MSAN_ERROR, 181 | 'abort_on_error=1', 182 | 'msan_track_origins=0', 183 | 'allocator_may_return_null=1', 184 | 'symbolize=0', 185 | 'handle_segv=0', 186 | 'handle_sigbus=0', 187 | 'handle_abort=0', 188 | 'handle_sigfpe=0', 189 | 'handle_sigill=0', 190 | ])) 191 | env.setdefault( 192 | 'UBSAN_OPTIONS', 193 | ':'.join([ 194 | 'halt_on_error=1', 195 | 'abort_on_error=1', 196 | 'malloc_context_size=0', 197 | 'allocator_may_return_null=1', 198 | 'symbolize=0', 199 | 'handle_segv=0', 200 | 'handle_sigbus=0', 201 | 'handle_abort=0', 202 | 'handle_sigfpe=0', 203 | 'handle_sigill=0', 204 | ])) 205 | env.setdefault( 206 | 'LSAN_OPTIONS', 207 | ':'.join([ 208 | 'exit_code=%d' % LSAN_ERROR, 209 | 'fast_unwind_on_malloc=0', 210 | 'symbolize=0', 211 | 'print_suppressions=0', 212 | ])) 213 | # for stack protector 214 | env['LIBC_FATAL_STDERR_'] = '1' 215 | 216 | if 'AFL_PRELOAD' in env: 217 | env['LD_PRELOAD'] = env['AFL_PRELOAD'] 218 | 219 | if g_execs == 0: 220 | for k, v in sorted(env.items()): 221 | if os.environ.get(k) == v: 222 | continue 223 | logger.debug('env[%r]=%r', k, v) 224 | logger.debug('run command line=%s', subprocess.list2cmdline(cmd)) 225 | 226 | if found_input_file: 227 | stdin_handle = None 228 | else: 229 | stdin_handle = subprocess.PIPE 230 | if opts.stdout or g_execs == 0: 231 | stdout_handle = subprocess.PIPE 232 | else: 233 | stdout_handle = open('/dev/null', 'w') 234 | if opts.stderr or g_execs == 0: 235 | stderr_handle = subprocess.PIPE 236 | else: 237 | stderr_handle = open('/dev/null', 'w') 238 | 239 | p = subprocess.Popen( 240 | cmd, 241 | stdin=stdin_handle, 242 | stdout=stdout_handle, 243 | stderr=stderr_handle, 244 | env=env, 245 | preexec_fn=lambda: setlimits(opts)) 246 | 247 | t0 = time.time() 248 | try: 249 | if opts.time_limit: 250 | 251 | def kill_child(*_args): 252 | try: 253 | p.kill() 254 | except OSError: 255 | pass 256 | 257 | signal.signal(signal.SIGALRM, kill_child) 258 | signal.setitimer(signal.ITIMER_REAL, 0.001 * opts.time_limit) 259 | if not found_input_file: 260 | stdout, stderr = p.communicate(data) 261 | else: 262 | stdout, stderr = p.communicate() 263 | finally: 264 | signal.setitimer(signal.ITIMER_REAL, 0) 265 | signal.signal(signal.SIGALRM, signal.SIG_DFL) 266 | t1 = time.time() 267 | crashed = p.returncode < 0 or p.returncode == MSAN_ERROR 268 | finally: 269 | # XXX why? 
270 | if not filename: 271 | if pid == os.getpid(): 272 | os.unlink(tmp_fn) 273 | 274 | #logger.info('stdout begin =====') 275 | #logger.info('%s', stdout) 276 | #logger.info('stdout end =====') 277 | #logger.info('stderr begin =====') 278 | #logger.info('%s', stderr) 279 | #logger.info('stderr end =====') 280 | if g_execs == 0 and stderr: 281 | logger.info('stderr begin (last 8kb, 100 lines) =====') 282 | stderr_brief = b'\n'.join(stderr.splitlines()[-100:]) 283 | try: 284 | stderr_brief = stderr_brief.decode('utf8') 285 | except UnicodeDecodeError: 286 | # keep stderr be bytes if it is not utf8. 287 | pass 288 | stderr_brief = stderr_brief[-8192:] 289 | logger.info('%s', stderr_brief) 290 | logger.info('stderr end =====') 291 | if g_execs == 0: 292 | logger.info('returncode=%d, t=%.3fs', p.returncode, t1 - t0) 293 | else: 294 | logger.debug('returncode=%d, t=%.3fs', p.returncode, t1 - t0) 295 | 296 | g_execs += 1 297 | 298 | if g_execs == 1 and opts.auto: 299 | auto_found = False 300 | opts.timeout = opts.time_limit < (t1 - t0) * 1000 301 | if opts.timeout: 302 | logger.info('AUTO: timeout=%s', opts.timeout) 303 | auto_found = True 304 | if p.returncode < 0: 305 | opts.signal = -p.returncode 306 | logger.info('AUTO: signal=%s', opts.signal) 307 | auto_found = True 308 | if p.returncode >= 0: 309 | opts.returncode = p.returncode 310 | if opts.returncode != 0: 311 | logger.info('AUTO: returncode=%s', opts.returncode) 312 | auto_found = True 313 | m = re.search(br'ERROR: AddressSanitizer: (.+) on', stderr) 314 | if m: 315 | opts.stderr += [m.group()] 316 | m = re.search(br'(READ|WRITE) of size ', stderr) 317 | if m: 318 | opts.stderr.append(m.group()) 319 | m = re.search(br'(SCARINESS: \d+ .*)', stderr) 320 | if m: 321 | opts.stderr.append(m.group()) 322 | logger.info('AUTO: stderr=%r', opts.stderr) 323 | auto_found = True 324 | if not auto_found: 325 | logger.error('failed to detect error conditions automatically (--auto)') 326 | return False 327 | 328 | for s in opts.stdout: 329 | if s not in stdout: 330 | return False 331 | for s in opts.stderr: 332 | if s not in stderr: 333 | return False 334 | 335 | if opts.returncode is not None and opts.returncode != p.returncode: 336 | return False 337 | if opts.timeout and opts.timeout != (opts.time_limit < (t1 - t0) * 1000): 338 | return False 339 | if opts.signal and (p.returncode > 0 or opts.signal != -p.returncode): 340 | return False 341 | if opts.crash and not crashed: 342 | return False 343 | 344 | assert data, 'Even empty file meets required conditions! Please check again' 345 | 346 | return True 347 | 348 | 349 | def run_target(opts, data, filename=None): 350 | """ 351 | return True if match condition 352 | """ 353 | for _ in range(opts.try_): 354 | if run_target_once(opts, data, filename=filename): 355 | return True 356 | return False 357 | 358 | 359 | def next_p2(v): 360 | r = 1 361 | while v > r: 362 | r *= 2 363 | return r 364 | 365 | 366 | TMIN_SET_MIN_SIZE = 4 367 | TMIN_SET_STEPS = 128 368 | TRIM_START_STEPS = 16 369 | 370 | 371 | def step_normalization(opts, data, dummy_char): 372 | in_len = len(data) 373 | 374 | alpha_del0 = 0 375 | 376 | set_len = next_p2(in_len / TMIN_SET_STEPS) 377 | set_pos = 0 378 | 379 | if set_len < TMIN_SET_MIN_SIZE: 380 | set_len = TMIN_SET_MIN_SIZE 381 | 382 | # TODO optimization? 
    while set_pos < in_len:
        use_len = min(set_len, in_len - set_pos)
        if data[set_pos:set_pos + use_len].count(dummy_char) != use_len:
            tmp_buf = data[:set_pos] + dummy_char * use_len + data[set_pos + use_len:]

            if run_target(opts, tmp_buf):
                data = tmp_buf
                alpha_del0 += use_len

        set_pos += set_len

    return data, alpha_del0


def step_block_deletion(opts, data):
    in_len = len(data)

    del_len = next_p2(in_len / TRIM_START_STEPS)
    if not del_len:
        del_len = 1

    while del_len >= 1 and in_len >= 1:
        logger.info('del_len=%d, remain len=%d', del_len, in_len)
        del_pos = 0
        prev_del = True

        while del_pos < in_len:
            tail_len = in_len - del_pos - del_len
            if tail_len < 0:
                tail_len = 0

            # The current block equals the last block and the last block
            # failed, so we can skip the current block.
            if not prev_del and tail_len and \
                data[del_pos - del_len:del_pos] == data[del_pos:del_pos + del_len]:
                del_pos += del_len
                continue

            prev_del = False
            tmp_buf = data[:del_pos] + data[del_pos + del_len:]
            if run_target(opts, tmp_buf):
                data = tmp_buf
                prev_del = True
                in_len = len(data)
                if opts.verbose >= 1:
                    sys.stderr.write('\x1b[sso far len=%d\x1b[u' % in_len)
            else:
                del_pos += del_len

        del_len //= 2

    return data


def step_alphabet_minimization(opts, data, dummy_char):
    """replace a class of characters with the dummy char"""
    alpha_del1 = 0
    alpha_map = [0] * 256
    for c in data:
        alpha_map[c] += 1
    alpha_size = 256 - alpha_map.count(0)

    for i in range(256):
        if i == ord(dummy_char) or alpha_map[i] == 0:
            continue

        tmp_buf = data.replace(bytes([i]), dummy_char)

        if run_target(opts, tmp_buf):
            data = tmp_buf
            alpha_del1 += alpha_map[i]

    return data, alpha_del1


def step_character_minimization(opts, data, dummy_char):
    """replace one character at a time with the dummy char"""
    for i in range(len(data)):
        if data[i] == ord(dummy_char):
            continue
        tmp_buf = data[:i] + dummy_char + data[i + 1:]

        if run_target(opts, tmp_buf):
            data = tmp_buf

    return data
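

# Pass structure below (mirroring afl-tmin): first normalize coarse blocks to
# the dummy character, then repeat {block deletion with shrinking block sizes,
# alphabet minimization, per-character minimization} until a full pass makes
# no change.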
# use afl-tmin's minimization strategy
def minimize(opts):
    filename = None
    if opts.input == '-':
        # Read binary data; the minimization steps operate on bytes.
        data = orig_data = sys.stdin.buffer.read()
    else:
        with open(opts.input, 'rb') as f:
            data = orig_data = f.read()
        filename = opts.input
    logger.info('initial len=%d', len(data))

    logger.info('initial dry run')
    assert run_target(opts, data, filename=filename)
    if opts.dryrun:
        return

    alpha_d_total = 0

    dummy_char = b'0'

    logger.info('start normalization')
    data, alpha_del0 = step_normalization(opts, data, dummy_char)
    alpha_d_total += alpha_del0
    assert data

    logger.info('start minimize')
    last_data = None
    pass_num = 1
    while data is not last_data:
        logger.info('#%d, remain len=%d', pass_num, len(data))
        last_data = data
        data = step_block_deletion(opts, data)
        assert data

        data, alpha_del1 = step_alphabet_minimization(opts, data, dummy_char)
        assert data
        alpha_d_total += alpha_del1

        data = step_character_minimization(opts, data, dummy_char)
        assert data

        logger.info('#%d, so far len=%d', pass_num, len(data))
        print(repr(data))
        pass_num += 1

    logger.info('size: %d -> %d', len(orig_data), len(data))
    logger.info('execs: %d', g_execs)

    return data


def main():
    global logger

    parser = create_argument_parser()
    opts = parser.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(
        level=log_level, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)

    if opts.no_aslr:
        libc = ctypes.CDLL(None)
        ADDR_NO_RANDOMIZE = 0x0040000
        assert 0 == libc['personality'](ADDR_NO_RANDOMIZE)

    for i, s in enumerate(opts.stdout):
        opts.stdout[i] = s.encode('utf8')
    for i, s in enumerate(opts.stderr):
        opts.stderr[i] = s.encode('utf8')

    data = minimize(opts)
    if opts.dryrun:
        return

    print(repr(data))
    with open(opts.output, 'wb') as f:
        f.write(data)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------