├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── bin ├── smtparse ├── smtscan ├── stringbreak ├── stringfuzzg ├── stringfuzzx ├── stringmerge ├── stringstats ├── tryparse └── unprintable ├── setup.py ├── stringfuzz ├── __init__.py ├── analyser.py ├── ast.py ├── ast_walker.py ├── constants.py ├── fuzzers │ ├── __init__.py │ └── genetic.py ├── generator.py ├── generators │ ├── __init__.py │ ├── concats.py │ ├── equality.py │ ├── lengths.py │ ├── overlaps.py │ ├── random_ast.py │ ├── random_text.py │ └── regex.py ├── mergers │ ├── __init__.py │ └── simple.py ├── parser.py ├── scanner.py ├── smt.py ├── transformers │ ├── __init__.py │ ├── fuzz.py │ ├── graft.py │ ├── multiply.py │ ├── nop.py │ ├── reverse.py │ ├── rotate.py │ ├── translate.py │ └── unprintable.py ├── types.py └── util.py └── tests ├── ast_tests.py ├── genetic_tests.py ├── parser_tests.py ├── scanner_tests.py └── walker_tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | dist 4 | build -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | StringFuzz 2 | Copyright (c) Dmitry Blotsky 3 | All rights reserved. 4 | MIT License 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TOTAL = $(shell find . -name "*.py" | xargs cat | wc -l) 2 | GEN = $(shell find stringfuzz/generators -name "*.py" | xargs cat | wc -l) 3 | TRANS = $(shell find stringfuzz/transformers -name "*.py" | xargs cat | wc -l) 4 | NUM_TOTAL = $(shell find . -name "*.py" | wc -l) 5 | NUM_GEN = $(shell find stringfuzz/generators -name "*.py" | wc -l) 6 | NUM_TRANS = $(shell find stringfuzz/transformers -name "*.py" | wc -l) 7 | PER_TOTAL = $(shell echo $$(( $(TOTAL) / $(NUM_TOTAL) )) ) 8 | PER_GEN = $(shell echo $$(( $(GEN) / $(NUM_GEN) )) ) 9 | PER_TRANS = $(shell echo $$(( $(TRANS) / $(NUM_TRANS) )) ) 10 | 11 | help default all usage: 12 | @echo "Usage: don't use." 
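# Worked example of the LOC-per-file arithmetic above (numbers are hypothetical):
# with TOTAL=5000 lines over NUM_TOTAL=40 files, PER_TOTAL is 5000 / 40 = 125.
# The shell arithmetic is integer division, so the per-file averages are rounded down.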
13 | 14 | loc: 15 | @echo "total:" $(TOTAL) / $(NUM_TOTAL) = $(PER_TOTAL) 16 | @echo "gen: " $(GEN) / $(NUM_GEN) = $(PER_GEN) 17 | @echo "trans:" $(TRANS) / $(NUM_TRANS) = $(PER_TRANS) 18 | 19 | run: 20 | stringfuzzx --help 21 | stringfuzzg --help 22 | stringstats --help 23 | stringbreak --help 24 | 25 | cvc: 26 | stringbreak "cvc4-latest-release --lang smt2 --strings-exp" 27 | 28 | z3: 29 | stringbreak "z3str3-develop-release smt.string_solver=z3str3 -in" 30 | 31 | test: 32 | python3 -m unittest tests/*.py 33 | 34 | develop: test 35 | python3 setup.py develop 36 | 37 | install: 38 | python3 -m pip install --upgrade pip setuptools wheel 39 | python3 setup.py install 40 | 41 | uninstall: 42 | yes | pip3 uninstall stringfuzz 43 | 44 | reinstall: uninstall install 45 | 46 | clean: 47 | $(RM) *.pyc 48 | $(RM) -r ./**/__pycache__ 49 | $(RM) -r build 50 | $(RM) -r dist 51 | $(RM) -r *.egg-info 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Description 2 | =========== 3 | 4 | A collection of tools to manipulate and generate SMT-LIB 2.x problem instances. 5 | There are four main tools: 6 | 7 | - `stringfuzzg` to generate new instances 8 | - `stringfuzzx` to transform existing instances 9 | - `stringstats` to measure properties of instances 10 | - `stringmerge` to merge several instances into one 11 | 12 | Installing 13 | ========== 14 | 15 | Clone this repository, and run this command inside it: 16 | 17 | python3 setup.py install 18 | 19 | Running 20 | ======= 21 | 22 | Without installing, the scripts can be run from the repository root as follows: 23 | 24 | ./bin/stringfuzzg --help 25 | ./bin/stringfuzzx --help 26 | ./bin/stringstats --help 27 | 28 | If installed, they can be run from anywhere as follows: 29 | 30 | stringfuzzg --help 31 | stringfuzzx --help 32 | stringstats --help 33 | 34 | Examples 35 | ======== 36 | 37 | To create a problem with concats nested 100 levels deep: 38 | 39 | ./bin/stringfuzzg concats --depth 100 40 | 41 | To create the above problem and replace all characters with unprintable ones: 42 | 43 | ./bin/stringfuzzg concats --depth 100 | ./bin/stringfuzzx unprintable 44 | 45 | To create and immediately feed a problem to Z3str3: 46 | 47 | ./bin/stringfuzzg concats --depth 100 | z3str3 -in 48 | -------------------------------------------------------------------------------- /bin/smtparse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | CLI for the parser. Outputs AST. 
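Example usage (a sketch; 'problem.smt2' is a placeholder file name, and input
can also be piped on stdin since the file argument defaults to stdin):

    ./bin/smtparse problem.smt2
    ./bin/stringfuzzg concats --depth 3 | ./bin/smtparse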
5 | ''' 6 | 7 | import sys 8 | import argparse 9 | 10 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING 11 | from stringfuzz.parser import parse 12 | 13 | def main(): 14 | 15 | # create arg parser 16 | parser = argparse.ArgumentParser(description='Parse an SMT 2.* file.') 17 | parser.add_argument( 18 | 'file', 19 | nargs = '?', 20 | default = sys.stdin, 21 | type = argparse.FileType('r'), 22 | help = 'input file (default: stdin)' 23 | ) 24 | parser.add_argument( 25 | '--language', 26 | '-l', 27 | dest = 'language', 28 | type = str, 29 | choices = LANGUAGES, 30 | default = SMT_25_STRING, 31 | help = 'input language (default: {})'.format(SMT_25_STRING) 32 | ) 33 | 34 | # parse args 35 | args = parser.parse_args() 36 | 37 | # parse input 38 | try: 39 | expressions = parse(args.file.read(), args.language) 40 | 41 | # handle errors 42 | except IndexError as e: 43 | print(e, file=sys.stderr) 44 | return 1 45 | 46 | # print result 47 | for expression in expressions: 48 | print(expression) 49 | 50 | return 0 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /bin/smtscan: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | CLI for the scanner. Outputs tokens. 5 | ''' 6 | 7 | import sys 8 | import argparse 9 | 10 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING 11 | from stringfuzz.scanner import scan 12 | 13 | def main(): 14 | 15 | # create arg parser 16 | parser = argparse.ArgumentParser(description='Tokenize an SMT 2.* file.') 17 | parser.add_argument( 18 | 'file', 19 | nargs = '?', 20 | default = sys.stdin, 21 | type = argparse.FileType('r'), 22 | help = 'input file (default: stdin)' 23 | ) 24 | parser.add_argument( 25 | '--language', 26 | '-l', 27 | dest = 'language', 28 | type = str, 29 | choices = LANGUAGES, 30 | default = SMT_25_STRING, 31 | help = 'input language (default: {})'.format(SMT_25_STRING) 32 | ) 33 | 34 | # parse args 35 | args = parser.parse_args() 36 | 37 | # scan input 38 | try: 39 | tokens = scan(args.file.read(), language=args.language) 40 | 41 | # report result 42 | except IndexError as e: 43 | print(e) 44 | return 1 45 | 46 | for token in tokens: 47 | print(token.name, repr(token.value)) 48 | 49 | return 0 50 | 51 | if __name__ == '__main__': 52 | main() 53 | -------------------------------------------------------------------------------- /bin/stringbreak: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import argparse 5 | 6 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING 7 | from stringfuzz.generators import random_ast 8 | from stringfuzz.generator import generate 9 | from stringfuzz.parser import parse 10 | from stringfuzz.smt import smt_string_logic 11 | 12 | from stringfuzz.fuzzers.genetic import simulate 13 | 14 | DEFAULT_NUM_GENERATIONS = 200 15 | DEFAULT_LOG_RESOLUTION = 1 16 | DEFAULT_WORLD_SIZE = 10 17 | 18 | def main(): 19 | 20 | # create arg parser 21 | parser = argparse.ArgumentParser(description='Test an SMT string solver in an exploratory fashion.') 22 | parser.add_argument( 23 | 'command', 24 | type = str, 25 | help = 'command to run the solver (if input fed on standard in)' 26 | ) 27 | parser.add_argument( 28 | '--out-language', 29 | '-o', 30 | dest = 'out_language', 31 | type = str, 32 | choices = LANGUAGES, 33 | default = SMT_25_STRING, 34 | 
help = 'solver language (default: {})'.format(SMT_25_STRING) 35 | ) 36 | parser.add_argument( 37 | '--in-language', 38 | '-i', 39 | dest = 'in_language', 40 | type = str, 41 | choices = LANGUAGES, 42 | default = SMT_25_STRING, 43 | help = 'seed problem language (default: {})'.format(SMT_25_STRING) 44 | ) 45 | parser.add_argument( 46 | '--seed-problem', 47 | '-s', 48 | dest = 'seed_problem', 49 | metavar = 'F', 50 | default = None, 51 | type = argparse.FileType('r'), 52 | help = 'input file (default: stdin)' 53 | ) 54 | parser.add_argument( 55 | '--num-generations', 56 | '-g', 57 | dest = 'num_generations', 58 | metavar = 'N', 59 | type = int, 60 | default = DEFAULT_NUM_GENERATIONS, 61 | help = 'number of generations (default: {})'.format(DEFAULT_NUM_GENERATIONS) 62 | ) 63 | 64 | # parse args 65 | args = parser.parse_args() 66 | 67 | # create seed problem, or use an existing one 68 | if args.seed_problem is None: 69 | seed_problem = [smt_string_logic()] + random_ast( 70 | num_vars = 1, 71 | num_asserts = 5, 72 | depth = 3, 73 | max_terms = 5, 74 | max_str_lit_length = 10, 75 | max_int_lit = 30, 76 | literal_probability = 0.5, 77 | semantically_valid = True 78 | ) 79 | else: 80 | seed_problem = parse(args.seed_problem.read(), args.in_language) 81 | 82 | # print seed problem 83 | print('progenitor:') 84 | print('-----') 85 | print(generate(seed_problem, args.out_language)) 86 | print('-----') 87 | 88 | # run the tester 89 | print('') 90 | print('simulating ...') 91 | surviving_problems = simulate( 92 | progenitor = seed_problem, 93 | language = args.out_language, 94 | saint_peter = args.command, 95 | num_generations = args.num_generations, 96 | world_size = DEFAULT_WORLD_SIZE, 97 | log_resolution = DEFAULT_LOG_RESOLUTION 98 | ) 99 | print('finished') 100 | print('') 101 | 102 | # print out final population 103 | for i, problem in enumerate(surviving_problems): 104 | print('survivor #{}'.format(i)) 105 | print('-----') 106 | print(generate(problem, args.out_language)) 107 | print('-----') 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /bin/stringfuzzg: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | The fuzzer tool that generates new problems. 
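Example invocations (a sketch based on the generators and flags defined below;
actual output varies with the --seed/--random options):

    stringfuzzg concats --depth 100
    stringfuzzg lengths --num-vars 10 --max-length 20
    stringfuzzg --random regex --num-regexes 2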
5 | ''' 6 | 7 | import sys 8 | import argparse 9 | import random 10 | 11 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING 12 | from stringfuzz.parser import parse 13 | from stringfuzz.generator import generate 14 | from stringfuzz.smt import smt_get_model, smt_string_logic 15 | 16 | from stringfuzz.generators import concats, SYNTACTIC_DEPTH, SEMANTIC_DEPTH 17 | from stringfuzz.generators import overlaps 18 | from stringfuzz.generators import lengths 19 | from stringfuzz.generators import random_text 20 | from stringfuzz.generators import random_ast 21 | from stringfuzz.generators import equality 22 | from stringfuzz.generators import regex, INCREASING_LITERALS, RANDOM_LITERALS, MEMBER_IN, MEMBER_NOT_IN, MEMBER_ALTERNATING, MEMBER_RANDOM, OPERATOR_STAR, OPERATOR_PLUS, OPERATOR_UNION, OPERATOR_INTER, OPERATOR_CONCAT, OPERATOR_ALTERNATING, OPERATOR_RANDOM 23 | 24 | # constants 25 | CONCATS = 'concats' 26 | LENGTHS = 'lengths' 27 | OVERLAPS = 'overlaps' 28 | RANDOM_TEXT = 'random-text' 29 | RANDOM_AST = 'random-ast' 30 | REGEX = 'regex' 31 | EQUALITY = 'equality' 32 | 33 | GENERATORS = { 34 | CONCATS: concats, 35 | LENGTHS: lengths, 36 | OVERLAPS: overlaps, 37 | RANDOM_TEXT: random_text, 38 | RANDOM_AST: random_ast, 39 | REGEX: regex, 40 | EQUALITY: equality, 41 | } 42 | 43 | DEPTH_TYPES = [SYNTACTIC_DEPTH, SEMANTIC_DEPTH] 44 | LITERAL_TYPES = [INCREASING_LITERALS, RANDOM_LITERALS] 45 | MEMBERSHIP_TYPES = [MEMBER_IN, MEMBER_NOT_IN, MEMBER_ALTERNATING, MEMBER_RANDOM] 46 | OPERATOR_LIST = [OPERATOR_STAR, OPERATOR_PLUS, OPERATOR_UNION, OPERATOR_INTER, OPERATOR_CONCAT] 47 | OPERATOR_TYPES = [OPERATOR_ALTERNATING, OPERATOR_RANDOM] 48 | 49 | # defaults 50 | DEFAULT_SEED = 0 51 | DEFAULT_RANDOM = False 52 | DEFAULT_PRODUCE_MODELS = False 53 | 54 | DEFAULT_LENGTH = 10 55 | DEFAULT_DEPTH = 5 56 | DEFAULT_SOLUTION = None 57 | DEFAULT_BALANCED = False 58 | DEFAULT_RANDOM_RELATIONS = False 59 | DEFAULT_DEPTH_TYPE = SYNTACTIC_DEPTH 60 | DEFAULT_EXTRACTED = 0 61 | DEFAULT_EXTRACT_INDEX = 10 62 | 63 | DEFAULT_NUM_VARS = 5 64 | DEFAULT_MIN_LEN = 0 65 | DEFAULT_MAX_LEN = 10 66 | DEFAULT_NUM_CONCATS = 0 67 | 68 | DEFAULT_NUM_OVERLAPPING = 2 69 | DEFAULT_LENGTH_OF_CONSTS = 5 70 | 71 | DEFAULT_NUM_EQUAL_EXPRESSIONS = 2 72 | DEFAULT_TERMS_PER_EXPRESSION = 7 73 | DEFAULT_PREFIX_LENGTH = 5 74 | DEFAULT_SUFFIX_LENGTH = 0 75 | DEFAULT_INFIX_LENGTH = 3 76 | DEFAULT_INFIX_PROBABILITY = 0.2 77 | DEFAULT_ADD_INFIXES = False 78 | DEFAULT_EQUAL_RANDOMISE_LENGTHS = False 79 | 80 | DEFAULT_LITERAL_TYPE = RANDOM_LITERALS 81 | DEFAULT_MEMBERSHIP_TYPE = MEMBER_IN 82 | DEFAULT_REGEX_COINCIDE = False 83 | DEFAULT_NUM_REGEX_TERMS = 3 84 | DEFAULT_NUM_REGEXES = 1 85 | DEFAULT_REGEX_LIT_MIN = 1 86 | DEFAULT_REGEX_LIT_MAX = 3 87 | DEFAULT_TERM_DEPTH = 0 88 | DEFAULT_REGEX_VAR_MIN = None 89 | DEFAULT_REGEX_VAR_MAX = None 90 | DEFAULT_OPERATORS = OPERATOR_STAR + OPERATOR_PLUS + OPERATOR_UNION 91 | DEFAULT_OPERATOR_TYPE = OPERATOR_RANDOM 92 | 93 | DEFAULT_NUM_RANDOM_VARS = 5 94 | DEFAULT_NUM_RANDOM_ASSERTS = 10 95 | DEFAULT_RANDOM_DEPTH = 2 96 | DEFAULT_MAX_RANDOM_TERMS = 5 97 | DEFAULT_MAX_RANDOM_STRINGS = 10 98 | DEFAULT_MAX_RANDOM_NUMBERS = 10 99 | DEFAULT_SEMANTICALLY_VALID = False 100 | DEFAULT_LITERAL_PROBABILITY = 0.1 101 | 102 | def main(): 103 | 104 | # create arg parser 105 | global_parser = argparse.ArgumentParser(description='SMTLIB 2.* problem generator.') 106 | 107 | # global args 108 | global_parser.add_argument( 109 | '--language', 110 | '-l', 111 | dest = 'language', 112 | type = str, 113 | 
choices = LANGUAGES, 114 | default = SMT_25_STRING, 115 | help = 'output language (default: {})'.format(SMT_25_STRING) 116 | ) 117 | global_parser.add_argument( 118 | '--models', 119 | '-m', 120 | dest = 'produce_models', 121 | action = 'store_true', 122 | default = DEFAULT_PRODUCE_MODELS, 123 | help = 'append the SMT 2.x command to produce a model (default: {})'.format(DEFAULT_PRODUCE_MODELS) 124 | ) 125 | seed_group = global_parser.add_mutually_exclusive_group() 126 | seed_group.add_argument( 127 | '--seed', 128 | '-s', 129 | dest = 'seed', 130 | metavar = 'S', 131 | type = int, 132 | default = DEFAULT_SEED, 133 | help = 'seed for random number generator (default: {})'.format(DEFAULT_SEED) 134 | ) 135 | seed_group.add_argument( 136 | '--random', 137 | '-r', 138 | dest = 'random', 139 | action = 'store_true', 140 | default = DEFAULT_RANDOM, 141 | help = 'seed the random number generator with the current time (default: {})'.format(DEFAULT_RANDOM) 142 | ) 143 | 144 | # get subparsers 145 | subparsers = global_parser.add_subparsers(dest='generator', help='generator choice') 146 | subparsers.required = True 147 | 148 | # concats fuzzer 149 | concats_parser = subparsers.add_parser(CONCATS, help='instance with deeply nested concats') 150 | concats_parser.add_argument( 151 | '--depth', 152 | '-d', 153 | dest = 'depth', 154 | metavar = 'D', 155 | type = int, 156 | default = DEFAULT_DEPTH, 157 | help = 'depth of the concats (default: {})'.format(DEFAULT_DEPTH) 158 | ) 159 | concats_parser.add_argument( 160 | '--depth-type', 161 | '-t', 162 | dest = 'depth_type', 163 | type = str, 164 | choices = DEPTH_TYPES, 165 | default = DEFAULT_DEPTH_TYPE, 166 | help = 'type of depth (default: {})'.format(DEFAULT_DEPTH_TYPE) 167 | ) 168 | concats_parser.add_argument( 169 | '--solution', 170 | '-s', 171 | dest = 'solution', 172 | metavar = 'S', 173 | type = str, 174 | default = DEFAULT_SOLUTION, 175 | help = 'expected solution (default: {!r})'.format(DEFAULT_SOLUTION) 176 | ) 177 | concats_parser.add_argument( 178 | '--extract', 179 | '-e', 180 | dest = 'num_extracts', 181 | metavar = 'N', 182 | type = int, 183 | default = DEFAULT_EXTRACTED, 184 | help = 'number of extracts to add (default: {})'.format(DEFAULT_EXTRACTED) 185 | ) 186 | concats_parser.add_argument( 187 | '--extract-max', 188 | '-m', 189 | dest = 'max_extract_index', 190 | metavar = 'N', 191 | type = int, 192 | default = DEFAULT_EXTRACT_INDEX, 193 | help = 'max index from which to extract (default: {})'.format(DEFAULT_EXTRACT_INDEX) 194 | ) 195 | concats_parser.add_argument( 196 | '--balanced', 197 | '-b', 198 | action = 'store_true', 199 | default = DEFAULT_BALANCED, 200 | help = 'flag for balanced tree (default: {!r})'.format(DEFAULT_BALANCED) 201 | ) 202 | 203 | # lengths fuzzer 204 | lengths_parser = subparsers.add_parser(LENGTHS, help='instance with length constraints') 205 | lengths_parser.add_argument( 206 | '--num-vars', 207 | '-v', 208 | dest = 'num_vars', 209 | metavar = 'N', 210 | type = int, 211 | default = DEFAULT_NUM_VARS, 212 | help = 'number of variables to create (default: {})'.format(DEFAULT_NUM_VARS) 213 | ) 214 | lengths_parser.add_argument( 215 | '--min-length', 216 | '-n', 217 | dest = 'min_length', 218 | metavar = 'N', 219 | type = int, 220 | default = DEFAULT_MIN_LEN, 221 | help = 'lower bound on length (default: {})'.format(DEFAULT_MIN_LEN) 222 | ) 223 | lengths_parser.add_argument( 224 | '--max-length', 225 | '-x', 226 | dest = 'max_length', 227 | metavar = 'N', 228 | type = int, 229 | default = DEFAULT_MAX_LEN, 230 | 
help = 'upper bound on length (default: {})'.format(DEFAULT_MAX_LEN) 231 | ) 232 | lengths_parser.add_argument( 233 | '--num-concats', 234 | '-c', 235 | dest = 'num_concats', 236 | metavar = 'N', 237 | type = int, 238 | default = DEFAULT_NUM_CONCATS, 239 | help = 'number of (binary) concats to add (default: {})'.format(DEFAULT_NUM_CONCATS) 240 | ) 241 | lengths_parser.add_argument( 242 | '--random-relations', 243 | '-r', 244 | action = 'store_true', 245 | dest = 'random_relations', 246 | default = DEFAULT_RANDOM_RELATIONS, 247 | help = 'use constraints other than "=" (default: {!r})'.format(DEFAULT_RANDOM_RELATIONS) 248 | ) 249 | 250 | # overlaps fuzzer 251 | overlaps_parser = subparsers.add_parser(OVERLAPS, help='instance with overlapping variables') 252 | overlaps_parser.add_argument( 253 | '--num-vars', 254 | '-n', 255 | dest = 'num_vars', 256 | metavar = 'N', 257 | type = int, 258 | default = DEFAULT_NUM_OVERLAPPING, 259 | help = 'number of overlapping variables to generate (default: {})'.format(DEFAULT_NUM_OVERLAPPING) 260 | ) 261 | overlaps_parser.add_argument( 262 | '--length-of-consts', 263 | '-c', 264 | dest = 'length_of_consts', 265 | metavar = 'N', 266 | type = int, 267 | default = DEFAULT_LENGTH_OF_CONSTS, 268 | help = 'the length of the constant terms (default: {})'.format(DEFAULT_LENGTH_OF_CONSTS) 269 | ) 270 | 271 | # equality fuzzer 272 | equality_parser = subparsers.add_parser(EQUALITY, help='instance with concatenated expressions (of mixed constants and variables) all equal to each other') 273 | equality_parser.add_argument( 274 | '--num-exprs', 275 | '-n', 276 | dest = 'num_expressions', 277 | metavar = 'N', 278 | type = int, 279 | default = DEFAULT_NUM_EQUAL_EXPRESSIONS, 280 | help = 'number of equal expressions to generate (default: {})'.format(DEFAULT_NUM_EQUAL_EXPRESSIONS) 281 | ) 282 | equality_parser.add_argument( 283 | '--num-terms', 284 | '-t', 285 | dest = 'num_terms', 286 | metavar = 'N', 287 | type = int, 288 | default = DEFAULT_TERMS_PER_EXPRESSION, 289 | help = 'number terms in each expression (default: {})'.format(DEFAULT_TERMS_PER_EXPRESSION) 290 | ) 291 | equality_parser.add_argument( 292 | '--prefix-len', 293 | '-p', 294 | dest = 'prefix_length', 295 | metavar = 'N', 296 | type = int, 297 | default = DEFAULT_PREFIX_LENGTH, 298 | help = 'length of constant prefix (default: {})'.format(DEFAULT_PREFIX_LENGTH) 299 | ) 300 | equality_parser.add_argument( 301 | '--suffix-len', 302 | '-s', 303 | dest = 'suffix_length', 304 | metavar = 'N', 305 | type = int, 306 | default = DEFAULT_SUFFIX_LENGTH, 307 | help = 'length of constant suffix (default: {})'.format(DEFAULT_SUFFIX_LENGTH) 308 | ) 309 | equality_parser.add_argument( 310 | '--infix-len', 311 | '-i', 312 | dest = 'infix_length', 313 | metavar = 'N', 314 | type = int, 315 | default = DEFAULT_INFIX_LENGTH, 316 | help = 'lengths of constant infixes (default: {})'.format(DEFAULT_INFIX_LENGTH) 317 | ) 318 | equality_parser.add_argument( 319 | '--infix-chance', 320 | '-c', 321 | dest = 'infix_probability', 322 | metavar = 'P', 323 | type = float, 324 | default = DEFAULT_INFIX_PROBABILITY, 325 | help = 'probability of constant infixes replacing variables (default: {})'.format(DEFAULT_INFIX_PROBABILITY) 326 | ) 327 | equality_parser.add_argument( 328 | '--add-infixes', 329 | '-m', 330 | dest = 'add_infixes', 331 | action = 'store_true', 332 | default = DEFAULT_ADD_INFIXES, 333 | help = 'add constant infixes to expressions with 50%% chance (default: {})'.format(DEFAULT_ADD_INFIXES) 334 | ) 335 | 
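    # Example invocation (a sketch; values are arbitrary and use the flags
    # defined for this sub-command):
    #   stringfuzzg equality --num-exprs 3 --num-terms 5 --prefix-len 4 --infix-chance 0.5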
equality_parser.add_argument( 336 | '--random-lengths', 337 | '-r', 338 | dest = 'randomise_lengths', 339 | action = 'store_true', 340 | default = DEFAULT_EQUAL_RANDOMISE_LENGTHS, 341 | help = 'treat all length settings as upper bounds only (default: {})'.format(DEFAULT_EQUAL_RANDOMISE_LENGTHS) 342 | ) 343 | 344 | # regex-pair fuzzer 345 | regex_parser = subparsers.add_parser(REGEX, help='instance testing one variable for regex membership') 346 | regex_parser.add_argument( 347 | '--literal-type', 348 | '-a', 349 | dest = 'literal_type', 350 | type = str, 351 | choices = LITERAL_TYPES, 352 | default = DEFAULT_LITERAL_TYPE, 353 | help = 'way to generate regex literals (default: {})'.format(DEFAULT_LITERAL_TYPE) 354 | ) 355 | regex_parser.add_argument( 356 | '--coincide', 357 | '-c', 358 | dest = 'reset_alphabet', 359 | action = 'store_true', 360 | default = DEFAULT_REGEX_COINCIDE, 361 | help = 'try to make the regexes share alphabets (default: {})'.format(DEFAULT_REGEX_COINCIDE) 362 | ) 363 | regex_parser.add_argument( 364 | '--membership-type', 365 | '-i', 366 | dest = 'membership_type', 367 | type = str, 368 | choices = MEMBERSHIP_TYPES, 369 | default = DEFAULT_MEMBERSHIP_TYPE, 370 | help = 'way to test regex membership (default: {})'.format(DEFAULT_MEMBERSHIP_TYPE) 371 | ) 372 | regex_parser.add_argument( 373 | '--num-terms', 374 | '-t', 375 | dest = 'num_terms', 376 | metavar = 'N', 377 | type = int, 378 | default = DEFAULT_NUM_REGEX_TERMS, 379 | help = 'number of terms in each regex (default: {})'.format(DEFAULT_NUM_REGEX_TERMS) 380 | ) 381 | regex_parser.add_argument( 382 | '--lit-min', 383 | '-m', 384 | dest = 'literal_min', 385 | metavar = 'N', 386 | type = int, 387 | default = DEFAULT_REGEX_LIT_MIN, 388 | help = 'min length of regex terms (default: {})'.format(DEFAULT_REGEX_LIT_MIN) 389 | ) 390 | regex_parser.add_argument( 391 | '--lit-max', 392 | '-x', 393 | dest = 'literal_max', 394 | metavar = 'N', 395 | type = int, 396 | default = DEFAULT_REGEX_LIT_MAX, 397 | help = 'max length of regex terms (default: {})'.format(DEFAULT_REGEX_LIT_MAX) 398 | ) 399 | regex_parser.add_argument( 400 | '--depth', 401 | '-d', 402 | dest = 'term_depth', 403 | metavar = 'D', 404 | type = int, 405 | default = DEFAULT_TERM_DEPTH, 406 | help = 'depth of terms (default: {})'.format(DEFAULT_TERM_DEPTH) 407 | ) 408 | regex_parser.add_argument( 409 | '--num-regexes', 410 | '-r', 411 | dest = 'num_regexes', 412 | metavar = 'N', 413 | type = int, 414 | default = DEFAULT_NUM_REGEXES, 415 | help = 'number of regexes to test (default: {})'.format(DEFAULT_NUM_REGEXES) 416 | ) 417 | regex_parser.add_argument( 418 | '--var-min', 419 | '-M', 420 | dest = 'min_var_length', 421 | metavar = 'N', 422 | type = int, 423 | default = DEFAULT_REGEX_VAR_MIN, 424 | help = 'min length of the variable (default: {})'.format(DEFAULT_REGEX_VAR_MIN) 425 | ) 426 | regex_parser.add_argument( 427 | '--var-max', 428 | '-X', 429 | dest = 'max_var_length', 430 | metavar = 'N', 431 | type = int, 432 | default = DEFAULT_REGEX_VAR_MAX, 433 | help = 'max length of the variable (default: {})'.format(DEFAULT_REGEX_VAR_MAX) 434 | ) 435 | regex_parser.add_argument( 436 | '--operators', 437 | '-o', 438 | dest = 'operators', 439 | metavar = 'OPS', 440 | type = str, 441 | default = DEFAULT_OPERATORS, 442 | help = 'operators to choose from for deep terms. 
OPS is of the form [{}]+ representing the operators star ({}), plus ({}), union ({}), intersection ({}), concatenation ({}) (default: {})'.format(''.join(OPERATOR_LIST), OPERATOR_STAR, OPERATOR_PLUS, OPERATOR_UNION, OPERATOR_INTER, OPERATOR_CONCAT, DEFAULT_OPERATORS) 443 | ) 444 | regex_parser.add_argument( 445 | '--operator-type', 446 | '-O', 447 | dest = 'operator_type', 448 | type = str, 449 | choices = OPERATOR_TYPES, 450 | default = DEFAULT_OPERATOR_TYPE, 451 | help = 'way to choose operator for deep terms (default: {})'.format(DEFAULT_OPERATOR_TYPE) 452 | ) 453 | 454 | # random text fuzzer 455 | random_parser = subparsers.add_parser(RANDOM_TEXT, help='totally random text') 456 | random_parser.add_argument( 457 | '--length', 458 | '-l', 459 | dest = 'length', 460 | metavar = 'L', 461 | type = int, 462 | default = DEFAULT_LENGTH, 463 | help = 'length of the text (default: {})'.format(DEFAULT_LENGTH) 464 | ) 465 | 466 | # random_ast fuzzer 467 | random_ast_parser = subparsers.add_parser(RANDOM_AST, help='random but syntactically valid problem') 468 | random_ast_parser.add_argument( 469 | '--num-vars', 470 | '-v', 471 | dest = 'num_vars', 472 | metavar = 'N', 473 | type = int, 474 | default = DEFAULT_NUM_RANDOM_VARS, 475 | help = 'number of variables (of each sort) in the problem (default: {})'.format(DEFAULT_NUM_RANDOM_VARS) 476 | ) 477 | random_ast_parser.add_argument( 478 | '--num-asserts', 479 | '-n', 480 | dest = 'num_asserts', 481 | metavar = 'N', 482 | type = int, 483 | default = DEFAULT_NUM_RANDOM_ASSERTS, 484 | help = 'number of asserts in the problem (default: {})'.format(DEFAULT_NUM_RANDOM_ASSERTS) 485 | ) 486 | random_ast_parser.add_argument( 487 | '--depth', 488 | '-d', 489 | dest = 'depth', 490 | metavar = 'D', 491 | type = int, 492 | default = DEFAULT_RANDOM_DEPTH, 493 | help = 'depth of nested expressions (default: {})'.format(DEFAULT_RANDOM_DEPTH) 494 | ) 495 | random_ast_parser.add_argument( 496 | '--max-terms', 497 | '-t', 498 | dest = 'max_terms', 499 | metavar = 'N', 500 | type = int, 501 | default = DEFAULT_MAX_RANDOM_TERMS, 502 | help = 'maximum number of terms for n-ary expressions, like concats (default: {})'.format(DEFAULT_MAX_RANDOM_TERMS) 503 | ) 504 | random_ast_parser.add_argument( 505 | '--max-string', 506 | '-l', 507 | dest = 'max_str_lit_length', 508 | metavar = 'N', 509 | type = int, 510 | default = DEFAULT_MAX_RANDOM_STRINGS, 511 | help = 'maximum length of string literals (default: {})'.format(DEFAULT_MAX_RANDOM_STRINGS) 512 | ) 513 | random_ast_parser.add_argument( 514 | '--max-int', 515 | '-x', 516 | dest = 'max_int_lit', 517 | metavar = 'N', 518 | type = int, 519 | default = DEFAULT_MAX_RANDOM_NUMBERS, 520 | help = 'maximum size of int literals (default: {})'.format(DEFAULT_MAX_RANDOM_NUMBERS) 521 | ) 522 | random_ast_parser.add_argument( 523 | '--meaningful', 524 | '-m', 525 | dest = 'semantically_valid', 526 | action = 'store_true', 527 | default = DEFAULT_SEMANTICALLY_VALID, 528 | help = 'generate semantically valid problems (default: {})'.format(DEFAULT_SEMANTICALLY_VALID) 529 | ) 530 | random_ast_parser.add_argument( 531 | '--literal-chance', 532 | '-p', 533 | dest = 'literal_probability', 534 | metavar = 'P', 535 | type = float, 536 | default = DEFAULT_LITERAL_PROBABILITY, 537 | help = 'probability of creating literals instead of variables (default: {})'.format(DEFAULT_LITERAL_PROBABILITY) 538 | ) 539 | 540 | # parse args 541 | args = global_parser.parse_args() 542 | 543 | # get the generator function based on args 544 | generator_name = 
args.generator 545 | generator = GENERATORS[generator_name] 546 | 547 | # seed the RNG 548 | if args.random is True: 549 | random.seed() 550 | else: 551 | random.seed(args.seed) 552 | 553 | # get some flags that will get popped from args before they're used 554 | produce_models = args.produce_models 555 | language = args.language 556 | 557 | # get args as a dict 558 | # NOTE: 559 | # argparse's Namespace object (which 'args' is) returns itself as a 560 | # dict when vars() is called on it 561 | generator_args = vars(args) 562 | 563 | # pop arguments that are specific to this script because 564 | # they shouldn't be passed on to the generator 565 | generator_args.pop('language') 566 | generator_args.pop('produce_models') 567 | generator_args.pop('generator') 568 | generator_args.pop('seed') 569 | generator_args.pop('random') 570 | 571 | # run the generator with the args 572 | generated = generator(**generator_args) 573 | 574 | # prepend the logic setting 575 | generated = [smt_string_logic()] + generated 576 | 577 | # the random text generator produces raw text 578 | if (generator == random_text): 579 | print(generated) 580 | 581 | # other generators produce ASTs 582 | else: 583 | 584 | # add the model-getting node if needed 585 | if produce_models is True: 586 | generated.append(smt_get_model()) 587 | 588 | print(generate(generated, language)) 589 | 590 | if __name__ == '__main__': 591 | main() 592 | -------------------------------------------------------------------------------- /bin/stringfuzzx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | The fuzzer tool that transforms existing problems. 5 | ''' 6 | 7 | import sys 8 | import argparse 9 | import random 10 | 11 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING 12 | from stringfuzz.transformers import unprintable, nop, rotate, fuzz, graft, translate, reverse, multiply 13 | from stringfuzz.generator import generate 14 | from stringfuzz.parser import parse, ParsingError 15 | from stringfuzz.ast import SettingNode, ExpressionNode, MetaCommandNode 16 | 17 | # constants 18 | UNPRINTABLE = 'unprintable' 19 | NOP = 'nop' 20 | ROTATE = 'rotate' 21 | FUZZ = 'fuzz' 22 | GRAFT = 'graft' 23 | TRANSLATE = 'translate' 24 | REVERSE = 'reverse' 25 | MULTIPLY = 'multiply' 26 | 27 | TRANSFORMERS = { 28 | UNPRINTABLE: unprintable, 29 | NOP: nop, 30 | ROTATE: rotate, 31 | FUZZ: fuzz, 32 | GRAFT: graft, 33 | TRANSLATE: translate, 34 | REVERSE: reverse, 35 | MULTIPLY: multiply 36 | } 37 | 38 | # defaults 39 | DEFAULT_SEED = 0 40 | DEFAULT_RANDOM = False 41 | DEFAULT_FACTOR = 2 42 | DEFAULT_INTEGER_FLAG = False 43 | DEFAULT_SKIP_RE_RANGE = True 44 | DEFAULT_SKIP_STR_TO_RE = True 45 | 46 | GET_MODEL = "get-model" 47 | GET_INFO = "get-info" 48 | TO_STRIP = [GET_MODEL, GET_INFO] 49 | 50 | def should_keep(expr): 51 | if isinstance(expr, SettingNode): 52 | return False 53 | if isinstance(expr, MetaCommandNode): 54 | return False 55 | if isinstance(expr, ExpressionNode): 56 | if expr.symbol in TO_STRIP: 57 | return False 58 | return True 59 | 60 | def main(): 61 | 62 | # create arg parser 63 | global_parser = argparse.ArgumentParser(description='SMTLIB 2.* problem transformer.') 64 | global_parser.add_argument( 65 | '--file', 66 | '-f', 67 | dest = 'input_file', 68 | metavar = 'F', 69 | default = sys.stdin, 70 | type = argparse.FileType('r'), 71 | help = 'input file (default: stdin)' 72 | ) 73 | global_parser.add_argument( 74 | '--in-lang', 75 | '-i', 76 | dest = 
'input_language', 77 | type = str, 78 | choices = LANGUAGES, 79 | default = SMT_25_STRING, 80 | help = 'input language (default: {})'.format(SMT_25_STRING) 81 | ) 82 | global_parser.add_argument( 83 | '--out-lang', 84 | '-o', 85 | dest = 'output_language', 86 | type = str, 87 | choices = LANGUAGES, 88 | default = SMT_25_STRING, 89 | help = 'output language (default: {})'.format(SMT_25_STRING) 90 | ) 91 | 92 | seed_group = global_parser.add_mutually_exclusive_group() 93 | seed_group.add_argument( 94 | '--seed', 95 | '-s', 96 | dest = 'seed', 97 | metavar = 'S', 98 | type = int, 99 | default = DEFAULT_SEED, 100 | help = 'seed for random number generator (default: {})'.format(DEFAULT_SEED) 101 | ) 102 | seed_group.add_argument( 103 | '--random', 104 | '-r', 105 | dest = 'random', 106 | action = 'store_true', 107 | default = DEFAULT_RANDOM, 108 | help = 'seed the random number generator with the current time (default: {})'.format(DEFAULT_RANDOM) 109 | ) 110 | 111 | # get subparsers 112 | subparsers = global_parser.add_subparsers(dest='transformer', help='transformer choice') 113 | subparsers.required = True 114 | 115 | # fuzz transformer 116 | fuzz_parser = subparsers.add_parser(FUZZ, help='fuzz transformer') 117 | fuzz_parser.add_argument( 118 | '--re-range', 119 | dest = 'skip_re_range', 120 | action = 'store_false', 121 | default = DEFAULT_SKIP_RE_RANGE, 122 | help = 'Include re_range nodes in multiplication (default: {})'.format(DEFAULT_SKIP_RE_RANGE) 123 | ) 124 | # graft transformer 125 | graft_parser = subparsers.add_parser(GRAFT, help='graft transformer') 126 | graft_parser.add_argument( 127 | '--str-to-re', 128 | dest = 'skip_str_to_re', 129 | action = 'store_false', 130 | default = DEFAULT_SKIP_STR_TO_RE, 131 | help = 'Include str_to_re nodes in grafting (default: {})'.format(DEFAULT_SKIP_STR_TO_RE) 132 | ) 133 | 134 | # multiply transformer 135 | multiply_parser = subparsers.add_parser(MULTIPLY, help='multiply transformer') 136 | multiply_parser.add_argument( 137 | '--factor', 138 | dest = 'factor', 139 | metavar = 'N', 140 | type = int, 141 | default = DEFAULT_FACTOR, 142 | help = 'number to multiply literals by (default: {})'.format(DEFAULT_FACTOR) 143 | ) 144 | multiply_parser.add_argument( 145 | '--re-range', 146 | dest = 'skip_re_range', 147 | action = 'store_false', 148 | default = DEFAULT_SKIP_RE_RANGE, 149 | help = 'Include re_range nodes in multiplication (default: {})'.format(DEFAULT_SKIP_RE_RANGE) 150 | ) 151 | 152 | # nop transformer 153 | nop_parser = subparsers.add_parser(NOP, help='nop transformer') 154 | 155 | # reverse transformer 156 | reverse_parser = subparsers.add_parser(REVERSE, help='reverse transformer') 157 | 158 | # rotate transformer 159 | rotate_parser = subparsers.add_parser(ROTATE, help='rotate transformer') 160 | 161 | # translate transformer 162 | translate_parser = subparsers.add_parser(TRANSLATE, help='translate transformer') 163 | translate_parser.add_argument( 164 | '--integers', 165 | dest = 'integer_flag', 166 | action = 'store_true', 167 | default = DEFAULT_INTEGER_FLAG, 168 | help = 'Include integers in translation (default: {})'.format(DEFAULT_INTEGER_FLAG) 169 | ) 170 | translate_parser.add_argument( 171 | '--re-range', 172 | dest = 'skip_re_range', 173 | action = 'store_false', 174 | default = DEFAULT_SKIP_RE_RANGE, 175 | help = 'Include re_range nodes in translation (default: {})'.format(DEFAULT_SKIP_RE_RANGE) 176 | ) 177 | 178 | # unprintable transformer 179 | unprintable_parser = subparsers.add_parser(UNPRINTABLE, help='unprintable 
transformer') 180 | 181 | # parse args 182 | args = global_parser.parse_args() 183 | 184 | # get the transformer function based on args 185 | transformer_name = args.transformer 186 | transformer = TRANSFORMERS[transformer_name] 187 | 188 | # get some flags that will get popped from args before they're used 189 | input_file = args.input_file 190 | input_language = args.input_language 191 | output_language = args.output_language 192 | 193 | # seed the RNG 194 | if args.random is True: 195 | random.seed() 196 | else: 197 | random.seed(args.seed) 198 | 199 | # read input 200 | raw_in = args.input_file.read() 201 | 202 | # parse input 203 | try: 204 | ast = parse(raw_in, input_language) 205 | except ParsingError as e: 206 | print(e, file=sys.stderr) 207 | return 1 208 | 209 | # the nop transformer should not modify anything 210 | if transformer != nop: 211 | 212 | # filter out suppressed expressions 213 | ast = list(filter(should_keep, ast)) 214 | 215 | # get args as a dict 216 | transformer_args = vars(args) 217 | 218 | # pop arguments that are specific to this script because 219 | # they shouldn't be passed on to the transformer 220 | transformer_args.pop('input_file') 221 | transformer_args.pop('input_language') 222 | transformer_args.pop('output_language') 223 | transformer_args.pop('seed') 224 | transformer_args.pop('random') 225 | transformer_args.pop('transformer') 226 | 227 | # run the transformer with the args 228 | transformed = transformer(ast, **transformer_args) 229 | 230 | # transformers produce ASTs 231 | print(generate(transformed, output_language)) 232 | 233 | if __name__ == '__main__': 234 | main() 235 | -------------------------------------------------------------------------------- /bin/stringmerge: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Merge two existing problems. 
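Example usage (a sketch; a.smt2 and b.smt2 are placeholder file names, and
'simple' selects the only merger registered below):

    stringmerge a.smt2 b.smt2 simple --rename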
5 | ''' 6 | 7 | import sys 8 | import argparse 9 | import random 10 | 11 | from stringfuzz.constants import LANGUAGES, SMT_25_STRING 12 | from stringfuzz.generator import generate 13 | from stringfuzz.parser import parse, ParsingError 14 | from stringfuzz.smt import smt_string_logic, smt_check_sat 15 | from stringfuzz.ast import SettingNode, ExpressionNode, MetaCommandNode, GenericExpressionNode 16 | from stringfuzz.mergers import simple 17 | 18 | #constants 19 | SIMPLE = 'simple' 20 | 21 | MERGERS = { 22 | SIMPLE: simple 23 | } 24 | 25 | # defaults 26 | DEFAULT_RENAME_IDS = False 27 | DEFAULT_SEED = 0 28 | DEFAULT_RANDOM = False 29 | 30 | GET_MODEL = "get-model" 31 | GET_INFO = "get-info" 32 | EXPR_TO_STRIP = [GET_MODEL, GET_INFO] 33 | CHECK_SAT = "check-sat" 34 | GEN_TO_STRIP = [CHECK_SAT] 35 | 36 | def should_keep(node): 37 | if isinstance(node, SettingNode): 38 | return False 39 | if isinstance(node, MetaCommandNode): 40 | return False 41 | if isinstance(node, ExpressionNode): 42 | if node.symbol in EXPR_TO_STRIP: 43 | return False 44 | if isinstance(node, GenericExpressionNode): 45 | if node.symbol.name in GEN_TO_STRIP: 46 | return False 47 | return True 48 | 49 | # entry point 50 | def main(): 51 | 52 | # create arg parser 53 | global_parser = argparse.ArgumentParser(description='SMTLIB 2.* problem merger.') 54 | global_parser.add_argument( 55 | 'files', 56 | nargs = '+', 57 | metavar = 'F', 58 | type = argparse.FileType('r'), 59 | help = 'input files' 60 | ) 61 | global_parser.add_argument( 62 | '--in-lang', 63 | '-i', 64 | dest = 'input_language', 65 | type = str, 66 | choices = LANGUAGES, 67 | default = SMT_25_STRING, 68 | help = 'input language (default: {})'.format(SMT_25_STRING) 69 | ) 70 | global_parser.add_argument( 71 | '--out-lang', 72 | '-o', 73 | dest = 'output_language', 74 | type = str, 75 | choices = LANGUAGES, 76 | default = SMT_25_STRING, 77 | help = 'output language (default: {})'.format(SMT_25_STRING) 78 | ) 79 | seed_group = global_parser.add_mutually_exclusive_group() 80 | seed_group.add_argument( 81 | '--seed', 82 | '-s', 83 | dest = 'seed', 84 | metavar = 'S', 85 | type = int, 86 | default = DEFAULT_SEED, 87 | help = 'seed for random number generator (default: {})'.format(DEFAULT_SEED) 88 | ) 89 | seed_group.add_argument( 90 | '--random', 91 | '-r', 92 | dest = 'random', 93 | action = 'store_true', 94 | default = DEFAULT_RANDOM, 95 | help = 'seed the random number generator with the current time (default: {})'.format(DEFAULT_RANDOM) 96 | ) 97 | 98 | # get subparsers 99 | subparsers = global_parser.add_subparsers(dest='merger', help='merger choice') 100 | subparsers.required = True 101 | 102 | # simple transformer 103 | simple_parser = subparsers.add_parser(SIMPLE, help='simple transformer') 104 | simple_parser.add_argument( 105 | '--rename', 106 | dest = 'rename_ids', 107 | action = 'store_true', 108 | default = DEFAULT_RENAME_IDS, 109 | help = 'Rename identifiers to avoid conflicts (default: {})'.format(DEFAULT_RENAME_IDS) 110 | ) 111 | 112 | # parse args 113 | args = global_parser.parse_args() 114 | 115 | # get the merger function based on args 116 | merger_name = args.merger 117 | merger = MERGERS[merger_name] 118 | 119 | # get general args 120 | files = args.files 121 | input_language = args.input_language 122 | output_language = args.output_language 123 | 124 | # seed the RNG 125 | if args.random is True: 126 | random.seed() 127 | else: 128 | random.seed(args.seed) 129 | 130 | # get args as a dict and pop general args 131 | merge_args = vars(args) 132 | 
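    # NOTE: vars() on the argparse Namespace yields its attributes as a dict,
    # so after the pops below the remaining entries are passed to the merger
    # as keyword arguments.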
merge_args.pop('files') 133 | merge_args.pop('input_language') 134 | merge_args.pop('output_language') 135 | merge_args.pop('seed') 136 | merge_args.pop('random') 137 | merge_args.pop('merger') 138 | 139 | # read input 140 | raw_in = [f.read() for f in files] 141 | 142 | # parse input 143 | try: 144 | asts = [parse(raw, input_language) for raw in raw_in] 145 | except ParsingError as e: 146 | print(e, file=sys.stderr) 147 | return 1 148 | 149 | # filter out suppressed expressions 150 | asts = [list(filter(should_keep, ast)) for ast in asts] 151 | # merge the two ASTs into a new AST 152 | merged = merger(asts, **merge_args) 153 | # add back the logic and get-sat 154 | merged = [smt_string_logic()] + merged + [smt_check_sat()] 155 | 156 | # transformers produce ASTs 157 | print(generate(merged, output_language)) 158 | 159 | if __name__ == '__main__': 160 | main() 161 | -------------------------------------------------------------------------------- /bin/stringstats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Prints stats about problems. 5 | ''' 6 | 7 | import sys 8 | import argparse 9 | 10 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING 11 | from stringfuzz.parser import parse 12 | from stringfuzz.analyser import analyse 13 | from stringfuzz.ast import StringLitNode, ConcatNode 14 | 15 | def main(): 16 | 17 | # create arg parser 18 | parser = argparse.ArgumentParser(description='Analyse an SMT 2.* file.') 19 | parser.add_argument( 20 | 'file', 21 | nargs = '?', 22 | default = sys.stdin, 23 | type = argparse.FileType('r'), 24 | help = 'input file (default: stdin)' 25 | ) 26 | parser.add_argument( 27 | '--language', 28 | '-l', 29 | dest = 'language', 30 | type = str, 31 | choices = LANGUAGES, 32 | default = SMT_25_STRING, 33 | help = 'input language (default: {})'.format(SMT_25_STRING) 34 | ) 35 | 36 | # parse args 37 | args = parser.parse_args() 38 | 39 | # parse input 40 | try: 41 | expressions = parse(args.file.read(), args.language) 42 | 43 | # handle errors 44 | except IndexError as e: 45 | print(e, file=sys.stderr) 46 | return 1 47 | 48 | # get stats 49 | points, variables, literals = analyse(expressions) 50 | str_literals = [l for l in literals if isinstance(l, StringLitNode)] 51 | concat_points = [p for p in points if isinstance(p.expression, ConcatNode)] 52 | 53 | # compute stats 54 | if len(str_literals) > 1: 55 | avg_literal_length = sum(map(len, str_literals)) / len(str_literals) 56 | else: 57 | avg_literal_length = 0 58 | 59 | if len(points) > 1: 60 | max_depth = max(p.depth for p in points) 61 | else: 62 | max_depth = 0 63 | 64 | if len(concat_points) > 1: 65 | max_nesting = max(p.nesting for p in points if isinstance(p.expression, ConcatNode)) 66 | else: 67 | max_nesting = 0 68 | 69 | # print stats 70 | print('stats') 71 | print('=========') 72 | print('num. of expressions: ', len(points)) 73 | print('num. of variables: ', len(variables)) 74 | print('num. of literals: ', len(literals)) 75 | print('num. of string literals: ', len(str_literals)) 76 | print('avg. 
length of literals: ', '{:.4f}'.format(avg_literal_length)) 77 | print('max expression depth: ', max_depth) 78 | print('max concat nesting level: ', max_nesting) 79 | 80 | return 0 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /bin/tryparse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Try parsing all files in given problem lists, and print out the ones that failed. 5 | ''' 6 | 7 | import os 8 | import sys 9 | import multiprocessing as mp 10 | import time 11 | import datetime 12 | import traceback 13 | import queue 14 | import ctypes 15 | 16 | from collections import deque 17 | 18 | from stringfuzz.constants import SMT_20_STRING, SMT_25_STRING, LANGUAGES 19 | from stringfuzz.scanner import scan, ScanningError 20 | from stringfuzz.parser import parse_file, parse_tokens, ParsingError 21 | from stringfuzz.generator import generate, NotSupported 22 | 23 | # constants 24 | DEFAULT_NUM_WORKERS = 8 25 | BATCH_SIZE = 50 26 | MAX_HISTORY = 500 27 | ESC = '\033' 28 | BACK_ONE_LINE = ESC + '[1A' 29 | ERASE_LINE = ESC + '[2K' 30 | FRAME_DURATION = 0.05 # in seconds 31 | POISON_PILL = None 32 | 33 | SMT_25_PATTERNS = [ 34 | 'dumpCVC4', 35 | 'smt25', 36 | '/cvc4/', 37 | 'kaluza25', 38 | ] 39 | 40 | # globals 41 | io_lock = mp.Lock() 42 | 43 | # helpers 44 | def reset_cursor(): 45 | return BACK_ONE_LINE + ERASE_LINE 46 | 47 | def now(): 48 | return datetime.datetime.now() 49 | 50 | def sec2minsec(seconds): 51 | return (seconds // 60, seconds % 60) 52 | 53 | def show_failure(message): 54 | with io_lock: 55 | print(reset_cursor() + message + '\n') 56 | 57 | def show_progress(*args): 58 | with io_lock: 59 | print(*args, file=sys.stderr) 60 | 61 | def is_smt25(file_path): 62 | return any(pattern in file_path for pattern in SMT_25_PATTERNS) 63 | 64 | # functions 65 | def consumer(q, num_done, i, crash): 66 | 67 | # run forever 68 | while True: 69 | 70 | # try to get a task 71 | batch = q.get() 72 | 73 | # stop running if got poison pill 74 | if batch is POISON_PILL: 75 | break 76 | 77 | # go through problem batch 78 | for problem_path in batch: 79 | 80 | try: 81 | parse_problem(problem_path) 82 | 83 | # on uncaught exceptions 84 | except Exception as e: 85 | 86 | # print stack trace 87 | with io_lock: 88 | traceback.print_exc() 89 | 90 | # signal crash to parent process 91 | with crash.get_lock(): 92 | crash.value = 1 93 | 94 | # signal completion 95 | with num_done.get_lock(): 96 | num_done.value += 1 97 | 98 | def parse_problem(input_path): 99 | 100 | # get start time 101 | start_time = now() 102 | 103 | # figure out input language 104 | if is_smt25(input_path): 105 | language = SMT_25_STRING 106 | else: 107 | language = SMT_20_STRING 108 | 109 | # read in file 110 | with open(input_path, 'r') as file: 111 | text = file.read() 112 | 113 | # try to scan 114 | try: 115 | tokens = scan(text, language) 116 | except ScanningError as e: 117 | show_failure('{language:<5} failed scanning {problem}\n{error}'.format( 118 | path = input_path, 119 | language = language, 120 | error = e 121 | )) 122 | 123 | # if scanned, try to parse 124 | else: 125 | try: 126 | expressions = parse_tokens(tokens, language, text) 127 | except ParsingError as e: 128 | show_failure('{language:<5} failed parsing {path}\n{error}'.format( 129 | path = input_path, 130 | language = language, 131 | error = e 132 | )) 133 | 134 | # if parsed, try to generate 135 | else: 136 | for 
output_language in [SMT_25_STRING, SMT_20_STRING]: 137 | try: 138 | translated = generate(expressions, output_language) 139 | except NotSupported as e: 140 | show_failure('{language:<5} failed generating {path}\n{error}'.format( 141 | path = input_path, 142 | language = language, 143 | error = e 144 | )) 145 | 146 | # measure run time 147 | run_time = now() - start_time 148 | 149 | def usage(): 150 | print('Usage', sys.argv[0], 'problem_list [problem_list [...]]', file=sys.stderr) 151 | 152 | def add_record(history, last_sample, current_sample): 153 | 154 | # create new record 155 | new_record = current_sample - last_sample 156 | 157 | # discard last record if needed 158 | if len(history) >= MAX_HISTORY: 159 | history.popleft() 160 | 161 | history.append(new_record) 162 | 163 | def print_status(history, done_so_far, num_problems): 164 | 165 | # get rate 166 | history_size = len(history) 167 | history_problems = sum(history) 168 | history_time = history_size * FRAME_DURATION 169 | 170 | if history_problems > 0: 171 | sec_per_problem = history_time / history_problems 172 | else: 173 | sec_per_problem = FRAME_DURATION 174 | 175 | # calculate progress 176 | num_left = num_problems - done_so_far 177 | percent_done = (float(done_so_far) / float(num_problems)) * 100.0 178 | time_left = int(float(sec_per_problem) * float(num_left)) 179 | min_left, sec_left = sec2minsec(time_left) 180 | 181 | # format progress 182 | seconds_progress = '{:.0f}s'.format(sec_left) 183 | minutes_progress = '{:.0f}m'.format(min_left) 184 | time_progress = seconds_progress 185 | 186 | if min_left > 0: 187 | time_progress = minutes_progress + ' ' + time_progress 188 | 189 | progress = '{} / {} ({:.2f}%) done; {} left ({:.6f} s per)'.format( 190 | done_so_far, 191 | num_problems, 192 | percent_done, 193 | time_progress, 194 | sec_per_problem 195 | ) 196 | 197 | # show progress 198 | show_progress(reset_cursor() + progress) 199 | 200 | def main(): 201 | 202 | # record start time 203 | start_time = now() 204 | 205 | # get args 206 | list_paths = sys.argv[1:] 207 | 208 | # check args 209 | if len(list_paths) < 1: 210 | usage() 211 | exit(1) 212 | 213 | # read input lists 214 | problems = [] 215 | for list_path in list_paths: 216 | with open(list_path, 'r') as list_file: 217 | problems += [line.strip() for line in list_file.readlines()] 218 | num_problems = len(problems) 219 | 220 | # create shared values 221 | num_done = mp.Value(ctypes.c_ulong, 0) 222 | crash = mp.Value(ctypes.c_bool, 0) 223 | q = mp.Queue() 224 | 225 | # populate queue 226 | for i in range(0, num_problems, BATCH_SIZE): 227 | batch = problems[i:i + BATCH_SIZE] 228 | q.put_nowait(batch) 229 | 230 | # calculate number of workers 231 | num_workers = os.cpu_count() 232 | if num_workers is None: 233 | num_workers = DEFAULT_NUM_WORKERS 234 | 235 | # add poison pills to the end of the queue 236 | for i in range(0, num_workers): 237 | q.put_nowait(POISON_PILL) 238 | 239 | # create workers 240 | workers = [mp.Process(target=consumer, args=(q, num_done, i, crash)) for i in range(num_workers)] 241 | 242 | # start workers 243 | for worker in workers: 244 | worker.start() 245 | 246 | # print newline to start update line 247 | show_progress('') 248 | 249 | # set up bookkeeping 250 | last_sample = num_done.value 251 | history = deque() 252 | 253 | # run until done 254 | while num_done.value < num_problems: 255 | 256 | # sample number of done problems 257 | current_sample = num_done.value 258 | add_record(history, last_sample, current_sample) 259 | 260 | # print status 261 
| print_status(history, current_sample, num_problems) 262 | 263 | # update bookkeeping 264 | last_sample = current_sample 265 | 266 | # sleep for a frame 267 | time.sleep(FRAME_DURATION) 268 | 269 | # check for crash condition 270 | if crash.value != 0: 271 | 272 | # terminate workers 273 | for worker in workers: 274 | worker.terminate() 275 | 276 | exit(1) 277 | 278 | # wait for the workers to finish 279 | for worker in workers: 280 | worker.join() 281 | 282 | # print final results 283 | end_time = now() 284 | run_time = end_time - start_time 285 | min_total, sec_total = sec2minsec(run_time.seconds) 286 | sec_per_problem = run_time.seconds / num_done.value 287 | show_progress('finished in {}m {}s, {:.4f}s per run'.format(min_total, sec_total, sec_per_problem)) 288 | 289 | if __name__ == '__main__': 290 | main() 291 | -------------------------------------------------------------------------------- /bin/unprintable: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ''' 4 | Obsolete 'unprintable' transformer. Kept for reference. 5 | ''' 6 | 7 | import sys 8 | import re 9 | import random 10 | 11 | # constants 12 | EXCLUDED = map(ord, '\n\t\x00') 13 | UNPRINTABLE = [i for i in range(32) if i not in EXCLUDED] 14 | 15 | LITERAL_PATTERN = r'"((?:[^"]|"")*)"' 16 | 17 | # globals 18 | unprintable_chars = None 19 | 20 | # functions 21 | def _gen_unprintable(): 22 | while True: 23 | yield random.choice(UNPRINTABLE) 24 | 25 | def get_char(): 26 | global unprintable_chars 27 | char = next(unprintable_chars) 28 | return '\\x{:0>2x}'.format(char) 29 | 30 | def get_string(n): 31 | return ''.join(get_char() for i in range(n)) 32 | 33 | def replace_unprintable(match): 34 | string = match.group(1) 35 | replacement = '"' + get_string(len(string)) + '"' 36 | return replacement 37 | 38 | def main(): 39 | 40 | global unprintable_chars 41 | 42 | # create generators 43 | unprintable_chars = _gen_unprintable() 44 | 45 | # process input 46 | program = sys.stdin.read() 47 | program = re.sub(LITERAL_PATTERN, replace_unprintable, program) 48 | sys.stdout.write(program) 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | 5 | from setuptools import setup, find_packages 6 | 7 | setup( 8 | name = 'stringfuzz', 9 | version = '0.1', 10 | description = 'Fuzzer for SMTLIB 2.x solvers.', 11 | author = 'Dmitry Blotsky, Federico Mora', 12 | author_email = 'dmitry.blotsky@gmail.com, fmora@cs.toronto.edu', 13 | url = 'https://github.com/dblotsky/stringfuzz', 14 | scripts = [ 15 | 'bin/stringfuzzx', 16 | 'bin/stringfuzzg', 17 | 'bin/stringstats', 18 | 'bin/stringmerge', 19 | 'bin/stringbreak' 20 | ], 21 | packages = find_packages(), 22 | package_dir = { 23 | 'stringfuzz': 'stringfuzz', 24 | }, 25 | ) 26 | -------------------------------------------------------------------------------- /stringfuzz/__init__.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.scanner import ALPHABET, WHITESPACE 2 | 3 | ALL_CHARS = ALPHABET + WHITESPACE -------------------------------------------------------------------------------- /stringfuzz/analyser.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from collections import namedtuple 4 | from 
stringfuzz.ast_walker import ASTWalker 5 | 6 | __all__ = [ 7 | 'analyse', 8 | ] 9 | 10 | ZERO_DEPTH = 1 11 | 12 | # NOTE: 13 | # depth - depth in tree 14 | # nesting - nesting of the same expression in tree 15 | Point = namedtuple('Point', ('expression', 'parent', 'depth', 'nesting')) 16 | 17 | class StatsWalker(ASTWalker): 18 | 19 | def __init__(self, ast): 20 | super().__init__(ast) 21 | 22 | # bookkeeping 23 | self.expr_stack = [] 24 | self.point_stack = [] 25 | self.nesting_stack = [] 26 | 27 | self.depth = ZERO_DEPTH 28 | 29 | # results 30 | self.points = [] 31 | self.variables = set() 32 | self.literals = [] 33 | 34 | def make_point(self, expression): 35 | return Point( 36 | expression = expression, 37 | parent = self.parent, 38 | depth = self.depth, 39 | nesting = self.nesting, 40 | ) 41 | 42 | @property 43 | def expression(self): 44 | assert len(self.expr_stack) > 0 45 | return self.expr_stack[-1] 46 | 47 | @property 48 | def point(self): 49 | assert len(self.point_stack) > 0 50 | return self.point_stack[-1] 51 | 52 | @property 53 | def parent(self): 54 | if len(self.expr_stack) > 1: 55 | return self.expr_stack[-2] 56 | return None 57 | 58 | @property 59 | def nesting(self): 60 | assert len(self.nesting_stack) > 0 61 | return self.nesting_stack[-1] 62 | 63 | def enter_expression(self, expression, parent): 64 | 65 | # push nesting if we're at least one expression deep 66 | if self.depth > 1: 67 | 68 | if self.expression.symbol == expression.symbol: 69 | new_nesting = self.nesting + 1 70 | else: 71 | new_nesting = ZERO_DEPTH 72 | 73 | self.nesting_stack.append(new_nesting) 74 | 75 | # otherwise, start off with no nesting 76 | else: 77 | self.nesting_stack.append(ZERO_DEPTH) 78 | 79 | # create a new point 80 | point = self.make_point(expression) 81 | self.points.append(point) 82 | 83 | # push point and expression 84 | self.point_stack.append(point) 85 | self.expr_stack.append(expression) 86 | 87 | # increase depth 88 | self.depth += 1 89 | 90 | def exit_expression(self, expression, parent): 91 | 92 | # decrease depth 93 | self.depth -= 1 94 | 95 | # pop all stacks 96 | self.point_stack.pop() 97 | self.expr_stack.pop() 98 | self.nesting_stack.pop() 99 | 100 | def enter_literal(self, literal, parent): 101 | assert self.point is not None 102 | self.literals.append(literal) 103 | 104 | def enter_identifier(self, variable, parent): 105 | assert self.point is not None 106 | self.variables.add(variable.name) 107 | 108 | def analyse(ast): 109 | walker = StatsWalker(ast) 110 | walker.walk() 111 | return walker.points, walker.variables, walker.literals 112 | -------------------------------------------------------------------------------- /stringfuzz/ast.py: -------------------------------------------------------------------------------- 1 | import string 2 | import numbers 3 | 4 | ''' 5 | The AST is a list of ASTNodes. 
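For example (a minimal sketch using constructors defined below; node equality
is by repr, per _ASTNode.__eq__):

    StringLitNode("abc") == StringLitNode("abc")   # True
    IntLitNode(3).get_sort()                       # 'Int'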
6 | ''' 7 | 8 | __all__ = [ 9 | 'STRING_SORT', 10 | 'INT_SORT', 11 | 'BOOL_SORT', 12 | 'REGEX_SORT', 13 | 'UNIT_SORT', 14 | 'ANY_SORT', 15 | 'DECLARABLE_SORTS', 16 | 17 | 'LiteralNode', 18 | 'BoolLitNode', 19 | 'IntLitNode', 20 | 'StringLitNode', 21 | 'AtomicSortNode', 22 | 'CompoundSortNode', 23 | 'SettingNode', 24 | 'MetaDataNode', 25 | 'IdentifierNode', 26 | 'FunctionDeclarationNode', 27 | 'FunctionDefinitionNode', 28 | 'ConstantDeclarationNode', 29 | 'SortedVarNode', 30 | 'BracketsNode', 31 | 'ExpressionNode', 32 | 'GenericExpressionNode', 33 | 'MetaCommandNode', 34 | 'AssertNode', 35 | 'CheckSatNode', 36 | 'GetModelNode', 37 | 'AndNode', 38 | 'OrNode', 39 | 'NotNode', 40 | 'EqualNode', 41 | 'GtNode', 42 | 'LtNode', 43 | 'GteNode', 44 | 'LteNode', 45 | 'ConcatNode', 46 | 'ContainsNode', 47 | 'AtNode', 48 | 'LengthNode', 49 | 'IndexOfNode', 50 | 'IndexOf2Node', 51 | 'PrefixOfNode', 52 | 'SuffixOfNode', 53 | 'StringReplaceNode', 54 | 'SubstringNode', 55 | 'FromIntNode', 56 | 'ToIntNode', 57 | 'InReNode', 58 | 'StrToReNode', 59 | 'ReConcatNode', 60 | 'ReStarNode', 61 | 'RePlusNode', 62 | 'ReRangeNode', 63 | 'ReUnionNode', 64 | 'ReInterNode', 65 | 'ReAllCharNode', 66 | ] 67 | 68 | # constants 69 | STRING_SORT = 'String' 70 | INT_SORT = 'Int' 71 | BOOL_SORT = 'Bool' 72 | REGEX_SORT = 'Regex' 73 | UNIT_SORT = 'Unit' 74 | ANY_SORT = '*' 75 | 76 | UNIT_SIGNATURE = [] 77 | UNCHECKED_SIGNATURE = None 78 | 79 | DECLARABLE_SORTS = [ 80 | STRING_SORT, 81 | INT_SORT, 82 | BOOL_SORT, 83 | ] 84 | 85 | SORT_TYPE = str 86 | SIGNATURE_TYPE = list 87 | 88 | # helpers 89 | def with_spaces(terms): 90 | return ' '.join(map(repr, terms)) 91 | 92 | # data structures 93 | class _ASTNode(object): 94 | def __eq__(self, other): 95 | return repr(self) == repr(other) 96 | 97 | def __hash__(self): 98 | return hash(repr(self)) 99 | 100 | # "atoms" 101 | class SortNode(_ASTNode): 102 | pass 103 | 104 | class AtomicSortNode(SortNode): 105 | def __init__(self, name): 106 | self.name = name 107 | 108 | def __repr__(self): 109 | return 'Sort<{}>'.format(self.name) 110 | 111 | class CompoundSortNode(SortNode): 112 | def __init__(self, constructor, sorts): 113 | self.constructor = constructor 114 | self.sorts = sorts 115 | 116 | def __repr__(self): 117 | return 'Sort<{} {}>'.format(self.symbol, with_spaces(self.sorts)) 118 | 119 | class SettingNode(_ASTNode): 120 | def __init__(self, name): 121 | self.name = name 122 | 123 | def __repr__(self): 124 | return 'Setting<{}>'.format(self.name) 125 | 126 | class MetaDataNode(_ASTNode): 127 | def __init__(self, value): 128 | self.value = value 129 | 130 | def __repr__(self): 131 | return 'MetaData<{}>'.format(self.value) 132 | 133 | class IdentifierNode(_ASTNode): 134 | def __init__(self, name): 135 | self.name = name 136 | 137 | def __repr__(self): 138 | return 'Id<{}>'.format(self.name) 139 | 140 | class SortedVarNode(_ASTNode): 141 | def __init__(self, var_name, var_sort): 142 | self.var_name = var_name 143 | self.var_sort = var_sort 144 | 145 | def __repr__(self): 146 | return 'Decl<{} {}>'.format(self.var_name, self.var_sort) 147 | 148 | class ReAllCharNode(_ASTNode): 149 | def __repr__(self): 150 | return 'ReAllChar<.>' 151 | 152 | class BracketsNode(_ASTNode): 153 | def __init__(self, body): 154 | self.body = body 155 | 156 | def __repr__(self): 157 | return '({})'.format(with_spaces(self.body)) 158 | 159 | # NOTE: 160 | # sort-wise, we're treating everything as a function; even literals 161 | class _SortedASTNode(_ASTNode): 162 | _signature = NotImplemented 163 | _sort = 
NotImplemented 164 | 165 | def __init__(self): 166 | assert isinstance(self._sort, SORT_TYPE) 167 | assert self._signature == UNCHECKED_SIGNATURE or isinstance(self._signature, SIGNATURE_TYPE) 168 | 169 | @classmethod 170 | def get_signature(cls): 171 | return cls._signature 172 | 173 | @classmethod 174 | def get_sort(cls): 175 | return cls._sort 176 | 177 | @classmethod 178 | def is_terminal(cls): 179 | return cls._signature == UNIT_SIGNATURE 180 | 181 | @classmethod 182 | def accepts(cls, sort): 183 | if cls._signature == UNCHECKED_SIGNATURE: 184 | return False 185 | return sort in cls._signature 186 | 187 | @classmethod 188 | def returns(cls, sort): 189 | return sort == cls._sort 190 | 191 | # literals 192 | class LiteralNode(_SortedASTNode): 193 | _signature = UNIT_SIGNATURE 194 | 195 | def __init__(self, value): 196 | super().__init__() 197 | self.value = value 198 | 199 | def __repr__(self): 200 | return '{}<{}>'.format(self.get_sort(), self.value) 201 | 202 | class BoolLitNode(LiteralNode): 203 | _sort = BOOL_SORT 204 | 205 | def __init__(self, value): 206 | assert isinstance(value, bool) 207 | super().__init__(value) 208 | 209 | class IntLitNode(LiteralNode): 210 | _sort = INT_SORT 211 | 212 | def __init__(self, value): 213 | assert isinstance(value, numbers.Real) and not isinstance(value, bool) 214 | super().__init__(value) 215 | 216 | class StringLitNode(LiteralNode): 217 | _sort = STRING_SORT 218 | 219 | def __init__(self, value): 220 | assert isinstance(value, str) 221 | super().__init__(value) 222 | 223 | def __len__(self): 224 | return len(self.value) 225 | 226 | # expressions 227 | class ExpressionNode(_ASTNode): 228 | _symbol = NotImplemented 229 | 230 | def __init__(self, body): 231 | if isinstance(self._symbol, str): 232 | self._symbol = IdentifierNode(self._symbol) 233 | self.body = body 234 | 235 | @property 236 | def symbol(self): 237 | return self._symbol 238 | 239 | def __repr__(self): 240 | return '(\'{}\' {})'.format(self.symbol, with_spaces(self.body)) 241 | 242 | class _SortedExpressionNode(ExpressionNode, _SortedASTNode): 243 | def __init__(self, body): 244 | # TODO: 245 | # enforce that the arguments are of correct types 246 | _SortedASTNode.__init__(self) 247 | ExpressionNode.__init__(self, body) 248 | 249 | class _NullaryExpression(_SortedExpressionNode): 250 | def __init__(self): 251 | super().__init__([]) 252 | 253 | class _UnaryExpression(_SortedExpressionNode): 254 | def __init__(self, a): 255 | super().__init__([a]) 256 | 257 | class _BinaryExpression(_SortedExpressionNode): 258 | def __init__(self, a, b): 259 | super().__init__([a, b]) 260 | 261 | class _TernaryExpression(_SortedExpressionNode): 262 | def __init__(self, a, b, c): 263 | super().__init__([a, b, c]) 264 | 265 | class _QuaternaryExpression(_SortedExpressionNode): 266 | def __init__(self, a, b, c, d): 267 | super().__init__([a, b, c, d]) 268 | 269 | class _NaryExpression(_SortedExpressionNode): 270 | def __init__(self, *args): 271 | super().__init__(list(args)) 272 | 273 | class _RelationExpressionNode(_BinaryExpression): 274 | _signature = [INT_SORT, INT_SORT] 275 | _sort = BOOL_SORT 276 | 277 | class GenericExpressionNode(_NaryExpression): 278 | _signature = UNCHECKED_SIGNATURE 279 | _sort = UNIT_SORT 280 | 281 | def __init__(self, symbol, *args): 282 | self._symbol = symbol 283 | super().__init__(*args) 284 | 285 | # commands 286 | class _CommandNode(_SortedASTNode): 287 | _sort = UNIT_SORT 288 | 289 | class MetaCommandNode(_CommandNode, _NaryExpression): 290 | _signature = 
UNCHECKED_SIGNATURE 291 | 292 | def __init__(self, symbol, *args): 293 | self._symbol = symbol 294 | super().__init__(*args) 295 | 296 | class AssertNode(_CommandNode, _UnaryExpression): 297 | _signature = [BOOL_SORT] 298 | _symbol = 'assert' 299 | 300 | class CheckSatNode(_CommandNode, _NullaryExpression): 301 | _signature = UNIT_SIGNATURE 302 | _symbol = 'check-sat' 303 | 304 | class GetModelNode(_CommandNode, _NullaryExpression): 305 | _signature = UNIT_SIGNATURE 306 | _symbol = 'get-model' 307 | 308 | class FunctionDeclarationNode(_CommandNode, _TernaryExpression): 309 | _signature = UNCHECKED_SIGNATURE 310 | _symbol = 'declare-fun' 311 | 312 | class FunctionDefinitionNode(_CommandNode, _QuaternaryExpression): 313 | _signature = UNCHECKED_SIGNATURE 314 | _symbol = 'define-fun' 315 | 316 | class ConstantDeclarationNode(_CommandNode, _BinaryExpression): 317 | _signature = UNCHECKED_SIGNATURE 318 | _symbol = 'declare-const' 319 | 320 | # boolean expressions 321 | class AndNode(_BinaryExpression): 322 | _signature = [BOOL_SORT, BOOL_SORT] 323 | _sort = BOOL_SORT 324 | _symbol = 'and' 325 | 326 | class OrNode(_BinaryExpression): 327 | _signature = [BOOL_SORT, BOOL_SORT] 328 | _sort = BOOL_SORT 329 | _symbol = 'or' 330 | 331 | class NotNode(_UnaryExpression): 332 | _signature = [BOOL_SORT] 333 | _sort = BOOL_SORT 334 | _symbol = 'not' 335 | 336 | # relations 337 | class EqualNode(_RelationExpressionNode): 338 | _signature = [ANY_SORT, ANY_SORT] 339 | _symbol = '=' 340 | 341 | class GtNode(_RelationExpressionNode): 342 | _symbol = '>' 343 | 344 | class LtNode(_RelationExpressionNode): 345 | _symbol = '<' 346 | 347 | class GteNode(_RelationExpressionNode): 348 | 349 | _symbol = '>=' 350 | 351 | class LteNode(_RelationExpressionNode): 352 | 353 | _symbol = '<=' 354 | 355 | # functions 356 | class ConcatNode(_BinaryExpression): 357 | _signature = [STRING_SORT, STRING_SORT] 358 | _sort = STRING_SORT 359 | _symbol = 'Concat' 360 | 361 | class ContainsNode(_BinaryExpression): 362 | _signature = [STRING_SORT, STRING_SORT] 363 | _sort = BOOL_SORT 364 | _symbol = 'Contains' 365 | 366 | class AtNode(_BinaryExpression): 367 | _signature = [STRING_SORT, INT_SORT] 368 | _sort = STRING_SORT 369 | _symbol = 'At' 370 | 371 | class LengthNode(_UnaryExpression): 372 | _signature = [STRING_SORT] 373 | _sort = INT_SORT 374 | _symbol = 'Length' 375 | 376 | class IndexOfNode(_BinaryExpression): 377 | _signature = [STRING_SORT, STRING_SORT] 378 | _sort = INT_SORT 379 | _symbol = 'IndexOf' 380 | 381 | class IndexOf2Node(_TernaryExpression): 382 | _signature = [STRING_SORT, STRING_SORT, INT_SORT] 383 | _sort = INT_SORT 384 | _symbol = 'IndexOf2' 385 | 386 | class PrefixOfNode(_BinaryExpression): 387 | _signature = [STRING_SORT, STRING_SORT] 388 | _sort = BOOL_SORT 389 | _symbol = 'PrefixOf' 390 | 391 | class SuffixOfNode(_BinaryExpression): 392 | _signature = [STRING_SORT, STRING_SORT] 393 | _sort = BOOL_SORT 394 | _symbol = 'SuffixOf' 395 | 396 | class StringReplaceNode(_TernaryExpression): 397 | _signature = [STRING_SORT, STRING_SORT, STRING_SORT] 398 | _sort = STRING_SORT 399 | _symbol = 'Replace' 400 | 401 | class SubstringNode(_TernaryExpression): 402 | _signature = [STRING_SORT, INT_SORT, INT_SORT] 403 | _sort = STRING_SORT 404 | _symbol = 'Substring' 405 | 406 | class FromIntNode(_UnaryExpression): 407 | _signature = [INT_SORT] 408 | _sort = STRING_SORT 409 | _symbol = 'FromInt' 410 | 411 | class ToIntNode(_UnaryExpression): 412 | _signature = [STRING_SORT] 413 | _sort = INT_SORT 414 | _symbol = 'ToInt' 415 | 
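A minimal usage sketch of how the sorted node classes above compose, assuming only what ast.py itself defines (the variable name `x` and the literal `3` are arbitrary illustrations, not values taken from the repository):

    from stringfuzz.ast import (
        AssertNode, EqualNode, LengthNode, IdentifierNode, IntLitNode,
        INT_SORT, STRING_SORT,
    )

    # build the command (assert (= (Length x) 3)) directly out of AST nodes
    length_of_x = LengthNode(IdentifierNode('x'))
    constraint  = AssertNode(EqualNode(length_of_x, IntLitNode(3)))

    # every sorted node advertises its argument sorts and result sort
    assert LengthNode.accepts(STRING_SORT)   # Length takes a String argument
    assert LengthNode.returns(INT_SORT)      # ... and produces an Int
    print(constraint)                        # nested repr, like ('Id<assert>' ('Id<=>' ('Id<Length>' Id<x>) Int<3>))

Note that the constructors do not yet type-check their arguments (see the TODO in _SortedExpressionNode), so `accepts`/`returns` are the only sort information available to callers such as the random AST generator.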
416 | class InReNode(_BinaryExpression): 417 | _signature = [STRING_SORT, REGEX_SORT] 418 | _sort = BOOL_SORT 419 | _symbol = 'InRegex' 420 | 421 | class StrToReNode(_UnaryExpression): 422 | _signature = [STRING_SORT] 423 | _sort = REGEX_SORT 424 | _symbol = 'Str2Re' 425 | 426 | class ReConcatNode(_BinaryExpression): 427 | _signature = [REGEX_SORT, REGEX_SORT] 428 | _sort = REGEX_SORT 429 | _symbol = 'ReConcat' 430 | 431 | class ReStarNode(_UnaryExpression): 432 | _signature = [REGEX_SORT] 433 | _sort = REGEX_SORT 434 | _symbol = 'ReStar' 435 | 436 | class RePlusNode(_UnaryExpression): 437 | _signature = [REGEX_SORT] 438 | _sort = REGEX_SORT 439 | _symbol = 'RePlus' 440 | 441 | class ReRangeNode(_BinaryExpression): 442 | _signature = [STRING_SORT, STRING_SORT] 443 | _sort = REGEX_SORT 444 | _symbol = 'ReRange' 445 | 446 | def __init__(self, a, b): 447 | # TODO: 448 | # assert that arguments are literals 449 | super().__init__(a, b) 450 | 451 | class ReUnionNode(_BinaryExpression): 452 | _signature = [REGEX_SORT, REGEX_SORT] 453 | _sort = REGEX_SORT 454 | _symbol = 'ReUnion' 455 | 456 | class ReInterNode(_BinaryExpression): 457 | _signature = [REGEX_SORT, REGEX_SORT] 458 | _sort = REGEX_SORT 459 | _symbol = 'ReInter' 460 | -------------------------------------------------------------------------------- /stringfuzz/ast_walker.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.ast import * 2 | 3 | __all__ = [ 4 | 'ASTWalker' 5 | ] 6 | 7 | class ASTWalker(object): 8 | 9 | def __init__(self, ast): 10 | super().__init__() 11 | self.__ast = ast 12 | 13 | # public API 14 | def walk(self): 15 | for expression in self.__ast: 16 | self.walk_expression(expression, None) 17 | 18 | return self.__ast 19 | 20 | # walks 21 | def walk_expression(self, expression, parent): 22 | 23 | self.enter_expression(expression, parent) 24 | 25 | for sub_expression in expression.body: 26 | if isinstance(sub_expression, ExpressionNode): 27 | self.walk_expression(sub_expression, expression) 28 | 29 | if isinstance(sub_expression, IdentifierNode): 30 | self.walk_identifier(sub_expression, expression) 31 | 32 | if isinstance(sub_expression, LiteralNode): 33 | self.walk_literal(sub_expression, expression) 34 | 35 | self.exit_expression(expression, parent) 36 | 37 | def walk_literal(self, literal, parent): 38 | self.enter_literal(literal, parent) 39 | self.exit_literal(literal, parent) 40 | 41 | def walk_identifier(self, identifier, parent): 42 | self.enter_identifier(identifier, parent) 43 | self.exit_identifier(identifier, parent) 44 | 45 | # enters/exits 46 | def enter_expression(self, expression, parent): 47 | pass 48 | 49 | def exit_expression(self, expression, parent): 50 | pass 51 | 52 | def enter_literal(self, literal, parent): 53 | pass 54 | 55 | def exit_literal(self, literal, parent): 56 | pass 57 | 58 | def enter_identifier(self, identifier, parent): 59 | pass 60 | 61 | def exit_identifier(self, identifier, parent): 62 | pass 63 | 64 | -------------------------------------------------------------------------------- /stringfuzz/constants.py: -------------------------------------------------------------------------------- 1 | SMT_20 = 'smt2' 2 | SMT_20_STRING = 'smt20' 3 | SMT_25_STRING = 'smt25' 4 | 5 | LANGUAGES = [ 6 | SMT_20, 7 | SMT_20_STRING, 8 | SMT_25_STRING, 9 | ] 10 | -------------------------------------------------------------------------------- /stringfuzz/fuzzers/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dblotsky/stringfuzz/5507894ed5d94ed36098753357d33adee182b298/stringfuzz/fuzzers/__init__.py -------------------------------------------------------------------------------- /stringfuzz/fuzzers/genetic.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import sys 4 | import subprocess 5 | import threading 6 | import datetime 7 | import signal 8 | import statistics 9 | 10 | from heapq import heappush, heappop 11 | 12 | from stringfuzz.transformers import fuzz, graft 13 | from stringfuzz.generators import random_ast 14 | from stringfuzz.generator import generate 15 | from stringfuzz.ast import AssertNode, CheckSatNode 16 | from stringfuzz.util import coin_toss 17 | 18 | __all__ = [ 19 | 'simulate' 20 | ] 21 | 22 | # constants 23 | DEFAULT_MUTATION_ROUNDS = 4 24 | DEFAULT_TIMEOUT = 5 25 | MAX_NUM_ASSERTS = 20 26 | NUM_RUNS = 8 27 | 28 | # globals 29 | _language = None 30 | _timeout = DEFAULT_TIMEOUT 31 | 32 | # helpers 33 | def mutate_fuzz(ast): 34 | return ast 35 | # return fuzz(ast, skip_re_range=False) 36 | 37 | def decompose(ast): 38 | head = [] 39 | asserts = [] 40 | tail = [] 41 | for e in ast: 42 | if isinstance(e, AssertNode): 43 | asserts.append(e) 44 | elif isinstance(e, CheckSatNode): 45 | tail.append(e) 46 | else: 47 | head.append(e) 48 | return head, asserts, tail 49 | 50 | def mutate_add(ast): 51 | 52 | if len(ast) >= MAX_NUM_ASSERTS: 53 | return ast 54 | 55 | # decompose existing AST 56 | head, asserts, tail = decompose(ast) 57 | 58 | # create random AST with one assert 59 | new_ast = random_ast( 60 | num_vars = 1, 61 | num_asserts = 1, 62 | depth = 5, 63 | max_terms = 5, 64 | max_str_lit_length = 10, 65 | max_int_lit = 30, 66 | literal_probability = 0.5, 67 | semantically_valid = True 68 | ) 69 | 70 | # isolate just the new assert 71 | _, new_asserts, _ = decompose(new_ast) 72 | 73 | # return with the new asserts added 74 | return head + asserts + new_asserts + tail 75 | 76 | def mutate_pop(ast): 77 | head, asserts, tail = decompose(ast) 78 | return head + asserts[:-1] + tail 79 | 80 | def mutate_graft(ast): 81 | return ast 82 | # return graft(ast, skip_str_to_re=False) 83 | 84 | def mutate(ast): 85 | choice = random.randint(1, 4) 86 | 87 | if choice == 1: 88 | return mutate_fuzz(ast) 89 | 90 | if choice == 2: 91 | return mutate_pop(ast) 92 | 93 | if choice == 3: 94 | return mutate_add(ast) 95 | 96 | if choice == 4: 97 | return mutate_graft(ast) 98 | 99 | def vegetative_mate(parent, num_mutation_rounds=DEFAULT_MUTATION_ROUNDS): 100 | child = parent 101 | for i in range(num_mutation_rounds): 102 | child = mutate(child) 103 | return child 104 | 105 | def mate(parents): 106 | return vegetative_mate(random.choice(parents)) 107 | 108 | def time_solver(command, problem, timeout, verbose=False, debug=False): 109 | 110 | # print command that will be run 111 | if verbose is True or debug is True: 112 | print('RUNNING:', repr(command), file=sys.stderr) 113 | 114 | # get start time 115 | start = datetime.datetime.now().timestamp() 116 | 117 | # run command 118 | process = subprocess.Popen( 119 | command, 120 | shell = True, 121 | stdin = subprocess.PIPE, 122 | stdout = subprocess.PIPE, 123 | stderr = subprocess.PIPE, 124 | preexec_fn = os.setsid, 125 | universal_newlines = True 126 | ) 127 | 128 | # feed it the problem and wait for it to complete 129 | try: 130 | stdout, stderr = 
process.communicate(input=problem, timeout=timeout) 131 | 132 | # if it times out ... 133 | except subprocess.TimeoutExpired as e: 134 | 135 | # if verbose is True: 136 | print('TIMED OUT:', repr(command), '... killing', process.pid, file=sys.stderr) 137 | 138 | # kill it 139 | os.killpg(os.getpgid(process.pid), signal.SIGINT) 140 | 141 | # set timeout result 142 | elapsed = timeout 143 | 144 | # print output 145 | # if verbose is True: 146 | print('STDOUT:', process.stdout.read(), file=sys.stderr, end='') 147 | print('STDERR:', process.stderr.read(), file=sys.stderr, end='') 148 | 149 | # if it completes in time ... 150 | else: 151 | 152 | # measure run time 153 | end = datetime.datetime.now().timestamp() 154 | elapsed = end - start 155 | 156 | if stderr != '': 157 | print('STDERR IS NOT EMPTY!:', stderr, file=sys.stderr, end='') 158 | print('PROBLEM: \n', problem, file=sys.stderr, end='') 159 | 160 | # print output 161 | if debug is True: 162 | print('STDOUT:', stdout, file=sys.stderr, end='') 163 | print('STDERR:', stderr, file=sys.stderr, end='') 164 | 165 | return elapsed 166 | 167 | def reproduce(survivors, world_size): 168 | 169 | # create offspring 170 | num_offspring = world_size - len(survivors) 171 | offspring = [mate(survivors) for i in range(num_offspring)] 172 | 173 | # return new population 174 | new_population = survivors + offspring 175 | return new_population 176 | 177 | def generate_problem(problem): 178 | global _language 179 | return generate(problem, _language) 180 | 181 | def normalise(bottom, top, value): 182 | width = top - bottom 183 | return value / width 184 | 185 | def time_in_thread(index, times, **kwargs): 186 | time = time_solver(**kwargs) 187 | times[index] = time 188 | 189 | def get_score(organism, saint_peter): 190 | global _timeout 191 | 192 | # get average run time 193 | times = [0 for i in range(NUM_RUNS)] 194 | threads = [] 195 | for i in range(NUM_RUNS): 196 | thread = threading.Thread( 197 | target = time_in_thread, 198 | args = (i, times), 199 | kwargs = { 200 | 'command': saint_peter, 201 | 'timeout': _timeout, 202 | 'problem': generate_problem(organism) 203 | } 204 | ) 205 | threads.append(thread) 206 | 207 | # run experiments in parallel 208 | for thread in threads: 209 | thread.start() 210 | 211 | for thread in threads: 212 | thread.join() 213 | 214 | # return median run time 215 | score = statistics.median(times) 216 | return score 217 | 218 | def judge(population, saint_peter): 219 | for organism in population: 220 | yield get_score(organism, saint_peter) 221 | 222 | def cull(population, scores): 223 | 224 | # annotate specimens with their scores 225 | global _timeout 226 | indices = range(len(population)) 227 | annotated = zip([(_timeout - s) for s in scores], indices) 228 | 229 | # create a min-heap out of annotated specimens 230 | heap = [] 231 | for entry in annotated: 232 | heappush(heap, entry) 233 | 234 | # get best specimens 235 | print('population', ' '.join(['p[{i}]={s}'.format(s=len(e), i=i) for i, e in enumerate(population)])) 236 | best_entries = [heappop(heap) for i in range(3)] 237 | print('best entries', best_entries) 238 | best_indices = [entry[1] for entry in best_entries] 239 | print('best indices', best_indices) 240 | best = [population[i] for i in best_indices] 241 | print('best:', ' '.join(['p[{i}]={s} for {t}'.format(t=e[0], s=len(population[e[1]]), i=e[1]) for e in best_entries])) 242 | print('') 243 | 244 | return best 245 | 246 | def time_to_log(generation, resolution): 247 | return (generation % resolution) == 0 248 
| 249 | # public API 250 | def simulate(progenitor, language, saint_peter, num_generations, world_size, log_resolution): 251 | 252 | # set global config 253 | global _language 254 | _language = language 255 | 256 | # create initial population 257 | population = [progenitor] 258 | 259 | # run simulation 260 | for g in range(num_generations): 261 | 262 | # log generation progress 263 | if time_to_log(g, log_resolution): 264 | print('generation {}'.format(g)) 265 | 266 | # sanity check: there should be organisms 267 | assert len(population) > 0 268 | 269 | # populate world 270 | population = reproduce(population, world_size) 271 | 272 | # measure performance of each organism 273 | scores = judge(population, saint_peter) 274 | 275 | # keep only the "best" organisms 276 | population = cull(population, scores) 277 | 278 | # return final population 279 | return population 280 | -------------------------------------------------------------------------------- /stringfuzz/generator.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from stringfuzz.constants import SMT_20, SMT_20_STRING, SMT_25_STRING 4 | from stringfuzz.scanner import scan, ALPHABET, WHITESPACE 5 | from stringfuzz.ast import * 6 | 7 | __all__ = [ 8 | 'generate', 9 | 'generate_file', 10 | 'NotSupported', 11 | ] 12 | 13 | # exceptions 14 | class NotSupported(ValueError): 15 | def __init__(self, e, language): 16 | message = 'can\'t generate {!r} in language {!r}'.format(e, language) 17 | super().__init__(message) 18 | 19 | # functions 20 | def needs_encoding(c): 21 | return c not in ALPHABET 22 | 23 | def encode_char(c, language): 24 | if c == '"': 25 | if language == SMT_25_STRING: 26 | return '""' 27 | else: 28 | return '\\"' 29 | elif c == '\\': 30 | return '\\\\' 31 | elif c in WHITESPACE: 32 | return repr(c) 33 | elif needs_encoding(c): 34 | return '\\x{:0>2x}'.format(ord(c)) 35 | return c 36 | 37 | def encode_string(s, language): 38 | encoded = ''.join(encode_char(c, language) for c in s) 39 | return '"' + encoded + '"' 40 | 41 | def generate_node(node, language): 42 | 43 | # generate each known node 44 | if isinstance(node, ExpressionNode): 45 | return generate_expr(node, language) 46 | 47 | if isinstance(node, SortedVarNode): 48 | return '({} {})'.format(generate_node(node.name, language), generate_node(node.sort, language)) 49 | 50 | if isinstance(node, LiteralNode): 51 | return generate_lit(node, language) 52 | 53 | if isinstance(node, IdentifierNode): 54 | return node.name 55 | 56 | if isinstance(node, AtomicSortNode): 57 | return node.name 58 | 59 | if isinstance(node, CompoundSortNode): 60 | return '({} {})'.format(generate_node(node.symbol, language), ' '.join(generate_node(s, language) for s in node.sorts)) 61 | 62 | if isinstance(node, BracketsNode): 63 | return '({})'.format(' '.join(generate_node(s, language) for s in node.body)) 64 | 65 | if isinstance(node, SettingNode): 66 | return '{}'.format(generate_node(node.name, language)) 67 | 68 | if isinstance(node, MetaDataNode): 69 | return node.value 70 | 71 | if isinstance(node, ReAllCharNode): 72 | if language == SMT_25_STRING: 73 | return 're.allchar' 74 | else: 75 | raise NotSupported(node, language) 76 | 77 | if isinstance(node, str): 78 | return node 79 | 80 | # error out on all others 81 | raise NotImplementedError('no generator for {}'.format(type(node))) 82 | 83 | def generate_lit(lit, language): 84 | if isinstance(lit, StringLitNode): 85 | return encode_string(lit.value, language) 86 | 87 | if 
isinstance(lit, BoolLitNode): 88 | return str(lit.value).lower() 89 | 90 | if isinstance(lit, IntLitNode): 91 | if (lit.value < 0): 92 | return '(- {})'.format(lit.value) 93 | return str(lit.value) 94 | 95 | raise NotImplementedError('unknown literal type {!r}'.format(lit)) 96 | 97 | def generate_expr(e, language): 98 | components = [] 99 | 100 | # special expressions 101 | if isinstance(e, ConcatNode): 102 | if language == SMT_20_STRING: 103 | components.append('Concat') 104 | elif language == SMT_25_STRING: 105 | components.append('str.++') 106 | else: 107 | raise NotSupported(e, language) 108 | 109 | elif isinstance(e, ContainsNode): 110 | if language == SMT_20_STRING: 111 | components.append('Contains') 112 | elif language == SMT_25_STRING: 113 | components.append('str.contains') 114 | else: 115 | raise NotSupported(e, language) 116 | 117 | elif isinstance(e, AtNode): 118 | if language == SMT_20_STRING: 119 | components.append('CharAt') 120 | elif language == SMT_25_STRING: 121 | components.append('str.at') 122 | else: 123 | raise NotSupported(e, language) 124 | 125 | elif isinstance(e, LengthNode): 126 | if language == SMT_20_STRING: 127 | components.append('Length') 128 | elif language == SMT_25_STRING: 129 | components.append('str.len') 130 | else: 131 | raise NotSupported(e, language) 132 | 133 | elif isinstance(e, IndexOfNode): 134 | if language == SMT_20_STRING: 135 | components.append('IndexOf') 136 | elif language == SMT_25_STRING: 137 | components.append('str.indexof') 138 | else: 139 | raise NotSupported(e, language) 140 | 141 | elif isinstance(e, IndexOf2Node): 142 | if language == SMT_20_STRING: 143 | components.append('IndexOf2') 144 | elif language == SMT_25_STRING: 145 | components.append('str.indexof') 146 | else: 147 | raise NotSupported(e, language) 148 | 149 | elif isinstance(e, PrefixOfNode): 150 | if language == SMT_20_STRING: 151 | components.append('StartsWith') 152 | elif language == SMT_25_STRING: 153 | components.append('str.prefixof') 154 | else: 155 | raise NotSupported(e, language) 156 | 157 | elif isinstance(e, SuffixOfNode): 158 | if language == SMT_20_STRING: 159 | components.append('EndsWith') 160 | elif language == SMT_25_STRING: 161 | components.append('str.suffixof') 162 | else: 163 | raise NotSupported(e, language) 164 | 165 | elif isinstance(e, StringReplaceNode): 166 | if language == SMT_20_STRING: 167 | components.append('Replace') 168 | elif language == SMT_25_STRING: 169 | components.append('str.replace') 170 | else: 171 | raise NotSupported(e, language) 172 | 173 | elif isinstance(e, SubstringNode): 174 | if language == SMT_20_STRING: 175 | components.append('Substring') 176 | elif language == SMT_25_STRING: 177 | components.append('str.substr') 178 | else: 179 | raise NotSupported(e, language) 180 | 181 | elif isinstance(e, FromIntNode): 182 | if language == SMT_25_STRING: 183 | components.append('str.from.int') 184 | else: 185 | raise NotSupported(e, language) 186 | 187 | elif isinstance(e, ToIntNode): 188 | if language == SMT_25_STRING: 189 | components.append('str.to.int') 190 | else: 191 | raise NotSupported(e, language) 192 | 193 | elif isinstance(e, StrToReNode): 194 | if language == SMT_20_STRING: 195 | components.append('Str2Reg') 196 | elif language == SMT_25_STRING: 197 | components.append('str.to.re') 198 | else: 199 | raise NotSupported(e, language) 200 | 201 | elif isinstance(e, InReNode): 202 | if language == SMT_20_STRING: 203 | components.append('RegexIn') 204 | elif language == SMT_25_STRING: 205 | 
components.append('str.in.re') 206 | else: 207 | raise NotSupported(e, language) 208 | 209 | elif isinstance(e, ReConcatNode): 210 | if language == SMT_20_STRING: 211 | components.append('RegexConcat') 212 | elif language == SMT_25_STRING: 213 | components.append('re.++') 214 | else: 215 | raise NotSupported(e, language) 216 | 217 | elif isinstance(e, ReStarNode): 218 | if language == SMT_20_STRING: 219 | components.append('RegexStar') 220 | elif language == SMT_25_STRING: 221 | components.append('re.*') 222 | else: 223 | raise NotSupported(e, language) 224 | 225 | elif isinstance(e, RePlusNode): 226 | if language == SMT_20_STRING: 227 | components.append('RegexPlus') 228 | elif language == SMT_25_STRING: 229 | components.append('re.+') 230 | else: 231 | raise NotSupported(e, language) 232 | 233 | elif isinstance(e, ReRangeNode): 234 | if language == SMT_20_STRING: 235 | components.append('RegexCharRange') 236 | elif language == SMT_25_STRING: 237 | components.append('re.range') 238 | else: 239 | raise NotSupported(e, language) 240 | 241 | elif isinstance(e, ReUnionNode): 242 | if language == SMT_20_STRING: 243 | components.append('RegexUnion') 244 | elif language == SMT_25_STRING: 245 | components.append('re.union') 246 | else: 247 | raise NotSupported(e, language) 248 | 249 | elif isinstance(e, ReInterNode): 250 | if language == SMT_25_STRING: 251 | components.append('re.inter') 252 | else: 253 | raise NotSupported(e, language) 254 | 255 | # all other expressions 256 | else: 257 | components.append(generate_node(e.symbol, language)) 258 | 259 | # generate args 260 | components.extend(generate_node(n, language) for n in e.body) 261 | 262 | return '({})'.format(' '.join(components)) 263 | 264 | # public API 265 | def generate_file(ast, language, path): 266 | with open(path, 'w+') as file: 267 | file.write(generate(ast, language)) 268 | 269 | def generate(ast, language): 270 | return '\n'.join(generate_node(e, language) for e in ast) 271 | -------------------------------------------------------------------------------- /stringfuzz/generators/__init__.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.generators.concats import * 2 | from stringfuzz.generators.lengths import * 3 | from stringfuzz.generators.overlaps import * 4 | from stringfuzz.generators.random_ast import * 5 | from stringfuzz.generators.random_text import * 6 | from stringfuzz.generators.regex import * 7 | from stringfuzz.generators.equality import * 8 | -------------------------------------------------------------------------------- /stringfuzz/generators/concats.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from stringfuzz.scanner import ALPHABET 4 | from stringfuzz.smt import * 5 | 6 | __all__ = [ 7 | 'concats', 8 | 'SYNTACTIC_DEPTH', 9 | 'SEMANTIC_DEPTH', 10 | ] 11 | 12 | # constants 13 | SYNTACTIC_DEPTH = 'syntactic' 14 | SEMANTIC_DEPTH = 'semantic' 15 | 16 | # functions 17 | def set_equal(a, b): 18 | return smt_assert(smt_equal(a, b)) 19 | 20 | def set_concat(result, a, b): 21 | return set_equal(result, smt_concat(a, b)) 22 | 23 | def extract(character, string, index): 24 | return set_equal(character, smt_at(string, index)) 25 | 26 | def make_semantic_concats(depth, balanced): 27 | 28 | if balanced is True: 29 | raise ValueError('balanced trees with semantic concats are unsupported') 30 | 31 | # compute number of variables 32 | num_vars = (depth * 2) + 1 33 | 34 | # make variable names 35 | variables 
= [smt_var(i) for i in range(num_vars)] 36 | 37 | # make concats 38 | expressions = [] 39 | for i in range(0, len(variables) - 2, 2): 40 | expression = set_concat(variables[i], variables[i + 1], variables[i + 2]) 41 | expressions.append(expression) 42 | 43 | return variables, [], expressions 44 | 45 | def make_syntactic_concats(depth, balanced): 46 | 47 | def concats_helper(depth, balanced): 48 | 49 | # base case 50 | if depth < 1: 51 | new_var = smt_new_var() 52 | return [new_var], new_var 53 | 54 | # make right side 55 | right_vars, right_expr = concats_helper(depth - 1, balanced) 56 | 57 | # make left side 58 | if balanced is True: 59 | left_vars, left_expr = concats_helper(depth - 1, balanced) 60 | else: 61 | left_vars, left_expr = concats_helper(0, balanced) 62 | 63 | # build return value 64 | all_vars = left_vars + right_vars 65 | concat = smt_concat(left_expr, right_expr) 66 | 67 | return all_vars, concat 68 | 69 | # make first variable 70 | first_var = smt_new_var() 71 | 72 | # create return values 73 | variables = [first_var] 74 | constants = [] 75 | expressions = [] 76 | 77 | # make deep concat 78 | if depth > 0: 79 | concat_variables, concat_expr = concats_helper(depth, balanced) 80 | 81 | variables += concat_variables 82 | expressions = [set_equal(first_var, concat_expr)] 83 | 84 | return variables, constants, expressions 85 | 86 | def make_concats(depth, depth_type, solution, balanced, num_extracts, max_extract_index): 87 | 88 | # generate concats 89 | if depth_type == SEMANTIC_DEPTH: 90 | variables, constants, expressions = make_semantic_concats(depth, balanced) 91 | 92 | else: 93 | variables, constants, expressions = make_syntactic_concats(depth, balanced) 94 | 95 | # get first variable 96 | first_var = variables[0] 97 | 98 | # validate args 99 | max_num_extracts = max_extract_index + 1 100 | num_chars_in_vars = max_num_extracts * len(variables) 101 | num_chars_in_consts = sum(map(len, constants)) 102 | num_possible_extracts = num_chars_in_vars + num_chars_in_consts 103 | if num_extracts > num_possible_extracts: 104 | raise ValueError('number of requested extracts exceeds number of possible unique extracts') 105 | 106 | # set first variable to expected solution if one was given 107 | if solution is not None: 108 | expressions.append(set_equal(first_var, smt_str_lit(solution))) 109 | 110 | # add extracts if required 111 | if num_extracts > 0: 112 | 113 | # create model to avoid contradictions 114 | extract_model = {var : list(range(max_num_extracts)) for var in variables} 115 | remaining_vars = list(variables) 116 | 117 | # shuffle indices in model 118 | for indices in extract_model.values(): 119 | random.shuffle(indices) 120 | 121 | # create the extracts 122 | for i in range(num_extracts): 123 | 124 | # randomly pick a variable and a char to extract from it 125 | var_index = random.randrange(len(remaining_vars)) 126 | var = remaining_vars[var_index] 127 | char = smt_str_lit(random.choice(ALPHABET)) 128 | 129 | # pop the first index from which to extract, without replacement 130 | index = smt_int_lit(extract_model[var].pop()) 131 | 132 | # remove the variable if it can no longer be extracted from 133 | num_remaining_indices = len(extract_model[var]) 134 | if num_remaining_indices < 1: 135 | remaining_vars.pop(var_index) 136 | 137 | # add extract 138 | expressions.append(extract(char, var, index)) 139 | 140 | # create definitions 141 | definitions = [] 142 | definitions.extend([smt_declare_var(v) for v in variables]) 143 | definitions.extend([smt_declare_const(v) for v in 
constants]) 144 | 145 | # add sat-check 146 | expressions.append(smt_check_sat()) 147 | 148 | return definitions + expressions 149 | 150 | # public API 151 | def concats(*args, **kwargs): 152 | smt_reset_counters() 153 | return make_concats(*args, **kwargs) 154 | -------------------------------------------------------------------------------- /stringfuzz/generators/equality.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from stringfuzz.scanner import ALPHABET 4 | from stringfuzz.smt import * 5 | from stringfuzz.util import join_terms_with, random_string 6 | 7 | __all__ = [ 8 | 'equality', 9 | ] 10 | 11 | def get_length(max_length, randomise): 12 | if randomise is False: 13 | return max_length 14 | return random.randint(0, max_length) 15 | 16 | def randomly_add_infix(probability): 17 | return random.random() < probability 18 | 19 | def make_equality(num_expressions, num_terms, prefix_length, suffix_length, add_infixes, infix_length, randomise_lengths, infix_probability): 20 | 21 | # check args 22 | if num_expressions < 1: 23 | raise ValueError('the number of expressions must be at least 1') 24 | 25 | if num_terms < 2: 26 | raise ValueError('the number of terms per expression must be at least 2') 27 | 28 | if infix_probability < 0.0 or 1.0 < infix_probability: 29 | raise ValueError('the probability of infixes must be between 0.0 and 1.0') 30 | 31 | # result values 32 | expressions = [] 33 | variables = [] 34 | 35 | # create root variable 36 | root = smt_new_var() 37 | variables.append(root) 38 | 39 | # create expressions 40 | for i in range(num_expressions): 41 | 42 | # prefix and suffix 43 | prefix = smt_str_lit(random_string(get_length(prefix_length, randomise_lengths))) 44 | suffix = smt_str_lit(random_string(get_length(suffix_length, randomise_lengths))) 45 | 46 | # keep track of new variables 47 | new_variables = [] 48 | 49 | # create middle 50 | middle = [] 51 | for i in range(num_terms - 2): 52 | 53 | # if infixes are enabled, add them with the given probability 54 | if add_infixes is True and randomly_add_infix(infix_probability) is True: 55 | new_term = smt_str_lit(random_string(get_length(infix_length, randomise_lengths))) 56 | 57 | # otherwise, just add variables 58 | else: 59 | new_term = smt_new_var() 60 | new_variables.append(new_term) 61 | 62 | middle.append(new_term) 63 | 64 | # compose full expression 65 | terms = [prefix] + middle + [suffix] 66 | concat = join_terms_with(terms, smt_concat) 67 | equality = smt_assert(smt_equal(root, concat)) 68 | 69 | # remember variables and expressions 70 | variables += new_variables 71 | expressions.append(equality) 72 | 73 | # add check sat 74 | expressions.append(smt_check_sat()) 75 | 76 | # create variable declarations 77 | declarations = [] 78 | for v in variables: 79 | declarations.append(smt_declare_var(v)) 80 | 81 | return declarations + expressions 82 | 83 | # public API 84 | def equality(*args, **kwargs): 85 | smt_reset_counters() 86 | return make_equality(*args, **kwargs) 87 | -------------------------------------------------------------------------------- /stringfuzz/generators/lengths.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from collections import namedtuple 4 | 5 | from stringfuzz.smt import * 6 | 7 | __all__ = [ 8 | 'lengths', 9 | ] 10 | 11 | # data structures 12 | Variable = namedtuple('Variable', ['length']) 13 | 14 | # functions 15 | def new_model(min_length, max_length): 16 | length = 
random.randint(min_length, max_length) 17 | return Variable(length) 18 | 19 | def set_equal(a, b): 20 | return smt_assert(smt_equal(a, b)) 21 | 22 | def make_lengths(num_vars, min_length, max_length, num_concats, random_relations): 23 | 24 | # make list of possible relations to use in constraints 25 | if random_relations is True: 26 | def choose_relation(): 27 | return random.choice([smt_equal, smt_gt, smt_lt]) 28 | else: 29 | def choose_relation(): 30 | return smt_equal 31 | 32 | # create variables 33 | variables = [smt_new_var() for i in range(num_vars)] 34 | 35 | # create model 36 | model = {v : new_model(min_length, max_length) for v in variables} 37 | 38 | # create length constraints 39 | expressions = [] 40 | for v in variables: 41 | 42 | # pick a relation 43 | chosen_relation = choose_relation() 44 | 45 | # build constraint 46 | model_length = smt_int_lit(model[v].length) 47 | actual_length = smt_len(v) 48 | constraint = smt_assert(chosen_relation(model_length, actual_length)) 49 | 50 | # add constraint 51 | expressions.append(constraint) 52 | 53 | # validate args 54 | max_num_concats = num_vars // 2 55 | if num_concats > max_num_concats: 56 | raise ValueError('can\'t add more concats than the number of variables divided by 2 (that is, {})'.format(max_num_concats)) 57 | 58 | # if concats are required, add them 59 | if num_concats > 0: 60 | 61 | # copy and shuffle variable list to use in concats 62 | unused_variables = list(variables) 63 | random.shuffle(unused_variables) 64 | 65 | # generate the concats 66 | for i in range(num_concats): 67 | 68 | # pick operands 69 | a = unused_variables.pop() 70 | b = unused_variables.pop() 71 | concat = smt_concat(a, b) 72 | sum_length = model[a].length + model[b].length 73 | 74 | # pick a relation 75 | chosen_relation = choose_relation() 76 | 77 | # build constraint 78 | sum_length_lit = smt_int_lit(sum_length) 79 | actual_length = smt_len(concat) 80 | constraint = smt_assert(chosen_relation(sum_length_lit, actual_length)) 81 | 82 | # add constraint 83 | expressions.append(constraint) 84 | 85 | # add sat-check 86 | expressions.append(smt_check_sat()) 87 | 88 | # create declarations 89 | declarations = [smt_declare_var(v) for v in variables] 90 | 91 | return declarations + expressions 92 | 93 | # public API 94 | def lengths(*args, **kwargs): 95 | smt_reset_counters() 96 | return make_lengths(*args, **kwargs) 97 | -------------------------------------------------------------------------------- /stringfuzz/generators/overlaps.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from stringfuzz.scanner import ALPHABET 4 | from stringfuzz.smt import * 5 | from stringfuzz.util import join_terms_with, random_string 6 | 7 | __all__ = [ 8 | 'overlaps', 9 | ] 10 | 11 | def make_overlaps(num_vars, length_of_consts): 12 | 13 | # check args 14 | if num_vars < 1: 15 | raise ValueError('the number of variables must be at least 1') 16 | 17 | # create constants 18 | left = smt_str_lit(random_string(length_of_consts)) 19 | right = smt_str_lit(random_string(length_of_consts)) 20 | 21 | # create middle variables 22 | middle_vars = [smt_new_var() for i in range(num_vars)] 23 | middle = join_terms_with(middle_vars, smt_concat) 24 | 25 | # create overlapping constraint 26 | left_concat = smt_concat(left, middle) 27 | right_concat = smt_concat(middle, right) 28 | concat_equality = smt_assert(smt_equal(left_concat, right_concat)) 29 | 30 | # add constraint and sat-check 31 | expressions = [ 32 | concat_equality, 
33 | smt_check_sat() 34 | ] 35 | 36 | # create variable declarations 37 | declarations = [] 38 | for v in middle_vars: 39 | declarations.append(smt_declare_var(v)) 40 | 41 | return declarations + expressions 42 | 43 | # public API 44 | def overlaps(*args, **kwargs): 45 | smt_reset_counters() 46 | return make_overlaps(*args, **kwargs) 47 | -------------------------------------------------------------------------------- /stringfuzz/generators/random_ast.py: -------------------------------------------------------------------------------- 1 | import random 2 | import inspect 3 | 4 | from stringfuzz.ast import * 5 | from stringfuzz.smt import smt_new_var, smt_reset_counters, smt_declare_var 6 | from stringfuzz.util import random_string, coin_toss 7 | 8 | __all__ = [ 9 | 'random_ast' 10 | ] 11 | 12 | # constants 13 | # nodes that have no inputs 14 | TERMINALS = [ 15 | ReAllCharNode, 16 | ] 17 | 18 | # nodes that can take expressions 19 | NONTERMINALS = [ 20 | NotNode, 21 | GtNode, 22 | LtNode, 23 | GteNode, 24 | LteNode, 25 | ContainsNode, 26 | AtNode, 27 | LengthNode, 28 | # IndexOfNode, 29 | IndexOf2Node, 30 | PrefixOfNode, 31 | SuffixOfNode, 32 | StringReplaceNode, 33 | SubstringNode, 34 | InReNode, 35 | ReStarNode, 36 | RePlusNode, 37 | # FromIntNode, 38 | # ToIntNode, 39 | ] 40 | 41 | # nodes that can take only terminals 42 | ALMOST_TERMINALS = [ 43 | StrToReNode, 44 | ReRangeNode, 45 | ] 46 | 47 | N_ARY_NONTERMINALS = [ 48 | ConcatNode, 49 | ReConcatNode, 50 | AndNode, 51 | OrNode, 52 | EqualNode, 53 | ReUnionNode, 54 | ReInterNode, 55 | ] 56 | 57 | EXPRESSION_SORTS = DECLARABLE_SORTS + [REGEX_SORT] 58 | 59 | # global config 60 | _max_terms = 0 61 | _max_str_lit_length = 0 62 | _max_int_lit = 0 63 | _literal_probability = 0.0 64 | _semantically_valid = False 65 | 66 | # helpers 67 | def get_all_returning_a(sort, nodes): 68 | return list(filter(lambda node: node.returns(sort), nodes)) 69 | 70 | def get_terminals(nodes): 71 | return filter(lambda node: node.is_terminal(), nodes) 72 | 73 | def make_random_literal(sort): 74 | if sort == STRING_SORT: 75 | return StringLitNode(random_string(_max_str_lit_length)) 76 | 77 | if sort == INT_SORT: 78 | return IntLitNode(random.randint(0, _max_int_lit)) 79 | 80 | if sort == BOOL_SORT: 81 | return BoolLitNode(coin_toss()) 82 | 83 | raise ValueError('unknown sort {}'.format(sort)) 84 | 85 | def should_choose_literal(): 86 | global _literal_probability 87 | return random.random() < _literal_probability 88 | 89 | def make_random_terminal(variables, sort): 90 | 91 | if sort == REGEX_SORT: 92 | return ReAllCharNode() 93 | 94 | # randomly choose between a variable or a literal 95 | if should_choose_literal(): 96 | return make_random_literal(sort) 97 | 98 | return random.choice(variables[sort]) 99 | 100 | def make_random_expression(variables, sort, depth): 101 | global _semantically_valid 102 | 103 | # if semantics are going to hell, then randomly reinvent the sort 104 | if _semantically_valid is False: 105 | sort = random.choice(EXPRESSION_SORTS) 106 | 107 | # at depth 0, make a terminal 108 | if depth < 1: 109 | return make_random_terminal(variables, sort) 110 | 111 | # randomly shrink the depth 112 | shrunken_depth = random.randint(0, depth - 1) 113 | 114 | # get random expression generator 115 | candidate_nodes = get_all_returning_a(sort, NONTERMINALS) 116 | expression_node = random.choice(candidate_nodes) 117 | signature = expression_node.get_signature() 118 | num_args = len(signature) 119 | 120 | # if the expression takes any sort, pick one 121 | if 
expression_node.accepts(ANY_SORT): 122 | collapsed_sort = random.choice(EXPRESSION_SORTS) 123 | signature = [collapsed_sort for i in range(num_args)] 124 | 125 | # generate random arguments 126 | random_args = [make_random_expression(variables, arg_sort, shrunken_depth) for arg_sort in signature] 127 | 128 | # build expression 129 | expression = expression_node(*random_args) 130 | 131 | return expression 132 | 133 | def generate_assert(variables, depth): 134 | expression = make_random_expression(variables, BOOL_SORT, depth) 135 | return AssertNode(expression) 136 | 137 | def make_random_ast(num_vars, num_asserts, depth, max_terms, max_str_lit_length, max_int_lit, literal_probability, semantically_valid): 138 | global _max_terms 139 | global _max_str_lit_length 140 | global _max_int_lit 141 | global _literal_probability 142 | global _semantically_valid 143 | 144 | # set global config 145 | _max_terms = max_terms 146 | _max_str_lit_length = max_str_lit_length 147 | _max_int_lit = max_int_lit 148 | _literal_probability = literal_probability 149 | _semantically_valid = semantically_valid 150 | 151 | # create variables 152 | variables = {s: [smt_new_var() for i in range(num_vars)] for s in DECLARABLE_SORTS} 153 | 154 | # create declarations 155 | declarations = [] 156 | for s in DECLARABLE_SORTS: 157 | new_declarations = [smt_declare_var(v, sort=s) for v in variables[s]] 158 | declarations.extend(new_declarations) 159 | 160 | # create asserts 161 | asserts = [generate_assert(variables, depth) for i in range(num_asserts)] 162 | 163 | # add check-sat 164 | expressions = asserts + [CheckSatNode()] 165 | 166 | return declarations + expressions 167 | 168 | # public API 169 | def random_ast(*args, **kwargs): 170 | smt_reset_counters() 171 | return make_random_ast(*args, **kwargs) 172 | -------------------------------------------------------------------------------- /stringfuzz/generators/random_text.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from stringfuzz.scanner import ALPHABET, WHITESPACE 4 | 5 | __all__ = [ 6 | 'random_text', 7 | ] 8 | 9 | # constants 10 | ALL_CHARS = ALPHABET + WHITESPACE 11 | 12 | # functions 13 | def make_random_text(length): 14 | return ''.join(random.choice(ALL_CHARS) for i in range(length)) 15 | 16 | # public API 17 | def random_text(*args, **kwargs): 18 | return make_random_text(*args, **kwargs) 19 | -------------------------------------------------------------------------------- /stringfuzz/generators/regex.py: -------------------------------------------------------------------------------- 1 | import random 2 | import re 3 | 4 | from stringfuzz.scanner import ALPHABET 5 | from stringfuzz.smt import * 6 | from stringfuzz.util import join_terms_with, random_string, coin_toss 7 | 8 | __all__ = [ 9 | 'regex', 10 | 'INCREASING_LITERALS', 11 | 'RANDOM_LITERALS', 12 | 'MEMBER_IN', 13 | 'MEMBER_NOT_IN', 14 | 'MEMBER_ALTERNATING', 15 | 'MEMBER_RANDOM', 16 | 'OPERATOR_STAR', 17 | 'OPERATOR_PLUS', 18 | 'OPERATOR_UNION', 19 | 'OPERATOR_INTER', 20 | 'OPERATOR_CONCAT', 21 | 'OPERATOR_ALTERNATING', 22 | 'OPERATOR_RANDOM', 23 | ] 24 | 25 | # constants 26 | INCREASING_LITERALS = 'increasing' 27 | RANDOM_LITERALS = 'random' 28 | 29 | LITERAL_TYPES = [ 30 | INCREASING_LITERALS, 31 | RANDOM_LITERALS, 32 | ] 33 | 34 | MEMBER_IN = 'in' 35 | MEMBER_NOT_IN = 'not-in' 36 | MEMBER_ALTERNATING = 'alternating' 37 | MEMBER_RANDOM = 'random' 38 | 39 | MEMBERSHIP_TYPES = [ 40 | MEMBER_IN, 41 | MEMBER_NOT_IN, 42 | MEMBER_ALTERNATING, 
43 | MEMBER_RANDOM, 44 | ] 45 | 46 | OPERATOR_STAR = 's' 47 | OPERATOR_PLUS = 'p' 48 | OPERATOR_UNION = 'u' 49 | OPERATOR_INTER = 'i' 50 | OPERATOR_CONCAT = 'c' 51 | 52 | OPERATOR_LIST = [ 53 | OPERATOR_STAR, 54 | OPERATOR_PLUS, 55 | OPERATOR_UNION, 56 | OPERATOR_INTER, 57 | OPERATOR_CONCAT, 58 | ] 59 | 60 | OPERATOR_ALTERNATING = 'alternating' 61 | OPERATOR_RANDOM = 'random' 62 | 63 | OPERATOR_TYPES = [ 64 | OPERATOR_ALTERNATING, 65 | OPERATOR_RANDOM, 66 | ] 67 | 68 | # global config 69 | # NOTE: 70 | # using globals because it's annoying to pass around a bunch of variables 71 | _cursor = 0 72 | _literal_type = None 73 | _literal_min = 1 74 | _literal_max = 1 75 | 76 | # helpers 77 | def fill_string(character, length): 78 | return character * length 79 | 80 | def get_char_and_advance(): 81 | global _cursor 82 | character = ALPHABET[_cursor] 83 | _cursor = (_cursor + 1) % len(ALPHABET) 84 | return character 85 | 86 | def make_regex_string(min_length, max_length): 87 | global _literal_type 88 | 89 | chosen_length = random.randint(min_length, max_length) 90 | 91 | # use a fixed-length string of one character, each time using 92 | # the next character from the alphabet 93 | if _literal_type == INCREASING_LITERALS: 94 | filler = get_char_and_advance() 95 | string = fill_string(filler, chosen_length) 96 | 97 | # generate a random string 98 | elif _literal_type == RANDOM_LITERALS: 99 | string = random_string(chosen_length) 100 | 101 | return smt_str_to_re(smt_str_lit(string)) 102 | 103 | def make_random_term(depth, operator_index): 104 | if depth == 0: 105 | return make_regex_string(_literal_min, _literal_max) 106 | 107 | if _operator_type == OPERATOR_ALTERNATING: 108 | next_operator_index = operator_index + 1 109 | else: 110 | next_operator_index = random.randrange(len(_operator_list)) 111 | 112 | operator = get_operator_at_index(operator_index) 113 | subterm = make_random_term(depth - 1, next_operator_index) 114 | 115 | if operator == OPERATOR_STAR: 116 | return smt_regex_star(subterm) 117 | 118 | if operator == OPERATOR_PLUS: 119 | return smt_regex_plus(subterm) 120 | 121 | if operator == OPERATOR_UNION: 122 | second_subterm = make_random_term(depth - 1, next_operator_index) 123 | return smt_regex_union(subterm, second_subterm) 124 | 125 | if operator == OPERATOR_INTER: 126 | second_subterm = make_random_term(depth - 1, next_operator_index) 127 | return smt_regex_inter(subterm, second_subterm) 128 | 129 | if operator == OPERATOR_CONCAT: 130 | second_subterm = make_random_term(depth - 1, next_operator_index) 131 | return smt_regex_concat(subterm, second_subterm) 132 | 133 | def make_random_terms(num_terms, depth): 134 | if _operator_type == OPERATOR_ALTERNATING: 135 | terms = [make_random_term(depth, 0) for i in range(num_terms)] 136 | else: 137 | terms = [make_random_term(depth, random.randrange(len(_operator_list))) for i in range(num_terms)] 138 | 139 | regex = join_terms_with(terms, smt_regex_concat) 140 | return regex 141 | 142 | def toggle_membership_type(t): 143 | if t == MEMBER_IN: 144 | return MEMBER_NOT_IN 145 | return MEMBER_IN 146 | 147 | def get_operator_at_index(index): 148 | global _operator_list 149 | 150 | return _operator_list[index % len(_operator_list)] 151 | 152 | def make_constraint(variable, r): 153 | global _configured_membership 154 | global _current_membership 155 | 156 | # if random, set the membership type randomly 157 | if _configured_membership == MEMBER_RANDOM: 158 | if coin_toss(): 159 | _current_membership = MEMBER_IN 160 | else: 161 | _current_membership = 
MEMBER_NOT_IN 162 | 163 | # if toggle, toggle membership type 164 | elif _configured_membership == MEMBER_ALTERNATING: 165 | _current_membership = toggle_membership_type(_current_membership) 166 | 167 | # create constraint 168 | constraint = smt_regex_in(variable, r) 169 | 170 | # negate it if required 171 | if _current_membership == MEMBER_NOT_IN: 172 | constraint = smt_not(constraint) 173 | 174 | return constraint 175 | 176 | def make_regex( 177 | num_regexes, 178 | num_terms, 179 | literal_min, 180 | literal_max, 181 | term_depth, 182 | literal_type, 183 | membership_type, 184 | reset_alphabet, 185 | max_var_length, 186 | min_var_length, 187 | operators, 188 | operator_type, 189 | ): 190 | 191 | # check args 192 | if num_regexes < 1: 193 | raise ValueError('number of regexes must be greater than 0') 194 | 195 | if num_terms < 1: 196 | raise ValueError('number of terms must be greater than 0') 197 | 198 | if literal_min < 1: 199 | raise ValueError('min literal length must be greater than 0') 200 | 201 | if literal_max < 1: 202 | raise ValueError('max literal length must be greater than 0') 203 | 204 | if literal_max < literal_min: 205 | raise ValueError('max literal length must not be less than min literal length') 206 | 207 | if term_depth < 0: 208 | raise ValueError('depths of terms must not be less than 0') 209 | 210 | if literal_type not in LITERAL_TYPES: 211 | raise ValueError('unknown literal type: {!r}'.format(literal_type)) 212 | 213 | if membership_type not in MEMBERSHIP_TYPES: 214 | raise ValueError('unknown membership type: {!r}'.format(membership_type)) 215 | 216 | if min_var_length is not None and min_var_length < 0: 217 | raise ValueError('min variable length must not be less than 0') 218 | 219 | if max_var_length is not None and max_var_length < 0: 220 | raise ValueError('max variable length must not be less than 0') 221 | 222 | if len(operators) < 1 or any(map(lambda x: x not in OPERATOR_LIST, operators)): 223 | raise ValueError('invalid operators: {!r}'.format(operators)) 224 | 225 | if operator_type not in OPERATOR_TYPES: 226 | raise ValueError('unknown operator type: {!r}'.format(operator_type)) 227 | 228 | # set globals 229 | global _cursor 230 | global _literal_type 231 | global _configured_membership 232 | global _current_membership 233 | global _literal_min 234 | global _literal_max 235 | global _operator_list 236 | global _operator_type 237 | 238 | _cursor = 0 239 | _literal_type = literal_type 240 | _configured_membership = membership_type 241 | _current_membership = _configured_membership 242 | _literal_min = literal_min 243 | _literal_max = literal_max 244 | _operator_list = [] 245 | _operator_type = operator_type 246 | 247 | # parse operator list in order, in case user wants a custom alternation order 248 | for c in operators: 249 | if c not in _operator_list: 250 | _operator_list.append(c) 251 | 252 | # create variable 253 | matched = smt_new_var() 254 | 255 | # create regexes 256 | regexes = [] 257 | for i in range(num_regexes): 258 | 259 | # reset alphabet for every regex if required 260 | if reset_alphabet is True: 261 | _cursor = 0 262 | 263 | new_regex = make_random_terms(num_terms, term_depth) 264 | regexes.append(new_regex) 265 | 266 | # create regex constraints 267 | expressions = [] 268 | for r in regexes: 269 | constraint = make_constraint(matched, r) 270 | expressions.append(smt_assert(constraint)) 271 | 272 | # create length constraints if required 273 | if min_var_length is not None: 274 | min_bound = smt_int_lit(min_var_length) 275 | equality = 
smt_lte(min_bound, smt_len(matched)) 276 | expressions.append(smt_assert(equality)) 277 | 278 | if max_var_length is not None: 279 | max_bound = smt_int_lit(max_var_length) 280 | equality = smt_lte(smt_len(matched), max_bound) 281 | expressions.append(smt_assert(equality)) 282 | 283 | # add sat check 284 | expressions.append(smt_check_sat()) 285 | 286 | # create declarations 287 | declarations = [ 288 | smt_declare_var(matched) 289 | ] 290 | 291 | return declarations + expressions 292 | 293 | # public API 294 | def regex(*args, **kwargs): 295 | smt_reset_counters() 296 | return make_regex(*args, **kwargs) 297 | -------------------------------------------------------------------------------- /stringfuzz/mergers/__init__.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.mergers.simple import * 2 | -------------------------------------------------------------------------------- /stringfuzz/mergers/simple.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from stringfuzz.ast import ExpressionNode, SortNode, IdentifierNode, FunctionDeclarationNode, SortedVarNode 4 | from stringfuzz.ast_walker import ASTWalker 5 | 6 | __all__ = [ 7 | 'simple' 8 | ] 9 | 10 | def alternate_merge(asts, merged): 11 | for ast in asts: 12 | if ast: 13 | node = ast.pop(0) 14 | if not node in merged: 15 | merged.append(node) 16 | if any(asts): 17 | merged = alternate_merge(asts, merged) 18 | return merged 19 | 20 | class RenameIDWalker(ASTWalker): 21 | def __init__(self, ast, suffix): 22 | super(RenameIDWalker, self).__init__(ast) 23 | self.suffix = suffix 24 | 25 | def exit_identifier(self, identifier, parent): 26 | identifier.name += "_{}".format(self.suffix) 27 | 28 | def simple(asts, rename_ids): 29 | if rename_ids: 30 | for i in range(len(asts)): 31 | asts[i] = RenameIDWalker(asts[i], i).walk() 32 | merged = alternate_merge(asts, []) 33 | return merged 34 | -------------------------------------------------------------------------------- /stringfuzz/parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from stringfuzz.scanner import scan 4 | from stringfuzz.ast import * 5 | from stringfuzz.util import join_terms_with 6 | 7 | __all__ = [ 8 | 'parse', 9 | 'parse_file', 10 | 'parse_tokens', 11 | 'ParsingError', 12 | ] 13 | 14 | # constants 15 | MAX_ERROR_SIZE = 200 16 | UNDERLINE = '-' 17 | 18 | MESSAGE_FORMAT = '''Parsing error on line {number}: 19 | 20 | {context}{actual_value} 21 | {underline}^ 22 | {filler}expected {expected}, got {actual_type} {actual_value!r}''' 23 | 24 | # data structures 25 | class Stream(object): 26 | 27 | def __init__(self, tokens, text): 28 | self.text = text 29 | self.current_token = None 30 | self.stream = (t for t in tokens) 31 | 32 | def advance(self): 33 | self.current_token = next(self.stream, None) 34 | 35 | def accept(self, name): 36 | if self.current_token is not None and self.current_token.name == name: 37 | self.advance() 38 | return True 39 | return False 40 | 41 | def peek(self): 42 | return self.current_token 43 | 44 | def expect(self, expected): 45 | previous = self.current_token 46 | if self.accept(expected): 47 | return previous 48 | raise ParsingError(expected, self) 49 | 50 | class ParsingError(IndexError): 51 | def __init__(self, expected, stream): 52 | 53 | # compute actual value 54 | actual_token = stream.current_token 55 | if actual_token is not None: 56 | actual_type = actual_token.name 57 | 
actual_value = actual_token.value 58 | error_index = actual_token.position 59 | else: 60 | actual_type = 'nothing' 61 | actual_value = '' 62 | error_index = len(stream.text) - 1 63 | 64 | # get error context 65 | parsed_text = stream.text[0:error_index] 66 | context = parsed_text[-MAX_ERROR_SIZE:] 67 | 68 | if len(context) < len(parsed_text): 69 | context = '... ' + context 70 | 71 | # find row and column of error 72 | try: 73 | latest_newline_index = parsed_text.rindex('\n') 74 | except ValueError as e: 75 | latest_newline_index = 0 76 | 77 | error_row = parsed_text.count('\n') + 1 78 | error_column = error_index - latest_newline_index - 1 79 | 80 | # compose message 81 | message = MESSAGE_FORMAT.format( 82 | number = error_row, 83 | context = context, 84 | underline = (UNDERLINE * error_column), 85 | filler = (' ' * error_column), 86 | expected = expected, 87 | actual_type = actual_type, 88 | actual_value = actual_value, 89 | ) 90 | 91 | # pass message to superclass 92 | super().__init__(message) 93 | 94 | # parsers 95 | def accept_arg(s): 96 | token = s.peek() 97 | 98 | # nested expression 99 | if s.accept('LPAREN'): 100 | expression = expect_expression(s) 101 | s.expect('RPAREN') 102 | return expression 103 | 104 | # literal 105 | if s.accept('BOOL_LIT'): 106 | if token.value == 'true': 107 | return BoolLitNode(True) 108 | elif token.value == 'false': 109 | return BoolLitNode(False) 110 | 111 | if s.accept('INT_LIT'): 112 | return IntLitNode(int(token.value)) 113 | 114 | if s.accept('STRING_LIT'): 115 | return StringLitNode(token.value) 116 | 117 | # others 118 | if s.accept('RE_ALLCHAR'): 119 | return ReAllCharNode() 120 | 121 | if s.accept('IDENTIFIER'): 122 | return IdentifierNode(token.value) 123 | 124 | if s.accept('SETTING'): 125 | return SettingNode(token.value) 126 | 127 | return None 128 | 129 | def accept_meta_arg(s): 130 | arg = s.peek() 131 | 132 | if ( 133 | s.accept('BOOL_LIT') or 134 | s.accept('INT_LIT') or 135 | s.accept('STRING_LIT') or 136 | s.accept('IDENTIFIER') 137 | ): 138 | return MetaDataNode(arg.value) 139 | 140 | if s.accept('SETTING'): 141 | return SettingNode(arg.value) 142 | 143 | return None 144 | 145 | def expect_identifier(s): 146 | token = s.expect('IDENTIFIER') 147 | return IdentifierNode(token.value) 148 | 149 | def expect_arg(s): 150 | result = accept_arg(s) 151 | 152 | if result is None: 153 | raise ParsingError('an argument', s) 154 | 155 | return result 156 | 157 | def expect_sort(s): 158 | result = accept_sort(s) 159 | 160 | if result is None: 161 | raise ParsingError('a sort', s) 162 | 163 | return result 164 | 165 | def repeat_star(s, getter): 166 | terms = [] 167 | 168 | while True: 169 | term = getter(s) 170 | 171 | # break on no term 172 | if term is None: 173 | break 174 | 175 | terms.append(term) 176 | 177 | return terms 178 | 179 | def accept_sort(s): 180 | 181 | # compound sort 182 | if s.accept('LPAREN'): 183 | symbol = expect_identifier(s) 184 | sorts = [expect_sort(s)] 185 | sorts += repeat_star(s, accept_sort) 186 | s.expect('RPAREN') 187 | return CompoundSortNode(symbol, sorts) 188 | 189 | # atomic sort 190 | token = s.peek() 191 | if s.accept('IDENTIFIER'): 192 | return AtomicSortNode(token.value) 193 | 194 | return None 195 | 196 | def accept_sorted_var(s): 197 | if s.accept('LPAREN'): 198 | name = expect_identifier(s) 199 | sort = expect_sort(s) 200 | s.expect('RPAREN') 201 | return SortedVarNode(name, sort) 202 | 203 | return None 204 | 205 | def expect_expression(s): 206 | 207 | if s.accept('ASSERT'): 208 | assertion = 
expect_arg(s) 209 | return AssertNode(assertion) 210 | 211 | # declarations and definitions 212 | if s.accept('DECLARE_FUN'): 213 | name = expect_identifier(s) 214 | 215 | s.expect('LPAREN') 216 | signature = repeat_star(s, accept_sort) 217 | s.expect('RPAREN') 218 | 219 | return_sort = expect_sort(s) 220 | 221 | return FunctionDeclarationNode(name, BracketsNode(signature), return_sort) 222 | 223 | if s.accept('DEFINE_FUN'): 224 | name = expect_identifier(s) 225 | 226 | s.expect('LPAREN') 227 | signature = repeat_star(s, accept_sorted_var) 228 | s.expect('RPAREN') 229 | 230 | return_sort = expect_sort(s) 231 | 232 | s.expect('LPAREN') 233 | body = expect_expression(s) 234 | s.expect('RPAREN') 235 | 236 | return FunctionDefinitionNode(name, BracketsNode(signature), return_sort, body) 237 | 238 | if s.accept('DECLARE_CONST'): 239 | name = expect_identifier(s) 240 | return_sort = expect_sort(s) 241 | return ConstantDeclarationNode(name, return_sort) 242 | 243 | # special expression cases 244 | if s.accept('CONCAT'): 245 | 246 | # first two args are mandatory 247 | a = expect_arg(s) 248 | b = expect_arg(s) 249 | 250 | # more args are optional 251 | other_args = repeat_star(s, accept_arg) 252 | 253 | # re-format n-ary concats into binary concats 254 | concat = join_terms_with([a, b] + other_args, ConcatNode) 255 | 256 | return concat 257 | 258 | if s.accept('CONTAINS'): 259 | a = expect_arg(s) 260 | b = expect_arg(s) 261 | return ContainsNode(a, b) 262 | 263 | if s.accept('AT'): 264 | a = expect_arg(s) 265 | b = expect_arg(s) 266 | return AtNode(a, b) 267 | 268 | if s.accept('LENGTH'): 269 | a = expect_arg(s) 270 | return LengthNode(a) 271 | 272 | if s.accept('INDEXOFVAR'): 273 | 274 | # two arguments are expected 275 | a = expect_arg(s) 276 | b = expect_arg(s) 277 | 278 | # the third argument may or may not be there 279 | c = accept_arg(s) 280 | 281 | if c is not None: 282 | return IndexOf2Node(a, b, c) 283 | 284 | return IndexOfNode(a, b) 285 | 286 | if s.accept('INDEXOF'): 287 | a = expect_arg(s) 288 | b = expect_arg(s) 289 | return IndexOfNode(a, b) 290 | 291 | if s.accept('INDEXOF2'): 292 | a = expect_arg(s) 293 | b = expect_arg(s) 294 | c = expect_arg(s) 295 | return IndexOf2Node(a, b, c) 296 | 297 | if s.accept('PREFIXOF'): 298 | a = expect_arg(s) 299 | b = expect_arg(s) 300 | return PrefixOfNode(a, b) 301 | 302 | if s.accept('SUFFIXOF'): 303 | a = expect_arg(s) 304 | b = expect_arg(s) 305 | return SuffixOfNode(a, b) 306 | 307 | if s.accept('REPLACE'): 308 | a = expect_arg(s) 309 | b = expect_arg(s) 310 | c = expect_arg(s) 311 | return StringReplaceNode(a, b, c) 312 | 313 | if s.accept('SUBSTRING'): 314 | a = expect_arg(s) 315 | b = expect_arg(s) 316 | c = expect_arg(s) 317 | return SubstringNode(a, b, c) 318 | 319 | if s.accept('FROM_INT'): 320 | a = expect_arg(s) 321 | return FromIntNode(a) 322 | 323 | if s.accept('TO_INT'): 324 | a = expect_arg(s) 325 | return ToIntNode(a) 326 | 327 | if s.accept('IN_RE'): 328 | a = expect_arg(s) 329 | b = expect_arg(s) 330 | return InReNode(a, b) 331 | 332 | if s.accept('STR_TO_RE'): 333 | a = expect_arg(s) 334 | return StrToReNode(a) 335 | 336 | if s.accept('RE_CONCAT'): 337 | # first two args are mandatory 338 | a = expect_arg(s) 339 | b = expect_arg(s) 340 | 341 | # more args are optional 342 | other_args = repeat_star(s, accept_arg) 343 | 344 | # re-format n-ary concats into binary concats 345 | concat = join_terms_with([a, b] + other_args, ReConcatNode) 346 | 347 | return concat 348 | 349 | if s.accept('RE_STAR'): 350 | a = expect_arg(s) 351 | 
return ReStarNode(a) 352 | 353 | if s.accept('RE_PLUS'): 354 | a = expect_arg(s) 355 | return RePlusNode(a) 356 | 357 | if s.accept('RE_RANGE'): 358 | a = expect_arg(s) 359 | b = expect_arg(s) 360 | return ReRangeNode(a, b) 361 | 362 | if s.accept('RE_UNION'): 363 | 364 | # first two args are mandatory 365 | a = expect_arg(s) 366 | b = expect_arg(s) 367 | 368 | # more args are optional 369 | other_args = repeat_star(s, accept_arg) 370 | 371 | # re-format n-ary unions into binary unions 372 | union = join_terms_with([a, b] + other_args, ReUnionNode) 373 | 374 | return union 375 | 376 | if s.accept('RE_INTER'): 377 | 378 | # first two args are mandatory 379 | a = expect_arg(s) 380 | b = expect_arg(s) 381 | 382 | # more args are optional 383 | other_args = repeat_star(s, accept_arg) 384 | 385 | # re-format n-ary intersections into binary intersections 386 | inter = join_terms_with([a, b] + other_args, ReInterNode) 387 | 388 | return inter 389 | 390 | token = s.peek() 391 | if s.accept('META_COMMAND'): 392 | body = repeat_star(s, accept_meta_arg) 393 | return MetaCommandNode(token.value, *body) 394 | 395 | # generic expression case 396 | name = expect_identifier(s) 397 | body = repeat_star(s, accept_arg) 398 | 399 | return GenericExpressionNode(name, *body) 400 | 401 | def get_expressions(s): 402 | 403 | expressions = [] 404 | s.advance() 405 | 406 | while s.peek() is not None: 407 | s.expect('LPAREN') 408 | expressions.append(expect_expression(s)) 409 | s.expect('RPAREN') 410 | 411 | return expressions 412 | 413 | # public API 414 | def parse_file(path, language): 415 | with open(path, 'r') as file: 416 | return parse(file.read(), language) 417 | 418 | def parse(text, language): 419 | return parse_tokens(scan(text, language), language, text) 420 | 421 | def parse_tokens(tokens, language, text): 422 | return get_expressions(Stream(tokens, text)) 423 | -------------------------------------------------------------------------------- /stringfuzz/scanner.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | 4 | from stringfuzz.constants import * 5 | 6 | __all__ = [ 7 | 'scan', 8 | 'scan_file', 9 | 'ScanningError', 10 | 'ALPHABET', 11 | 'WHITESPACE', 12 | ] 13 | 14 | # data structures 15 | class ScanningError(ValueError): 16 | pass 17 | 18 | class Token(object): 19 | 20 | def __init__(self, name, value, position): 21 | self.name = name 22 | self.value = value 23 | self.position = position 24 | 25 | def __str__(self): 26 | return self.value 27 | 28 | def __repr__(self): 29 | return '{} {!r} @ {}'.format(self.name, self.value, self.position) 30 | 31 | # helpers 32 | def strip_quotes(string_literal): 33 | return string_literal[1:-1] 34 | 35 | def unescape(string_literal): 36 | return string_literal.encode().decode('unicode_escape') 37 | 38 | def replace_double_quotes(string_literal): 39 | return string_literal.replace("\"\"", "\"") 40 | 41 | # token functions 42 | def make_whitespace(s, w): return Token('WHITESPACE', w, s.match.start()) 43 | def make_identifier(s, w): return Token('IDENTIFIER', w, s.match.start()) 44 | def make_lparen(s, w): return Token('LPAREN', w, s.match.start()) 45 | def make_rparen(s, w): return Token('RPAREN', w, s.match.start()) 46 | def make_setting(s, w): return Token('SETTING', w, s.match.start()) 47 | def make_bool_lit(s, w): return Token('BOOL_LIT', w, s.match.start()) 48 | def make_int_lit(s, w): return Token('INT_LIT', w, s.match.start()) 49 | def make_sym(s, w): return Token('IDENTIFIER', w, 
s.match.start()) 50 | 51 | def make_string_lit(s, w): 52 | literal = unescape(strip_quotes(w)) 53 | return Token('STRING_LIT', literal, s.match.start()) 54 | 55 | def make_string_lit_25(s, w): 56 | literal = replace_double_quotes(unescape(strip_quotes(w))) 57 | return Token('STRING_LIT', literal, s.match.start()) 58 | 59 | # specific symbol tokens 60 | def make_meta_command(s, w): return Token('META_COMMAND', w, s.match.start()) 61 | def make_declare_fun(s, w): return Token('DECLARE_FUN', w, s.match.start()) 62 | def make_define_fun(s, w): return Token('DEFINE_FUN', w, s.match.start()) 63 | def make_declare_const(s, w): return Token('DECLARE_CONST', w, s.match.start()) 64 | def make_assert(s, w): return Token('ASSERT', w, s.match.start()) 65 | def make_contains(s, w): return Token('CONTAINS', w, s.match.start()) 66 | def make_concat(s, w): return Token('CONCAT', w, s.match.start()) 67 | def make_at(s, w): return Token('AT', w, s.match.start()) 68 | def make_indexof_var_args(s, w): return Token('INDEXOFVAR', w, s.match.start()) 69 | def make_indexof_2_args(s, w): return Token('INDEXOF', w, s.match.start()) 70 | def make_indexof_3_args(s, w): return Token('INDEXOF2', w, s.match.start()) 71 | def make_prefixof(s, w): return Token('PREFIXOF', w, s.match.start()) 72 | def make_suffixof(s, w): return Token('SUFFIXOF', w, s.match.start()) 73 | def make_replace(s, w): return Token('REPLACE', w, s.match.start()) 74 | def make_substring(s, w): return Token('SUBSTRING', w, s.match.start()) 75 | def make_str_from_int(s, w): return Token('FROM_INT', w, s.match.start()) 76 | def make_str_to_int(s, w): return Token('TO_INT', w, s.match.start()) 77 | def make_length(s, w): return Token('LENGTH', w, s.match.start()) 78 | def make_in_re(s, w): return Token('IN_RE', w, s.match.start()) 79 | def make_str_to_re(s, w): return Token('STR_TO_RE', w, s.match.start()) 80 | def make_re_allchar(s, w): return Token('RE_ALLCHAR', w, s.match.start()) 81 | def make_re_concat(s, w): return Token('RE_CONCAT', w, s.match.start()) 82 | def make_re_star(s, w): return Token('RE_STAR', w, s.match.start()) 83 | def make_re_plus(s, w): return Token('RE_PLUS', w, s.match.start()) 84 | def make_re_range(s, w): return Token('RE_RANGE', w, s.match.start()) 85 | def make_re_union(s, w): return Token('RE_UNION', w, s.match.start()) 86 | def make_re_inter(s, w): return Token('RE_INTER', w, s.match.start()) 87 | 88 | # constants 89 | ALPHABET = string.digits + string.ascii_letters + string.punctuation 90 | WHITESPACE = string.whitespace 91 | ID_CHAR = r'[\w._\+\-\*\=%?!$_~&^<>@/|:\\]' 92 | SETTING_CHAR = r'[\w._\+\-\*\=%?!$_~&^<>@/|:]' 93 | 94 | # token lists 95 | # NOTE: 96 | # more specific patterns (e.g. reserved words) have to come before more 97 | # general patterns (e.g. 
identifiers) because otherwise the more general 98 | # pattern will match before the more specific one 99 | SMT_20_TOKENS = [ 100 | 101 | # Boolean functions 102 | (r'ite', make_sym), 103 | (r'not', make_sym), 104 | (r'and', make_sym), 105 | (r'or', make_sym), 106 | 107 | # commands 108 | (r'set-logic', make_meta_command), 109 | (r'set-option', make_meta_command), 110 | (r'set-info', make_meta_command), 111 | (r'declare-sort', make_sym), 112 | (r'define-sort', make_sym), 113 | (r'declare-fun', make_declare_fun), 114 | (r'define-fun', make_define_fun), 115 | (r'declare-const', make_sym), 116 | (r'define-const', make_declare_const), 117 | (r'declare-variable', make_sym), 118 | (r'define-variable', make_sym), 119 | (r'push', make_sym), 120 | (r'pop', make_sym), 121 | (r'assert', make_assert), 122 | (r'check-sat', make_sym), 123 | (r'get-assertions', make_sym), 124 | (r'get-proof', make_sym), 125 | (r'get-model', make_sym), 126 | (r'get-unsat-core', make_sym), 127 | (r'get-value', make_sym), 128 | (r'get-assignment', make_sym), 129 | (r'get-option', make_sym), 130 | (r'get-info', make_sym), 131 | (r'exit', make_sym), 132 | 133 | # math operators 134 | (r'\+', make_sym), 135 | (r'-', make_sym), 136 | (r'\*', make_sym), 137 | (r'=', make_sym), 138 | (r'<=', make_sym), 139 | (r'<', make_sym), 140 | (r'>=', make_sym), 141 | (r'>', make_sym), 142 | (r'div', make_sym), 143 | 144 | # whitespace 145 | (r'\s+', make_whitespace), 146 | 147 | # parens 148 | (r'\(', make_lparen), 149 | (r'\)', make_rparen), 150 | 151 | # boolean literals 152 | (r'true', make_bool_lit), 153 | (r'false', make_bool_lit), 154 | 155 | # int literals: digits not followed by identifier characters 156 | (r'\d+(?!' + ID_CHAR + r')', make_int_lit), 157 | 158 | # comments 159 | (r';[^\n]*', make_whitespace), 160 | (r'//[^\n]*', make_whitespace), 161 | 162 | # settings: can use most characters, and start with colons 163 | (r':' + SETTING_CHAR + r'+', make_setting), 164 | 165 | # identifiers: can use most characters, but can't start with digits 166 | (ID_CHAR + r'(? 0: 259 | token_context = '\n'.join(' {} {!r}'.format(t.name, t.value) for t in tokens[-5:]) 260 | text_context = remainder[:100] 261 | raise ScanningError('scanning error:\n{}\n {!r}...'.format(token_context, text_context)) 262 | 263 | return [t for t in tokens if t.name != 'WHITESPACE'] 264 | 265 | def scan_file(path, language): 266 | with open(path, 'r') as file: 267 | return scan(file.read(), language) 268 | -------------------------------------------------------------------------------- /stringfuzz/smt.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Functions for creating ASTs. 
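For example, assuming the counters have just been reset, smt_assert(smt_equal(smt_new_var(), smt_str_lit('foo'))) builds an AssertNode asserting that the fresh variable var0 equals the string literal 'foo'.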
3 | ''' 4 | 5 | from stringfuzz.ast import * 6 | 7 | __all__ = [ 8 | 'smt_var', 9 | 'smt_const', 10 | 'smt_new_var', 11 | 'smt_new_const', 12 | 'smt_str_lit', 13 | 'smt_int_lit', 14 | 'smt_bool_lit', 15 | 'smt_assert', 16 | 'smt_equal', 17 | 'smt_gt', 18 | 'smt_lt', 19 | 'smt_gte', 20 | 'smt_lte', 21 | 'smt_concat', 22 | 'smt_at', 23 | 'smt_len', 24 | 'smt_declare_var', 25 | 'smt_declare_const', 26 | 'smt_check_sat', 27 | 'smt_get_model', 28 | 'smt_reset_counters', 29 | 'smt_str_to_re', 30 | 'smt_regex_in', 31 | 'smt_regex_concat', 32 | 'smt_regex_star', 33 | 'smt_regex_plus', 34 | 'smt_regex_range', 35 | 'smt_regex_union', 36 | 'smt_regex_inter', 37 | 'smt_and', 38 | 'smt_or', 39 | 'smt_not', 40 | 'smt_string_logic', 41 | 'smt_is_sat', 42 | 'smt_is_unsat', 43 | ] 44 | 45 | # constants 46 | VAR_PREFIX = 'var' 47 | CONST_PREFIX = 'const' 48 | 49 | # globals 50 | var_counter = 0 51 | const_counter = 0 52 | 53 | # helper functions 54 | def smt_var(suffix): 55 | return IdentifierNode('{}{}'.format(VAR_PREFIX, suffix)) 56 | 57 | def smt_const(suffix): 58 | return IdentifierNode('{}{}'.format(CONST_PREFIX, suffix)) 59 | 60 | def smt_new_var(): 61 | global var_counter 62 | returned = var_counter 63 | var_counter += 1 64 | return smt_var(returned) 65 | 66 | def smt_new_const(): 67 | global const_counter 68 | returned = const_counter 69 | const_counter += 1 70 | return smt_const(returned) 71 | 72 | def smt_reset_counters(): 73 | global const_counter 74 | global var_counter 75 | const_counter = 0 76 | var_counter = 0 77 | 78 | # leaf expressions 79 | def smt_str_lit(value): 80 | return StringLitNode(value) 81 | 82 | def smt_int_lit(value): 83 | return IntLitNode(value) 84 | 85 | def smt_bool_lit(value): 86 | return BoolLitNode(value) 87 | 88 | # node expressions 89 | def smt_and(a, b): 90 | return AndNode(a, b) 91 | 92 | def smt_or(a, b): 93 | return OrNode(a, b) 94 | 95 | def smt_not(a): 96 | return NotNode(a) 97 | 98 | def smt_equal(a, b): 99 | return EqualNode(a, b) 100 | 101 | def smt_gt(a, b): 102 | return GtNode(a, b) 103 | 104 | def smt_lt(a, b): 105 | return LtNode(a, b) 106 | 107 | def smt_gte(a, b): 108 | return GteNode(a, b) 109 | 110 | def smt_lte(a, b): 111 | return LteNode(a, b) 112 | 113 | def smt_concat(a, b): 114 | return ConcatNode(a, b) 115 | 116 | def smt_at(s, i): 117 | return AtNode(s, i) 118 | 119 | def smt_len(a): 120 | return LengthNode(a) 121 | 122 | def smt_str_to_re(s): 123 | return StrToReNode(s) 124 | 125 | def smt_regex_in(s, r): 126 | return InReNode(s, r) 127 | 128 | def smt_regex_concat(a, b): 129 | return ReConcatNode(a, b) 130 | 131 | def smt_regex_plus(a): 132 | return RePlusNode(a) 133 | 134 | def smt_regex_range(a, b): 135 | return ReRangeNode(a, b) 136 | 137 | def smt_regex_star(a): 138 | return ReStarNode(a) 139 | 140 | def smt_regex_union(a, b): 141 | return ReUnionNode(a, b) 142 | 143 | def smt_regex_inter(a, b): 144 | return ReInterNode(a, b) 145 | 146 | # commands 147 | def smt_assert(exp): 148 | return AssertNode(exp) 149 | 150 | def smt_declare_var(identifier, sort='String'): 151 | return FunctionDeclarationNode(identifier, BracketsNode([]), AtomicSortNode(sort)) 152 | 153 | def smt_declare_const(identifier, sort='String'): 154 | return ConstantDeclarationNode(identifier, AtomicSortNode(sort)) 155 | 156 | def smt_check_sat(): 157 | return CheckSatNode() 158 | 159 | def smt_get_model(): 160 | return GetModelNode() 161 | 162 | def _smt_status(status): 163 | return MetaCommandNode(IdentifierNode('set-info'), SettingNode('status'), MetaDataNode(status)) 164 
| 165 | def smt_is_sat(): 166 | return _smt_status('sat') 167 | 168 | def smt_is_unsat(): 169 | return _smt_status('unsat') 170 | 171 | def smt_string_logic(): 172 | return MetaCommandNode(IdentifierNode('set-logic'), IdentifierNode('QF_S')) 173 | -------------------------------------------------------------------------------- /stringfuzz/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.transformers.unprintable import * 2 | from stringfuzz.transformers.nop import * 3 | from stringfuzz.transformers.rotate import * 4 | from stringfuzz.transformers.fuzz import * 5 | from stringfuzz.transformers.graft import * 6 | from stringfuzz.transformers.translate import * 7 | from stringfuzz.transformers.reverse import * 8 | from stringfuzz.transformers.multiply import * 9 | -------------------------------------------------------------------------------- /stringfuzz/transformers/fuzz.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The Fuzz transformer performs two types of transformations. 3 | The first is for literals. The second is for operators. 4 | 5 | Literals are fuzzed to similar literals. For example, 6 | an integer literal x will be replaced with x+r where 7 | r is a random number between -x and x. String literals 8 | are processed character by character. Each character can either 9 | remain in the updated string, be replaced by a random string, 10 | or be deleted with equal probability. 11 | 12 | Operators are fuzzed, with 50% probability, to a new operator 13 | with the same function type. For example, regex * can be fuzzed 14 | to regex +. 15 | ''' 16 | 17 | import random 18 | 19 | from stringfuzz.ast import IntLitNode, StringLitNode, ReRangeNode 20 | from stringfuzz.types import REPLACEABLE_OPS 21 | from stringfuzz.ast_walker import ASTWalker 22 | from stringfuzz.generators import random_text 23 | 24 | __all__ = [ 25 | 'fuzz', 26 | ] 27 | 28 | def fuzz_char(c): 29 | 30 | # with equal probability: replace, keep, add, or delete a character 31 | operation = random.randint(1,4) 32 | 33 | # replace it 34 | if operation == 1: 35 | return random_text(1) 36 | 37 | # keep it the same 38 | if operation == 2: 39 | return c 40 | 41 | # add a new character 42 | if operation == 3: 43 | return c + random_text(1) 44 | 45 | # delete it 46 | return '' 47 | 48 | def fuzz_string(string): 49 | return ''.join(fuzz_char(c) for c in string) 50 | 51 | class LitTransformer(ASTWalker): 52 | def __init__(self, ast, skip_re_range): 53 | super().__init__(ast) 54 | self.skip_re_range = skip_re_range 55 | 56 | def exit_literal(self, literal, parent): 57 | 58 | # int literal 59 | if isinstance(literal, IntLitNode): 60 | 61 | # maintain sign of literal 62 | literal.value += random.randint(-literal.value, literal.value) 63 | 64 | # string literal 65 | elif isinstance(literal, StringLitNode): 66 | 67 | # skip children of regex range if required 68 | if isinstance(parent, ReRangeNode) and self.skip_re_range: 69 | return 70 | 71 | # create new value for literal 72 | new_val = fuzz_string(literal.value) 73 | 74 | # replace old value with new value 75 | literal.value = new_val 76 | 77 | def exit_expression(self, expr, parent): 78 | for type_list in REPLACEABLE_OPS: 79 | for i in range(len(expr.body)): 80 | 81 | # check if it's a replaceable type; if so, randomly replace it 82 | replaceable = [isinstance(expr.body[i], C) for C in type_list] 83 | if any(replaceable): 84 | choice = random.choice(type_list) 85 
| expr.body[i] = choice(*expr.body[i].body) 86 | 87 | # public API 88 | def fuzz(ast, skip_re_range): 89 | transformed = LitTransformer(ast, skip_re_range).walk() 90 | return transformed 91 | -------------------------------------------------------------------------------- /stringfuzz/transformers/graft.py: -------------------------------------------------------------------------------- 1 | ''' 2 | The graft transform picks a subtree and a leaf at random 3 | and swaps them for each type. 4 | ''' 5 | 6 | import random 7 | 8 | from stringfuzz.ast import StringLitNode, BoolLitNode, IntLitNode, StrToReNode 9 | from stringfuzz.types import STR_RET, INT_RET, BOOL_RET, RX_RET 10 | from stringfuzz.ast_walker import ASTWalker 11 | 12 | __all__ = [ 13 | 'graft', 14 | ] 15 | 16 | class GraftTransformer(ASTWalker): 17 | def __init__(self, ast, pairs): 18 | super().__init__(ast) 19 | self.pairs = pairs 20 | 21 | def enter_expression(self, expr, parent): 22 | for i in range(len(expr.body)): 23 | for pair in self.pairs: 24 | if expr.body[i] == pair[0]: 25 | expr.body[i] = pair[1] 26 | elif expr.body[i] == pair[1]: 27 | expr.body[i] = pair[0] 28 | 29 | class GraftFinder(ASTWalker): 30 | def __init__(self, ast, skip_str_to_re): 31 | super().__init__(ast) 32 | self.skip_str_to_re = skip_str_to_re 33 | # expr, lit 34 | self.str = [None, None] 35 | self.bool = [None, None] 36 | self.int = [None, None] 37 | self.rx = [None, None] 38 | 39 | @property 40 | def pairs(self): 41 | pairs = [] 42 | if all(self.str): 43 | pairs.append(self.str) 44 | if all(self.bool): 45 | pairs.append(self.bool) 46 | if all(self.int): 47 | pairs.append(self.int) 48 | if all(self.rx): 49 | pairs.append(self.rx) 50 | return pairs 51 | 52 | def enter_literal(self, literal, parent): 53 | replace = random.choice([True, False]) 54 | if isinstance(literal, StringLitNode): 55 | if isinstance(parent, StrToReNode) and self.skip_str_to_re: 56 | return 57 | if self.str[1]: 58 | if replace: 59 | self.str[1] = literal 60 | else: 61 | self.str[1] = literal 62 | elif isinstance(literal, BoolLitNode): 63 | if self.bool[1]: 64 | if replace: 65 | self.bool[1] = literal 66 | else: 67 | self.bool[1] = literal 68 | elif isinstance(literal, IntLitNode): 69 | if self.int[1]: 70 | if replace: 71 | self.int[1] = literal 72 | else: 73 | self.int[1] = literal 74 | 75 | def enter_identifier(self, ident, parent): 76 | #TODO How to check type of identifiers? 
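# One possible (unimplemented) approach: record each identifier's declared sort
# while walking FunctionDeclarationNode / ConstantDeclarationNode expressions,
# then consult that mapping here before treating the identifier as a string,
# int, or bool leaf candidate.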
77 | # if self.str[1]: 78 | # if random.random() < 0.5: 79 | # self.str[1] = ident 80 | # else: 81 | # self.str[1] = ident 82 | pass 83 | 84 | def enter_expression(self, expr, parent): 85 | replace = random.choice([True, False]) 86 | if isinstance(expr, StrToReNode): 87 | # take StrToReNode's to be literals for RX 88 | if self.rx[1]: 89 | if replace: 90 | self.rx[1] = expr 91 | else: 92 | self.rx[1] = expr 93 | 94 | # assign expr part of pair 95 | elif any([isinstance(expr, C) for C in STR_RET]): 96 | if self.str[0]: 97 | if replace: 98 | self.str[0] = expr 99 | else: 100 | self.str[0] = expr 101 | elif any([isinstance(expr, C) for C in INT_RET]): 102 | if self.int[0]: 103 | if replace: 104 | self.int[0] = expr 105 | else: 106 | self.int[0] = expr 107 | elif any([isinstance(expr, C) for C in BOOL_RET]): 108 | if self.bool[0]: 109 | if replace: 110 | self.bool[0] = expr 111 | else: 112 | self.bool[0] = expr 113 | elif any([isinstance(expr, C) for C in RX_RET]): 114 | if self.rx[0]: 115 | if replace: 116 | self.rx[0] = expr 117 | else: 118 | self.rx[0] = expr 119 | 120 | 121 | # public API 122 | def graft(ast, skip_str_to_re): 123 | finder = GraftFinder(ast, skip_str_to_re) 124 | finder.walk() 125 | transformed = GraftTransformer(ast, finder.pairs).walk() 126 | return transformed 127 | -------------------------------------------------------------------------------- /stringfuzz/transformers/multiply.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Multiplying every integer literal by n and repeating 3 | every character in a string literal n times for some n 4 | ''' 5 | 6 | from stringfuzz.ast import StringLitNode, IntLitNode, ReRangeNode 7 | from stringfuzz.ast_walker import ASTWalker 8 | 9 | __all__ = [ 10 | 'multiply', 11 | ] 12 | 13 | class MultiplyTransformer(ASTWalker): 14 | def __init__(self, ast, factor, skip_re_range): 15 | super().__init__(ast) 16 | self.factor = factor 17 | self.skip_re_range = skip_re_range 18 | 19 | def exit_literal(self, literal, parent): 20 | if isinstance(literal, StringLitNode): 21 | if isinstance(parent, ReRangeNode) and self.skip_re_range: 22 | return 23 | new_val = "" 24 | for char in literal.value: 25 | new_val += char * self.factor 26 | literal.value = new_val 27 | elif isinstance(literal, IntLitNode): 28 | literal.value = literal.value * self.factor 29 | 30 | # public API 31 | def multiply(ast, factor, skip_re_range): 32 | transformed = MultiplyTransformer(ast, factor, skip_re_range).walk() 33 | return transformed 34 | -------------------------------------------------------------------------------- /stringfuzz/transformers/nop.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.parser import parse 2 | 3 | __all__ = [ 4 | 'nop', 5 | ] 6 | 7 | # public API 8 | def nop(ast): 9 | return ast 10 | -------------------------------------------------------------------------------- /stringfuzz/transformers/reverse.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Reversing every string literal 3 | ''' 4 | 5 | from stringfuzz.ast import StringLitNode, ConcatNode, ReConcatNode 6 | from stringfuzz.ast_walker import ASTWalker 7 | 8 | __all__ = [ 9 | 'reverse', 10 | ] 11 | 12 | class ReverseTransformer(ASTWalker): 13 | def __init__(self, ast): 14 | super().__init__(ast) 15 | 16 | def exit_literal(self, literal, parent): 17 | if isinstance(literal, StringLitNode): 18 | literal.value = literal.value[::-1] 19 | 20 | def 
exit_expression(self, expr, parent): 21 | if isinstance(expr, (ConcatNode, ReConcatNode)): 22 | expr.body = reversed(expr.body) 23 | 24 | # public API 25 | def reverse(ast): 26 | transformed = ReverseTransformer(ast).walk() 27 | return transformed 28 | -------------------------------------------------------------------------------- /stringfuzz/transformers/rotate.py: -------------------------------------------------------------------------------- 1 | from stringfuzz.types import ALL_INT_ARGS, ALL_RX_ARGS, ALL_STR_ARGS 2 | from stringfuzz.ast_walker import ASTWalker 3 | 4 | __all__ = [ 5 | 'rotate', 6 | ] 7 | 8 | class RotateTransformer(ASTWalker): 9 | def __init__(self, ast): 10 | super().__init__(ast) 11 | 12 | def exit_expression(self, expr, parent): 13 | for uniform in [ALL_INT_ARGS, ALL_RX_ARGS, ALL_STR_ARGS]: 14 | # need at least two top level children 15 | uniform_expr = [isinstance(expr, C) for C in uniform] 16 | if any(uniform_expr) and len(expr.body) > 1: 17 | for i in range(len(expr.body)): 18 | uniform_child = [isinstance(expr.body[i], C) for C in uniform] 19 | if any(uniform_child): 20 | # rotate clockwise 21 | # j is the other top level child 22 | if i == len(expr.body)-1: 23 | j = 0 24 | else: 25 | j = len(expr.body)-1 26 | temp = expr.body[j] 27 | expr.body[j] = expr.body[i].body[0] 28 | new_body = expr.body[i].body[1:] + [temp] 29 | expr.body[i].body = new_body 30 | 31 | # public API 32 | def rotate(ast): 33 | transformed = RotateTransformer(ast).walk() 34 | return transformed 35 | -------------------------------------------------------------------------------- /stringfuzz/transformers/translate.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Permuting the alphabet in every string literal. 
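For example, under a permutation that maps 'a' to 'q' and 'b' to 'z', the literal "ab" becomes "qz"; one permutation table is built per run and applied to every string literal in the instance.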
3 | ''' 4 | 5 | import random 6 | import copy 7 | 8 | from stringfuzz.ast import StringLitNode, ReRangeNode 9 | from stringfuzz.ast_walker import ASTWalker 10 | from stringfuzz import ALL_CHARS 11 | 12 | __all__ = [ 13 | 'translate' 14 | ] 15 | 16 | WITH_INTEGERS = list(ALL_CHARS) 17 | WITHOUT_INTEGERS = [c for c in ALL_CHARS if not c.isdecimal()] 18 | 19 | class TranslateTransformer(ASTWalker): 20 | def __init__(self, ast, character_set, skip_re_range): 21 | super().__init__(ast) 22 | self.table = self.make_table(character_set) 23 | self.skip_re_range = skip_re_range 24 | 25 | def make_table(self, character_set): 26 | shuffled = copy.copy(character_set) 27 | random.shuffle(shuffled) 28 | shuffled = ''.join(shuffled) 29 | character_set = ''.join(character_set) 30 | return str.maketrans(character_set, shuffled) 31 | 32 | def exit_literal(self, literal, parent): 33 | if isinstance(literal, StringLitNode): 34 | if isinstance(parent, ReRangeNode) and self.skip_re_range: 35 | return 36 | literal.value = literal.value.translate(self.table) 37 | 38 | # public API 39 | def translate(ast, integer_flag, skip_re_range): 40 | if integer_flag: 41 | character_set = WITH_INTEGERS 42 | else: 43 | character_set = WITHOUT_INTEGERS 44 | transformed = TranslateTransformer(ast, character_set, skip_re_range).walk() 45 | return transformed 46 | -------------------------------------------------------------------------------- /stringfuzz/transformers/unprintable.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import random 4 | import string 5 | 6 | from stringfuzz.ast import ExpressionNode, StringLitNode 7 | 8 | __all__ = [ 9 | 'unprintable', 10 | ] 11 | 12 | EXCLUDED_CHARS = '\n\t\x00' 13 | UNPRINTABLE_CHARS = [chr(i) for i in range(32) if chr(i) not in EXCLUDED_CHARS] 14 | ALL_CHARS = string.printable 15 | 16 | # TODO: 17 | # sanity check that the transformation should not inject 18 | # if len(UNPRINTABLE_CHARS) < len(ALL_CHARS): 19 | # print("REALLY BAD ERROR: 'unprintable' transformation loses data", file=sys.stderr) 20 | # exit(1) 21 | 22 | # TODO: 23 | # fix pick_unprintable to pick without replacement 24 | def pick_unprintable(): 25 | return random.choice(UNPRINTABLE_CHARS) 26 | 27 | def make_charmap(): 28 | return {c : pick_unprintable() for c in ALL_CHARS} 29 | 30 | def make_unprintable_string(s, charmap): 31 | return ''.join(charmap[c] for c in s) 32 | 33 | def make_unprintable_expression(expression, charmap): 34 | 35 | for i in range(len(expression.body)): 36 | 37 | arg = expression.body[i] 38 | 39 | # recurse down expressions 40 | if isinstance(arg, ExpressionNode): 41 | make_unprintable_expression(arg, charmap) 42 | 43 | # replace string literals 44 | elif isinstance(arg, StringLitNode): 45 | 46 | # create new string 47 | old_string = arg.value 48 | new_string = make_unprintable_string(old_string, charmap) 49 | 50 | # assign new literal 51 | expression.body[i] = StringLitNode(new_string) 52 | 53 | # public API 54 | def unprintable(ast): 55 | charmap = make_charmap() 56 | 57 | for expression in ast: 58 | make_unprintable_expression(expression, charmap) 59 | 60 | return ast 61 | -------------------------------------------------------------------------------- /stringfuzz/types.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Groupings operators by types for transformers. 
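For example, REPLACEABLE_OPS below is consumed by the fuzz transformer to swap an operator for another of the same signature, the ALL_*_ARGS groups drive the rotate transformer, and the *_RET groups are used by the graft transformer.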
3 | ''' 4 | 5 | from stringfuzz.ast import * 6 | 7 | # Groups of replaceable_OPS operators by Function Type 8 | # ARG_...RET 9 | # e.g. STR_STR_STR means takes two strings and returns a string 10 | STR_STR_STR = [ConcatNode] 11 | STR_STR_BOOL = [ContainsNode, PrefixOfNode, SuffixOfNode] 12 | STR_INT_STR = [AtNode] 13 | STR_INT = [LengthNode, ToIntNode] 14 | STR_STR_INT = [IndexOfNode] 15 | STR_STR_INT_INT = [IndexOf2Node] 16 | STR_STR_STR_STR = [StringReplaceNode] 17 | STR_INT_INT_STR = [SubstringNode] 18 | INT_STR = [FromIntNode] 19 | STR_RX_BOOL = [InReNode] 20 | STR_RX = [StrToReNode] 21 | RX_RX_RX = [ReConcatNode, ReUnionNode, ReInterNode] 22 | RX_RX = [ReStarNode, RePlusNode] 23 | INT_INT_RX = [ReRangeNode] 24 | 25 | # types with more than one inhabitant for fuzzing 26 | REPLACEABLE_OPS = [STR_STR_BOOL, STR_INT, RX_RX_RX, RX_RX] 27 | 28 | # all the same argument types for rotating 29 | ALL_STR_ARGS = STR_STR_STR_STR + STR_STR_STR + STR_STR_INT + STR_STR_BOOL + STR_INT + STR_RX 30 | ALL_RX_ARGS = RX_RX_RX + RX_RX 31 | ALL_INT_ARGS = INT_STR + INT_INT_RX 32 | 33 | # all the same return type for cutting 34 | STR_RET = STR_STR_STR + STR_STR_STR_STR + INT_STR + STR_INT_STR + STR_INT_INT_STR 35 | INT_RET = STR_INT + STR_STR_INT + STR_STR_INT_INT 36 | BOOL_RET = STR_STR_BOOL + STR_RX_BOOL 37 | RX_RET = STR_RX + RX_RX + RX_RX_RX + INT_INT_RX 38 | -------------------------------------------------------------------------------- /stringfuzz/util.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from stringfuzz.scanner import ALPHABET 4 | from stringfuzz.ast import ConcatNode, ReConcatNode 5 | 6 | __all__ = [ 7 | 'coin_toss', 8 | 'random_string', 9 | 'join_terms_with', 10 | 'all_same', 11 | ] 12 | 13 | # public API 14 | def coin_toss(): 15 | return random.choice([True, False]) 16 | 17 | def random_string(length): 18 | return ''.join(random.choice(ALPHABET) for i in range(length)) 19 | 20 | def join_terms_with(terms, concatenator): 21 | assert len(terms) > 0 22 | 23 | # initialise result to the last term (i.e. 
first in reversed list) 24 | reversed_terms = reversed(terms) 25 | result = next(reversed_terms) 26 | 27 | # keep appending preceding terms to the result 28 | for term in reversed_terms: 29 | result = concatenator(term, result) 30 | 31 | return result 32 | 33 | # CREDIT: 34 | # https://stackoverflow.com/questions/3844801/check-if-all-elements-in-a-list-are-identical 35 | def all_same(lst): 36 | return not lst or lst.count(lst[0]) == len(lst) 37 | -------------------------------------------------------------------------------- /tests/ast_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from stringfuzz.ast import * 4 | 5 | class TestAST(unittest.TestCase): 6 | 7 | def test_no_ast_node(self): 8 | self.assertRaises(NameError, lambda: ASTNode) 9 | 10 | # literals 11 | def test_literal_bool(self): 12 | true = BoolLitNode(True) 13 | false = BoolLitNode(False) 14 | 15 | self.assertIs(true.value, True) 16 | self.assertIs(false.value, False) 17 | self.assertRaises(AssertionError, BoolLitNode, 1) 18 | self.assertRaises(AssertionError, BoolLitNode, 0) 19 | self.assertRaises(AssertionError, BoolLitNode, 'true') 20 | 21 | def test_literal_int(self): 22 | five = IntLitNode(5) 23 | 24 | self.assertEqual(five.value, 5) 25 | self.assertRaises(AssertionError, IntLitNode, True) 26 | self.assertRaises(AssertionError, IntLitNode, '5') 27 | 28 | def test_literal_string(self): 29 | hello = StringLitNode('hello') 30 | 31 | self.assertEqual(hello.value, 'hello') 32 | self.assertRaises(AssertionError, StringLitNode, True) 33 | self.assertRaises(AssertionError, StringLitNode, 5) 34 | 35 | if __name__ == '__main__': 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /tests/genetic_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | class GeneticTest(unittest.TestCase): 4 | pass 5 | 6 | if __name__ == '__main__': 7 | unittest.main() 8 | -------------------------------------------------------------------------------- /tests/parser_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from stringfuzz.scanner import SMT_20, SMT_20_STRING, SMT_25_STRING 4 | from stringfuzz.parser import parse, parse_file 5 | 6 | class TestParser(unittest.TestCase): 7 | 8 | def test_no_file(self): 9 | self.assertRaises(IOError, parse_file, '', SMT_20) 10 | 11 | def test_empty(self): 12 | self.assertListEqual([], parse('', SMT_20)) 13 | 14 | def test_bad_language(self): 15 | self.assertRaises(ValueError, parse, '', '') 16 | 17 | def test_good_languages(self): 18 | self.assertListEqual([], parse('', SMT_20)) 19 | self.assertListEqual([], parse('', SMT_20_STRING)) 20 | self.assertListEqual([], parse('', SMT_25_STRING)) 21 | 22 | def test_trivial(self): 23 | expressions = parse('(check-sat)', SMT_20) 24 | self.assertEqual(len(expressions), 1) 25 | self.assertEqual(expressions[0].symbol.name, 'check-sat') 26 | self.assertListEqual(expressions[0].body, []) 27 | 28 | def test_simple_smt_20(self): 29 | expressions = parse(''' 30 | (declare-fun X () String) 31 | (assert (= X "solution")) 32 | (check-sat) 33 | ''', SMT_20_STRING) 34 | 35 | self.assertEqual(len(expressions), 3) 36 | 37 | self.assertEqual(expressions[0].symbol.name, 'declare-fun') 38 | self.assertEqual(expressions[0].body[0].name, 'X') 39 | self.assertEqual(expressions[0].body[2].name, 'String') 40 | 41 | 
self.assertEqual(expressions[1].symbol.name, 'assert') 42 | self.assertEqual(expressions[1].body[0].symbol.name, '=') 43 | self.assertEqual(expressions[1].body[0].body[0].name, 'X') 44 | self.assertEqual(expressions[1].body[0].body[1].value, 'solution') 45 | 46 | self.assertEqual(expressions[2].symbol.name, 'check-sat') 47 | 48 | def test_simple_smt_25(self): 49 | expressions = parse(''' 50 | (declare-fun X () String) 51 | (assert (= X "solution")) 52 | (check-sat) 53 | ''', SMT_25_STRING) 54 | 55 | self.assertEqual(len(expressions), 3) 56 | 57 | self.assertEqual(expressions[0].symbol.name, 'declare-fun') 58 | self.assertEqual(expressions[0].body[0].name, 'X') 59 | self.assertEqual(expressions[0].body[2].name, 'String') 60 | 61 | self.assertEqual(expressions[1].symbol.name, 'assert') 62 | self.assertEqual(expressions[1].body[0].symbol.name, '=') 63 | self.assertEqual(expressions[1].body[0].body[0].name, 'X') 64 | self.assertEqual(expressions[1].body[0].body[1].value, 'solution') 65 | 66 | self.assertEqual(expressions[2].symbol.name, 'check-sat') 67 | 68 | if __name__ == '__main__': 69 | unittest.main() 70 | -------------------------------------------------------------------------------- /tests/scanner_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from stringfuzz.constants import SMT_20, SMT_20_STRING, SMT_25_STRING 4 | from stringfuzz.scanner import scan, scan_file 5 | 6 | class TestScanner(unittest.TestCase): 7 | 8 | def test_constants(self): 9 | self.assertEqual(SMT_20, 'smt2') 10 | self.assertEqual(SMT_20_STRING, 'smt20') 11 | self.assertEqual(SMT_25_STRING, 'smt25') 12 | 13 | def test_no_file(self): 14 | self.assertRaises(IOError, scan_file, '', SMT_20) 15 | 16 | def test_empty(self): 17 | self.assertListEqual([], scan('', SMT_20)) 18 | 19 | def test_bad_language(self): 20 | self.assertRaises(ValueError, scan, '', '') 21 | 22 | def test_good_languages(self): 23 | self.assertListEqual([], scan('', SMT_20)) 24 | self.assertListEqual([], scan('', SMT_20_STRING)) 25 | self.assertListEqual([], scan('', SMT_25_STRING)) 26 | 27 | def test_simple(self): 28 | tokens = scan('(check-sat)', SMT_20) 29 | self.assertEqual(len(tokens), 3) 30 | self.assertEqual(tokens[0].name, 'LPAREN') 31 | self.assertEqual(tokens[0].value, '(') 32 | self.assertEqual(tokens[1].name, 'IDENTIFIER') 33 | self.assertEqual(tokens[1].value, 'check-sat') 34 | self.assertEqual(tokens[2].name, 'RPAREN') 35 | self.assertEqual(tokens[2].value, ')') 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /tests/walker_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dblotsky/stringfuzz/5507894ed5d94ed36098753357d33adee182b298/tests/walker_tests.py --------------------------------------------------------------------------------