├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── bin
    ├── smtparse
    ├── smtscan
    ├── stringbreak
    ├── stringfuzzg
    ├── stringfuzzx
    ├── stringmerge
    ├── stringstats
    ├── tryparse
    └── unprintable
├── setup.py
├── stringfuzz
    ├── __init__.py
    ├── analyser.py
    ├── ast.py
    ├── ast_walker.py
    ├── constants.py
    ├── fuzzers
    │   ├── __init__.py
    │   └── genetic.py
    ├── generator.py
    ├── generators
    │   ├── __init__.py
    │   ├── concats.py
    │   ├── equality.py
    │   ├── lengths.py
    │   ├── overlaps.py
    │   ├── random_ast.py
    │   ├── random_text.py
    │   └── regex.py
    ├── mergers
    │   ├── __init__.py
    │   └── simple.py
    ├── parser.py
    ├── scanner.py
    ├── smt.py
    ├── transformers
    │   ├── __init__.py
    │   ├── fuzz.py
    │   ├── graft.py
    │   ├── multiply.py
    │   ├── nop.py
    │   ├── reverse.py
    │   ├── rotate.py
    │   ├── translate.py
    │   └── unprintable.py
    ├── types.py
    └── util.py
└── tests
    ├── ast_tests.py
    ├── genetic_tests.py
    ├── parser_tests.py
    ├── scanner_tests.py
    └── walker_tests.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.pyc
3 | dist
4 | build


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | StringFuzz
 2 | Copyright (c) Dmitry Blotsky
 3 | All rights reserved. 
 4 | MIT License
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 7 | 
 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 9 | 
10 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | TOTAL     = $(shell find . 					     -name "*.py" | xargs cat | wc -l)
 2 | GEN       = $(shell find stringfuzz/generators   -name "*.py" | xargs cat | wc -l)
 3 | TRANS     = $(shell find stringfuzz/transformers -name "*.py" | xargs cat | wc -l)
 4 | NUM_TOTAL = $(shell find . 					     -name "*.py" | wc -l)
 5 | NUM_GEN   = $(shell find stringfuzz/generators   -name "*.py" | wc -l)
 6 | NUM_TRANS = $(shell find stringfuzz/transformers -name "*.py" | wc -l)
 7 | PER_TOTAL = $(shell echo $$(( $(TOTAL) / $(NUM_TOTAL) )) )
 8 | PER_GEN   = $(shell echo $$(( $(GEN) / $(NUM_GEN) )) )
 9 | PER_TRANS = $(shell echo $$(( $(TRANS) / $(NUM_TRANS) )) )
10 | 
11 | help default all usage:
12 | 	@echo "Usage: don't use."
13 | 
14 | loc:
15 | 	@echo "total:" $(TOTAL) / $(NUM_TOTAL) = $(PER_TOTAL)
16 | 	@echo "gen:  " $(GEN) / $(NUM_GEN)     = $(PER_GEN)
17 | 	@echo "trans:" $(TRANS) / $(NUM_TRANS) = $(PER_TRANS)
18 | 
19 | run:
20 | 	stringfuzzx --help
21 | 	stringfuzzg --help
22 | 	stringstats --help
23 | 	stringbreak --help
24 | 
25 | cvc:
26 | 	stringbreak "cvc4-latest-release --lang smt2 --strings-exp"
27 | 
28 | z3:
29 | 	stringbreak "z3str3-develop-release smt.string_solver=z3str3 -in"
30 | 
31 | test:
32 | 	python3 -m unittest tests/*.py
33 | 
34 | develop: test
35 | 	python3 setup.py develop
36 | 
37 | install:
38 | 	python3 -m pip install --upgrade pip setuptools wheel
39 | 	python3 setup.py install
40 | 
41 | uninstall:
42 | 	yes | pip3 uninstall stringfuzz
43 | 
44 | reinstall: uninstall install
45 | 
46 | clean:
47 | 	$(RM) *.pyc
48 | 	$(RM) -r ./**/__pycache__
49 | 	$(RM) -r build
50 | 	$(RM) -r dist
51 | 	$(RM) -r *.egg-info
52 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Description
 2 | ===========
 3 | 
 4 | A collection of tools to manipulate and generate SMT-LIB 2.x problem instances.
 5 | There are four main tools:
 6 | 
 7 | - `stringfuzzg` to generate new instances
 8 | - `stringfuzzx` to transform existing instances
 9 | - `stringstats` to measure properties of instances
10 | - `stringmerge` to merge several instances into one
11 | 
12 | Installing
13 | ==========
14 | 
15 | Clone this repository, and run this command inside it:
16 | 
17 |     python3 setup.py install
18 | 
19 | Running
20 | =======
21 | 
22 | Without installing, the scripts can be run from the repository root as follows:
23 | 
24 |     ./bin/stringfuzzg --help
25 |     ./bin/stringfuzzx --help
26 |     ./bin/stringstats --help
27 | 
28 | If installed, they can be run from anywhere as follows:
29 | 
30 |     stringfuzzg --help
31 |     stringfuzzx --help
32 |     stringstats --help
33 | 
34 | Examples
35 | ========
36 | 
37 | To create a problem with concats nested 100 levels deep:
38 | 
39 |     ./bin/stringfuzzg concats --depth 100
40 | 
41 | To create the above problem and replace all characters with unprintable ones:
42 | 
43 |     ./bin/stringfuzzg concats --depth 100 | ./bin/stringfuzzx unprintable
44 | 
45 | To create and immediately feed a problem to Z3str3:
46 | 
47 |     ./bin/stringfuzzg concats --depth 100 | z3str3 -in
48 | 


--------------------------------------------------------------------------------
/bin/smtparse:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | CLI for the parser. Outputs AST.
 5 | '''
 6 | 
 7 | import sys
 8 | import argparse
 9 | 
10 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING
11 | from stringfuzz.parser import parse
12 | 
13 | def main():
14 | 
15 |     # create arg parser
16 |     parser = argparse.ArgumentParser(description='Parse an SMT 2.* file.')
17 |     parser.add_argument(
18 |         'file',
19 |         nargs   = '?',
20 |         default = sys.stdin,
21 |         type    = argparse.FileType('r'),
22 |         help    = 'input file (default: stdin)'
23 |     )
24 |     parser.add_argument(
25 |         '--language',
26 |         '-l',
27 |         dest    = 'language',
28 |         type    = str,
29 |         choices = LANGUAGES,
30 |         default = SMT_25_STRING,
31 |         help    = 'input language (default: {})'.format(SMT_25_STRING)
32 |     )
33 | 
34 |     # parse args
35 |     args = parser.parse_args()
36 | 
37 |     # parse input
38 |     try:
39 |         expressions = parse(args.file.read(), args.language)
40 | 
41 |     # handle errors
42 |     except IndexError as e:
43 |         print(e, file=sys.stderr)
44 |         return 1
45 | 
46 |     # print result
47 |     for expression in expressions:
48 |         print(expression)
49 | 
50 |     return 0
51 | 
52 | if __name__ == '__main__':
53 |     main()
54 | 


--------------------------------------------------------------------------------
/bin/smtscan:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | CLI for the scanner. Outputs tokens.
 5 | '''
 6 | 
 7 | import sys
 8 | import argparse
 9 | 
10 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING
11 | from stringfuzz.scanner import scan
12 | 
13 | def main():
14 | 
15 |     # create arg parser
16 |     parser = argparse.ArgumentParser(description='Tokenize an SMT 2.* file.')
17 |     parser.add_argument(
18 |         'file',
19 |         nargs   = '?',
20 |         default = sys.stdin,
21 |         type    = argparse.FileType('r'),
22 |         help    = 'input file (default: stdin)'
23 |     )
24 |     parser.add_argument(
25 |         '--language',
26 |         '-l',
27 |         dest    = 'language',
28 |         type    = str,
29 |         choices = LANGUAGES,
30 |         default = SMT_25_STRING,
31 |         help    = 'input language (default: {})'.format(SMT_25_STRING)
32 |     )
33 | 
34 |     # parse args
35 |     args = parser.parse_args()
36 | 
37 |     # scan input
38 |     try:
39 |         tokens = scan(args.file.read(), language=args.language)
40 | 
41 |     # report result
42 |     except IndexError as e:
43 |         print(e)
44 |         return 1
45 | 
46 |     for token in tokens:
47 |         print(token.name, repr(token.value))
48 | 
49 |     return 0
50 | 
51 | if __name__ == '__main__':
52 |     main()
53 | 


--------------------------------------------------------------------------------
/bin/stringbreak:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import sys
  4 | import argparse
  5 | 
  6 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING
  7 | from stringfuzz.generators import random_ast
  8 | from stringfuzz.generator import generate
  9 | from stringfuzz.parser import parse
 10 | from stringfuzz.smt import smt_string_logic
 11 | 
 12 | from stringfuzz.fuzzers.genetic import simulate
 13 | 
 14 | DEFAULT_NUM_GENERATIONS = 200
 15 | DEFAULT_LOG_RESOLUTION  = 1
 16 | DEFAULT_WORLD_SIZE      = 10
 17 | 
 18 | def main():
 19 | 
 20 |     # create arg parser
 21 |     parser = argparse.ArgumentParser(description='Test an SMT string solver in an exploratory fashion.')
 22 |     parser.add_argument(
 23 |         'command',
 24 |         type = str,
 25 |         help = 'command to run the solver (if input fed on standard in)'
 26 |     )
 27 |     parser.add_argument(
 28 |         '--out-language',
 29 |         '-o',
 30 |         dest    = 'out_language',
 31 |         type    = str,
 32 |         choices = LANGUAGES,
 33 |         default = SMT_25_STRING,
 34 |         help    = 'solver language (default: {})'.format(SMT_25_STRING)
 35 |     )
 36 |     parser.add_argument(
 37 |         '--in-language',
 38 |         '-i',
 39 |         dest    = 'in_language',
 40 |         type    = str,
 41 |         choices = LANGUAGES,
 42 |         default = SMT_25_STRING,
 43 |         help    = 'seed problem language (default: {})'.format(SMT_25_STRING)
 44 |     )
 45 |     parser.add_argument(
 46 |         '--seed-problem',
 47 |         '-s',
 48 |         dest    = 'seed_problem',
 49 |         metavar = 'F',
 50 |         default = None,
 51 |         type    = argparse.FileType('r'),
 52 |         help    = 'input file (default: stdin)'
 53 |     )
 54 |     parser.add_argument(
 55 |         '--num-generations',
 56 |         '-g',
 57 |         dest    = 'num_generations',
 58 |         metavar = 'N',
 59 |         type    = int,
 60 |         default = DEFAULT_NUM_GENERATIONS,
 61 |         help    = 'number of generations (default: {})'.format(DEFAULT_NUM_GENERATIONS)
 62 |     )
 63 | 
 64 |     # parse args
 65 |     args = parser.parse_args()
 66 | 
 67 |     # create seed problem, or use an existing one
 68 |     if args.seed_problem is None:
 69 |         seed_problem = [smt_string_logic()] + random_ast(
 70 |             num_vars            = 1,
 71 |             num_asserts         = 5,
 72 |             depth               = 3,
 73 |             max_terms           = 5,
 74 |             max_str_lit_length  = 10,
 75 |             max_int_lit         = 30,
 76 |             literal_probability = 0.5,
 77 |             semantically_valid  = True
 78 |         )
 79 |     else:
 80 |         seed_problem = parse(args.seed_problem.read(), args.in_language)
 81 | 
 82 |     # print seed problem
 83 |     print('progenitor:')
 84 |     print('-----')
 85 |     print(generate(seed_problem, args.out_language))
 86 |     print('-----')
 87 | 
 88 |     # run the tester
 89 |     print('')
 90 |     print('simulating ...')
 91 |     surviving_problems = simulate(
 92 |         progenitor      = seed_problem,
 93 |         language        = args.out_language,
 94 |         saint_peter     = args.command,
 95 |         num_generations = args.num_generations,
 96 |         world_size      = DEFAULT_WORLD_SIZE,
 97 |         log_resolution  = DEFAULT_LOG_RESOLUTION
 98 |     )
 99 |     print('finished')
100 |     print('')
101 | 
102 |     # print out final population
103 |     for i, problem in enumerate(surviving_problems):
104 |         print('survivor #{}'.format(i))
105 |         print('-----')
106 |         print(generate(problem, args.out_language))
107 |         print('-----')
108 | 
109 | if __name__ == '__main__':
110 |     main()
111 | 


--------------------------------------------------------------------------------
/bin/stringfuzzg:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | '''
  4 | The fuzzer tool that generates new problems.
  5 | '''
  6 | 
  7 | import sys
  8 | import argparse
  9 | import random
 10 | 
 11 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING
 12 | from stringfuzz.parser import parse
 13 | from stringfuzz.generator import generate
 14 | from stringfuzz.smt import smt_get_model, smt_string_logic
 15 | 
 16 | from stringfuzz.generators import concats, SYNTACTIC_DEPTH, SEMANTIC_DEPTH
 17 | from stringfuzz.generators import overlaps
 18 | from stringfuzz.generators import lengths
 19 | from stringfuzz.generators import random_text
 20 | from stringfuzz.generators import random_ast
 21 | from stringfuzz.generators import equality
 22 | from stringfuzz.generators import regex, INCREASING_LITERALS, RANDOM_LITERALS, MEMBER_IN, MEMBER_NOT_IN, MEMBER_ALTERNATING, MEMBER_RANDOM, OPERATOR_STAR, OPERATOR_PLUS, OPERATOR_UNION, OPERATOR_INTER, OPERATOR_CONCAT, OPERATOR_ALTERNATING, OPERATOR_RANDOM
 23 | 
 24 | # constants
 25 | CONCATS     = 'concats'
 26 | LENGTHS     = 'lengths'
 27 | OVERLAPS    = 'overlaps'
 28 | RANDOM_TEXT = 'random-text'
 29 | RANDOM_AST  = 'random-ast'
 30 | REGEX       = 'regex'
 31 | EQUALITY    = 'equality'
 32 | 
 33 | GENERATORS = {
 34 |     CONCATS:     concats,
 35 |     LENGTHS:     lengths,
 36 |     OVERLAPS:    overlaps,
 37 |     RANDOM_TEXT: random_text,
 38 |     RANDOM_AST:  random_ast,
 39 |     REGEX:       regex,
 40 |     EQUALITY:    equality,
 41 | }
 42 | 
 43 | DEPTH_TYPES      = [SYNTACTIC_DEPTH, SEMANTIC_DEPTH]
 44 | LITERAL_TYPES    = [INCREASING_LITERALS, RANDOM_LITERALS]
 45 | MEMBERSHIP_TYPES = [MEMBER_IN, MEMBER_NOT_IN, MEMBER_ALTERNATING, MEMBER_RANDOM]
 46 | OPERATOR_LIST    = [OPERATOR_STAR, OPERATOR_PLUS, OPERATOR_UNION, OPERATOR_INTER, OPERATOR_CONCAT]
 47 | OPERATOR_TYPES   = [OPERATOR_ALTERNATING, OPERATOR_RANDOM]
 48 | 
 49 | # defaults
 50 | DEFAULT_SEED           = 0
 51 | DEFAULT_RANDOM         = False
 52 | DEFAULT_PRODUCE_MODELS = False
 53 | 
 54 | DEFAULT_LENGTH           = 10
 55 | DEFAULT_DEPTH            = 5
 56 | DEFAULT_SOLUTION         = None
 57 | DEFAULT_BALANCED         = False
 58 | DEFAULT_RANDOM_RELATIONS = False
 59 | DEFAULT_DEPTH_TYPE       = SYNTACTIC_DEPTH
 60 | DEFAULT_EXTRACTED        = 0
 61 | DEFAULT_EXTRACT_INDEX    = 10
 62 | 
 63 | DEFAULT_NUM_VARS    = 5
 64 | DEFAULT_MIN_LEN     = 0
 65 | DEFAULT_MAX_LEN     = 10
 66 | DEFAULT_NUM_CONCATS = 0
 67 | 
 68 | DEFAULT_NUM_OVERLAPPING  = 2
 69 | DEFAULT_LENGTH_OF_CONSTS = 5
 70 | 
 71 | DEFAULT_NUM_EQUAL_EXPRESSIONS   = 2
 72 | DEFAULT_TERMS_PER_EXPRESSION    = 7
 73 | DEFAULT_PREFIX_LENGTH           = 5
 74 | DEFAULT_SUFFIX_LENGTH           = 0
 75 | DEFAULT_INFIX_LENGTH            = 3
 76 | DEFAULT_INFIX_PROBABILITY       = 0.2
 77 | DEFAULT_ADD_INFIXES             = False
 78 | DEFAULT_EQUAL_RANDOMISE_LENGTHS = False
 79 | 
 80 | DEFAULT_LITERAL_TYPE    = RANDOM_LITERALS
 81 | DEFAULT_MEMBERSHIP_TYPE = MEMBER_IN
 82 | DEFAULT_REGEX_COINCIDE  = False
 83 | DEFAULT_NUM_REGEX_TERMS = 3
 84 | DEFAULT_NUM_REGEXES     = 1
 85 | DEFAULT_REGEX_LIT_MIN   = 1
 86 | DEFAULT_REGEX_LIT_MAX   = 3
 87 | DEFAULT_TERM_DEPTH      = 0
 88 | DEFAULT_REGEX_VAR_MIN   = None
 89 | DEFAULT_REGEX_VAR_MAX   = None
 90 | DEFAULT_OPERATORS       = OPERATOR_STAR + OPERATOR_PLUS + OPERATOR_UNION
 91 | DEFAULT_OPERATOR_TYPE   = OPERATOR_RANDOM
 92 | 
 93 | DEFAULT_NUM_RANDOM_VARS     = 5
 94 | DEFAULT_NUM_RANDOM_ASSERTS  = 10
 95 | DEFAULT_RANDOM_DEPTH        = 2
 96 | DEFAULT_MAX_RANDOM_TERMS    = 5
 97 | DEFAULT_MAX_RANDOM_STRINGS  = 10
 98 | DEFAULT_MAX_RANDOM_NUMBERS  = 10
 99 | DEFAULT_SEMANTICALLY_VALID  = False
100 | DEFAULT_LITERAL_PROBABILITY = 0.1
101 | 
102 | def main():
103 | 
104 |     # create arg parser
105 |     global_parser = argparse.ArgumentParser(description='SMTLIB 2.* problem generator.')
106 | 
107 |     # global args
108 |     global_parser.add_argument(
109 |         '--language',
110 |         '-l',
111 |         dest    = 'language',
112 |         type    = str,
113 |         choices = LANGUAGES,
114 |         default = SMT_25_STRING,
115 |         help    = 'output language (default: {})'.format(SMT_25_STRING)
116 |     )
117 |     global_parser.add_argument(
118 |         '--models',
119 |         '-m',
120 |         dest    = 'produce_models',
121 |         action  = 'store_true',
122 |         default = DEFAULT_PRODUCE_MODELS,
123 |         help    = 'append the SMT 2.x command to produce a model (default: {})'.format(DEFAULT_PRODUCE_MODELS)
124 |     )
125 |     seed_group = global_parser.add_mutually_exclusive_group()
126 |     seed_group.add_argument(
127 |         '--seed',
128 |         '-s',
129 |         dest    = 'seed',
130 |         metavar = 'S',
131 |         type    = int,
132 |         default = DEFAULT_SEED,
133 |         help    = 'seed for random number generator (default: {})'.format(DEFAULT_SEED)
134 |     )
135 |     seed_group.add_argument(
136 |         '--random',
137 |         '-r',
138 |         dest    = 'random',
139 |         action  = 'store_true',
140 |         default = DEFAULT_RANDOM,
141 |         help    = 'seed the random number generator with the current time (default: {})'.format(DEFAULT_RANDOM)
142 |     )
143 | 
144 |     # get subparsers
145 |     subparsers = global_parser.add_subparsers(dest='generator', help='generator choice')
146 |     subparsers.required = True
147 | 
148 |     # concats fuzzer
149 |     concats_parser = subparsers.add_parser(CONCATS, help='instance with deeply nested concats')
150 |     concats_parser.add_argument(
151 |         '--depth',
152 |         '-d',
153 |         dest    = 'depth',
154 |         metavar = 'D',
155 |         type    = int,
156 |         default = DEFAULT_DEPTH,
157 |         help    = 'depth of the concats (default: {})'.format(DEFAULT_DEPTH)
158 |     )
159 |     concats_parser.add_argument(
160 |         '--depth-type',
161 |         '-t',
162 |         dest    = 'depth_type',
163 |         type    = str,
164 |         choices = DEPTH_TYPES,
165 |         default = DEFAULT_DEPTH_TYPE,
166 |         help    = 'type of depth (default: {})'.format(DEFAULT_DEPTH_TYPE)
167 |     )
168 |     concats_parser.add_argument(
169 |         '--solution',
170 |         '-s',
171 |         dest    = 'solution',
172 |         metavar = 'S',
173 |         type    = str,
174 |         default = DEFAULT_SOLUTION,
175 |         help    = 'expected solution (default: {!r})'.format(DEFAULT_SOLUTION)
176 |     )
177 |     concats_parser.add_argument(
178 |         '--extract',
179 |         '-e',
180 |         dest    = 'num_extracts',
181 |         metavar = 'N',
182 |         type    = int,
183 |         default = DEFAULT_EXTRACTED,
184 |         help    = 'number of extracts to add (default: {})'.format(DEFAULT_EXTRACTED)
185 |     )
186 |     concats_parser.add_argument(
187 |         '--extract-max',
188 |         '-m',
189 |         dest    = 'max_extract_index',
190 |         metavar = 'N',
191 |         type    = int,
192 |         default = DEFAULT_EXTRACT_INDEX,
193 |         help    = 'max index from which to extract (default: {})'.format(DEFAULT_EXTRACT_INDEX)
194 |     )
195 |     concats_parser.add_argument(
196 |         '--balanced',
197 |         '-b',
198 |         action  = 'store_true',
199 |         default = DEFAULT_BALANCED,
200 |         help    = 'flag for balanced tree (default: {!r})'.format(DEFAULT_BALANCED)
201 |     )
202 | 
203 |     # lengths fuzzer
204 |     lengths_parser = subparsers.add_parser(LENGTHS, help='instance with length constraints')
205 |     lengths_parser.add_argument(
206 |         '--num-vars',
207 |         '-v',
208 |         dest    = 'num_vars',
209 |         metavar = 'N',
210 |         type    = int,
211 |         default = DEFAULT_NUM_VARS,
212 |         help    = 'number of variables to create (default: {})'.format(DEFAULT_NUM_VARS)
213 |     )
214 |     lengths_parser.add_argument(
215 |         '--min-length',
216 |         '-n',
217 |         dest    = 'min_length',
218 |         metavar = 'N',
219 |         type    = int,
220 |         default = DEFAULT_MIN_LEN,
221 |         help    = 'lower bound on length (default: {})'.format(DEFAULT_MIN_LEN)
222 |     )
223 |     lengths_parser.add_argument(
224 |         '--max-length',
225 |         '-x',
226 |         dest    = 'max_length',
227 |         metavar = 'N',
228 |         type    = int,
229 |         default = DEFAULT_MAX_LEN,
230 |         help    = 'upper bound on length (default: {})'.format(DEFAULT_MAX_LEN)
231 |     )
232 |     lengths_parser.add_argument(
233 |         '--num-concats',
234 |         '-c',
235 |         dest    = 'num_concats',
236 |         metavar = 'N',
237 |         type    = int,
238 |         default = DEFAULT_NUM_CONCATS,
239 |         help    = 'number of (binary) concats to add (default: {})'.format(DEFAULT_NUM_CONCATS)
240 |     )
241 |     lengths_parser.add_argument(
242 |         '--random-relations',
243 |         '-r',
244 |         action  = 'store_true',
245 |         dest    = 'random_relations',
246 |         default = DEFAULT_RANDOM_RELATIONS,
247 |         help    = 'use constraints other than "=" (default: {!r})'.format(DEFAULT_RANDOM_RELATIONS)
248 |     )
249 | 
250 |     # overlaps fuzzer
251 |     overlaps_parser = subparsers.add_parser(OVERLAPS, help='instance with overlapping variables')
252 |     overlaps_parser.add_argument(
253 |         '--num-vars',
254 |         '-n',
255 |         dest    = 'num_vars',
256 |         metavar = 'N',
257 |         type    = int,
258 |         default = DEFAULT_NUM_OVERLAPPING,
259 |         help    = 'number of overlapping variables to generate (default: {})'.format(DEFAULT_NUM_OVERLAPPING)
260 |     )
261 |     overlaps_parser.add_argument(
262 |         '--length-of-consts',
263 |         '-c',
264 |         dest    = 'length_of_consts',
265 |         metavar = 'N',
266 |         type    = int,
267 |         default = DEFAULT_LENGTH_OF_CONSTS,
268 |         help    = 'the length of the constant terms (default: {})'.format(DEFAULT_LENGTH_OF_CONSTS)
269 |     )
270 | 
271 |     # equality fuzzer
272 |     equality_parser = subparsers.add_parser(EQUALITY, help='instance with concatenated expressions (of mixed constants and variables) all equal to each other')
273 |     equality_parser.add_argument(
274 |         '--num-exprs',
275 |         '-n',
276 |         dest    = 'num_expressions',
277 |         metavar = 'N',
278 |         type    = int,
279 |         default = DEFAULT_NUM_EQUAL_EXPRESSIONS,
280 |         help    = 'number of equal expressions to generate (default: {})'.format(DEFAULT_NUM_EQUAL_EXPRESSIONS)
281 |     )
282 |     equality_parser.add_argument(
283 |         '--num-terms',
284 |         '-t',
285 |         dest    = 'num_terms',
286 |         metavar = 'N',
287 |         type    = int,
288 |         default = DEFAULT_TERMS_PER_EXPRESSION,
289 |         help    = 'number terms in each expression (default: {})'.format(DEFAULT_TERMS_PER_EXPRESSION)
290 |     )
291 |     equality_parser.add_argument(
292 |         '--prefix-len',
293 |         '-p',
294 |         dest    = 'prefix_length',
295 |         metavar = 'N',
296 |         type    = int,
297 |         default = DEFAULT_PREFIX_LENGTH,
298 |         help    = 'length of constant prefix (default: {})'.format(DEFAULT_PREFIX_LENGTH)
299 |     )
300 |     equality_parser.add_argument(
301 |         '--suffix-len',
302 |         '-s',
303 |         dest    = 'suffix_length',
304 |         metavar = 'N',
305 |         type    = int,
306 |         default = DEFAULT_SUFFIX_LENGTH,
307 |         help    = 'length of constant suffix (default: {})'.format(DEFAULT_SUFFIX_LENGTH)
308 |     )
309 |     equality_parser.add_argument(
310 |         '--infix-len',
311 |         '-i',
312 |         dest    = 'infix_length',
313 |         metavar = 'N',
314 |         type    = int,
315 |         default = DEFAULT_INFIX_LENGTH,
316 |         help    = 'lengths of constant infixes (default: {})'.format(DEFAULT_INFIX_LENGTH)
317 |     )
318 |     equality_parser.add_argument(
319 |         '--infix-chance',
320 |         '-c',
321 |         dest    = 'infix_probability',
322 |         metavar = 'P',
323 |         type    = float,
324 |         default = DEFAULT_INFIX_PROBABILITY,
325 |         help    = 'probability of constant infixes replacing variables (default: {})'.format(DEFAULT_INFIX_PROBABILITY)
326 |     )
327 |     equality_parser.add_argument(
328 |         '--add-infixes',
329 |         '-m',
330 |         dest    = 'add_infixes',
331 |         action  = 'store_true',
332 |         default = DEFAULT_ADD_INFIXES,
333 |         help    = 'add constant infixes to expressions with 50%% chance (default: {})'.format(DEFAULT_ADD_INFIXES)
334 |     )
335 |     equality_parser.add_argument(
336 |         '--random-lengths',
337 |         '-r',
338 |         dest    = 'randomise_lengths',
339 |         action  = 'store_true',
340 |         default = DEFAULT_EQUAL_RANDOMISE_LENGTHS,
341 |         help    = 'treat all length settings as upper bounds only (default: {})'.format(DEFAULT_EQUAL_RANDOMISE_LENGTHS)
342 |     )
343 | 
344 |     # regex-pair fuzzer
345 |     regex_parser = subparsers.add_parser(REGEX, help='instance testing one variable for regex membership')
346 |     regex_parser.add_argument(
347 |         '--literal-type',
348 |         '-a',
349 |         dest    = 'literal_type',
350 |         type    = str,
351 |         choices = LITERAL_TYPES,
352 |         default = DEFAULT_LITERAL_TYPE,
353 |         help    = 'way to generate regex literals (default: {})'.format(DEFAULT_LITERAL_TYPE)
354 |     )
355 |     regex_parser.add_argument(
356 |         '--coincide',
357 |         '-c',
358 |         dest    = 'reset_alphabet',
359 |         action  = 'store_true',
360 |         default = DEFAULT_REGEX_COINCIDE,
361 |         help    = 'try to make the regexes share alphabets (default: {})'.format(DEFAULT_REGEX_COINCIDE)
362 |     )
363 |     regex_parser.add_argument(
364 |         '--membership-type',
365 |         '-i',
366 |         dest    = 'membership_type',
367 |         type    = str,
368 |         choices = MEMBERSHIP_TYPES,
369 |         default = DEFAULT_MEMBERSHIP_TYPE,
370 |         help    = 'way to test regex membership (default: {})'.format(DEFAULT_MEMBERSHIP_TYPE)
371 |     )
372 |     regex_parser.add_argument(
373 |         '--num-terms',
374 |         '-t',
375 |         dest    = 'num_terms',
376 |         metavar = 'N',
377 |         type    = int,
378 |         default = DEFAULT_NUM_REGEX_TERMS,
379 |         help    = 'number of terms in each regex (default: {})'.format(DEFAULT_NUM_REGEX_TERMS)
380 |     )
381 |     regex_parser.add_argument(
382 |         '--lit-min',
383 |         '-m',
384 |         dest    = 'literal_min',
385 |         metavar = 'N',
386 |         type    = int,
387 |         default = DEFAULT_REGEX_LIT_MIN,
388 |         help    = 'min length of regex terms (default: {})'.format(DEFAULT_REGEX_LIT_MIN)
389 |     )
390 |     regex_parser.add_argument(
391 |         '--lit-max',
392 |         '-x',
393 |         dest    = 'literal_max',
394 |         metavar = 'N',
395 |         type    = int,
396 |         default = DEFAULT_REGEX_LIT_MAX,
397 |         help    = 'max length of regex terms (default: {})'.format(DEFAULT_REGEX_LIT_MAX)
398 |     )
399 |     regex_parser.add_argument(
400 |         '--depth',
401 |         '-d',
402 |         dest    = 'term_depth',
403 |         metavar = 'D',
404 |         type    = int,
405 |         default = DEFAULT_TERM_DEPTH,
406 |         help    = 'depth of terms (default: {})'.format(DEFAULT_TERM_DEPTH)
407 |     )
408 |     regex_parser.add_argument(
409 |         '--num-regexes',
410 |         '-r',
411 |         dest    = 'num_regexes',
412 |         metavar = 'N',
413 |         type    = int,
414 |         default = DEFAULT_NUM_REGEXES,
415 |         help    = 'number of regexes to test (default: {})'.format(DEFAULT_NUM_REGEXES)
416 |     )
417 |     regex_parser.add_argument(
418 |         '--var-min',
419 |         '-M',
420 |         dest    = 'min_var_length',
421 |         metavar = 'N',
422 |         type    = int,
423 |         default = DEFAULT_REGEX_VAR_MIN,
424 |         help    = 'min length of the variable (default: {})'.format(DEFAULT_REGEX_VAR_MIN)
425 |     )
426 |     regex_parser.add_argument(
427 |         '--var-max',
428 |         '-X',
429 |         dest    = 'max_var_length',
430 |         metavar = 'N',
431 |         type    = int,
432 |         default = DEFAULT_REGEX_VAR_MAX,
433 |         help    = 'max length of the variable (default: {})'.format(DEFAULT_REGEX_VAR_MAX)
434 |     )
435 |     regex_parser.add_argument(
436 |         '--operators',
437 |         '-o',
438 |         dest    = 'operators',
439 |         metavar = 'OPS',
440 |         type    = str,
441 |         default = DEFAULT_OPERATORS,
442 |         help    = 'operators to choose from for deep terms. OPS is of the form [{}]+ representing the operators star ({}), plus ({}), union ({}), intersection ({}), concatenation ({}) (default: {})'.format(''.join(OPERATOR_LIST), OPERATOR_STAR, OPERATOR_PLUS, OPERATOR_UNION, OPERATOR_INTER, OPERATOR_CONCAT, DEFAULT_OPERATORS)
443 |     )
444 |     regex_parser.add_argument(
445 |         '--operator-type',
446 |         '-O',
447 |         dest    = 'operator_type',
448 |         type    = str,
449 |         choices = OPERATOR_TYPES,
450 |         default = DEFAULT_OPERATOR_TYPE,
451 |         help    = 'way to choose operator for deep terms (default: {})'.format(DEFAULT_OPERATOR_TYPE)
452 |     )
453 | 
454 |     # random text fuzzer
455 |     random_parser = subparsers.add_parser(RANDOM_TEXT, help='totally random text')
456 |     random_parser.add_argument(
457 |         '--length',
458 |         '-l',
459 |         dest    = 'length',
460 |         metavar = 'L',
461 |         type    = int,
462 |         default = DEFAULT_LENGTH,
463 |         help    = 'length of the text (default: {})'.format(DEFAULT_LENGTH)
464 |     )
465 | 
466 |     # random_ast fuzzer
467 |     random_ast_parser = subparsers.add_parser(RANDOM_AST, help='random but syntactically valid problem')
468 |     random_ast_parser.add_argument(
469 |         '--num-vars',
470 |         '-v',
471 |         dest    = 'num_vars',
472 |         metavar = 'N',
473 |         type    = int,
474 |         default = DEFAULT_NUM_RANDOM_VARS,
475 |         help    = 'number of variables (of each sort) in the problem (default: {})'.format(DEFAULT_NUM_RANDOM_VARS)
476 |     )
477 |     random_ast_parser.add_argument(
478 |         '--num-asserts',
479 |         '-n',
480 |         dest    = 'num_asserts',
481 |         metavar = 'N',
482 |         type    = int,
483 |         default = DEFAULT_NUM_RANDOM_ASSERTS,
484 |         help    = 'number of asserts in the problem (default: {})'.format(DEFAULT_NUM_RANDOM_ASSERTS)
485 |     )
486 |     random_ast_parser.add_argument(
487 |         '--depth',
488 |         '-d',
489 |         dest    = 'depth',
490 |         metavar = 'D',
491 |         type    = int,
492 |         default = DEFAULT_RANDOM_DEPTH,
493 |         help    = 'depth of nested expressions (default: {})'.format(DEFAULT_RANDOM_DEPTH)
494 |     )
495 |     random_ast_parser.add_argument(
496 |         '--max-terms',
497 |         '-t',
498 |         dest    = 'max_terms',
499 |         metavar = 'N',
500 |         type    = int,
501 |         default = DEFAULT_MAX_RANDOM_TERMS,
502 |         help    = 'maximum number of terms for n-ary expressions, like concats (default: {})'.format(DEFAULT_MAX_RANDOM_TERMS)
503 |     )
504 |     random_ast_parser.add_argument(
505 |         '--max-string',
506 |         '-l',
507 |         dest    = 'max_str_lit_length',
508 |         metavar = 'N',
509 |         type    = int,
510 |         default = DEFAULT_MAX_RANDOM_STRINGS,
511 |         help    = 'maximum length of string literals (default: {})'.format(DEFAULT_MAX_RANDOM_STRINGS)
512 |     )
513 |     random_ast_parser.add_argument(
514 |         '--max-int',
515 |         '-x',
516 |         dest    = 'max_int_lit',
517 |         metavar = 'N',
518 |         type    = int,
519 |         default = DEFAULT_MAX_RANDOM_NUMBERS,
520 |         help    = 'maximum size of int literals (default: {})'.format(DEFAULT_MAX_RANDOM_NUMBERS)
521 |     )
522 |     random_ast_parser.add_argument(
523 |         '--meaningful',
524 |         '-m',
525 |         dest    = 'semantically_valid',
526 |         action  = 'store_true',
527 |         default = DEFAULT_SEMANTICALLY_VALID,
528 |         help    = 'generate semantically valid problems (default: {})'.format(DEFAULT_SEMANTICALLY_VALID)
529 |     )
530 |     random_ast_parser.add_argument(
531 |         '--literal-chance',
532 |         '-p',
533 |         dest    = 'literal_probability',
534 |         metavar = 'P',
535 |         type    = float,
536 |         default = DEFAULT_LITERAL_PROBABILITY,
537 |         help    = 'probability of creating literals instead of variables (default: {})'.format(DEFAULT_LITERAL_PROBABILITY)
538 |     )
539 | 
540 |     # parse args
541 |     args = global_parser.parse_args()
542 | 
543 |     # get the generator function based on args
544 |     generator_name = args.generator
545 |     generator      = GENERATORS[generator_name]
546 | 
547 |     # seed the RNG
548 |     if args.random is True:
549 |         random.seed()
550 |     else:
551 |         random.seed(args.seed)
552 | 
553 |     # get some flags that will get popped from args before they're used
554 |     produce_models = args.produce_models
555 |     language       = args.language
556 | 
557 |     # get args as a dict
558 |     # NOTE:
559 |     #      argparse's Namespace object (which 'args' is) returns itself as a
560 |     #      dict when vars() is called on it
561 |     generator_args = vars(args)
562 | 
563 |     # pop arguments that are specific to this script because
564 |     # they shouldn't be passed on to the generator
565 |     generator_args.pop('language')
566 |     generator_args.pop('produce_models')
567 |     generator_args.pop('generator')
568 |     generator_args.pop('seed')
569 |     generator_args.pop('random')
570 | 
571 |     # run the generator with the args
572 |     generated = generator(**generator_args)
573 | 
574 |     # prepend the logic setting
575 |     generated = [smt_string_logic()] + generated
576 | 
577 |     # the random text generator produces raw text
578 |     if (generator == random_text):
579 |         print(generated)
580 | 
581 |     # other generators produce ASTs
582 |     else:
583 | 
584 |         # add the model-getting node if needed
585 |         if produce_models is True:
586 |             generated.append(smt_get_model())
587 | 
588 |         print(generate(generated, language))
589 | 
590 | if __name__ == '__main__':
591 |     main()
592 | 


--------------------------------------------------------------------------------
/bin/stringfuzzx:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | '''
  4 | The fuzzer tool that transforms existing problems.
  5 | '''
  6 | 
  7 | import sys
  8 | import argparse
  9 | import random
 10 | 
 11 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING
 12 | from stringfuzz.transformers import unprintable, nop, rotate, fuzz, graft, translate, reverse, multiply
 13 | from stringfuzz.generator import generate
 14 | from stringfuzz.parser import parse, ParsingError
 15 | from stringfuzz.ast import SettingNode, ExpressionNode, MetaCommandNode
 16 | 
 17 | # constants
 18 | UNPRINTABLE = 'unprintable'
 19 | NOP         = 'nop'
 20 | ROTATE      = 'rotate'
 21 | FUZZ        = 'fuzz'
 22 | GRAFT       = 'graft'
 23 | TRANSLATE   = 'translate'
 24 | REVERSE     = 'reverse'
 25 | MULTIPLY    = 'multiply'
 26 | 
 27 | TRANSFORMERS = {
 28 |     UNPRINTABLE: unprintable,
 29 |     NOP:         nop,
 30 |     ROTATE:      rotate,
 31 |     FUZZ:        fuzz,
 32 |     GRAFT:       graft,
 33 |     TRANSLATE:   translate,
 34 |     REVERSE:     reverse,
 35 |     MULTIPLY:    multiply
 36 | }
 37 | 
 38 | # defaults
 39 | DEFAULT_SEED           = 0
 40 | DEFAULT_RANDOM         = False
 41 | DEFAULT_FACTOR         = 2
 42 | DEFAULT_INTEGER_FLAG   = False
 43 | DEFAULT_SKIP_RE_RANGE  = True
 44 | DEFAULT_SKIP_STR_TO_RE = True
 45 | 
 46 | GET_MODEL = "get-model"
 47 | GET_INFO  = "get-info"
 48 | TO_STRIP  = [GET_MODEL, GET_INFO]
 49 | 
 50 | def should_keep(expr):
 51 |     if isinstance(expr, SettingNode):
 52 |         return False
 53 |     if isinstance(expr, MetaCommandNode):
 54 |         return False
 55 |     if isinstance(expr, ExpressionNode):
 56 |         if expr.symbol in TO_STRIP:
 57 |             return False
 58 |     return True
 59 | 
 60 | def main():
 61 | 
 62 |     # create arg parser
 63 |     global_parser = argparse.ArgumentParser(description='SMTLIB 2.* problem transformer.')
 64 |     global_parser.add_argument(
 65 |         '--file',
 66 |         '-f',
 67 |         dest    = 'input_file',
 68 |         metavar = 'F',
 69 |         default = sys.stdin,
 70 |         type    = argparse.FileType('r'),
 71 |         help    = 'input file (default: stdin)'
 72 |     )
 73 |     global_parser.add_argument(
 74 |         '--in-lang',
 75 |         '-i',
 76 |         dest    = 'input_language',
 77 |         type    = str,
 78 |         choices = LANGUAGES,
 79 |         default = SMT_25_STRING,
 80 |         help    = 'input language (default: {})'.format(SMT_25_STRING)
 81 |     )
 82 |     global_parser.add_argument(
 83 |         '--out-lang',
 84 |         '-o',
 85 |         dest    = 'output_language',
 86 |         type    = str,
 87 |         choices = LANGUAGES,
 88 |         default = SMT_25_STRING,
 89 |         help    = 'output language (default: {})'.format(SMT_25_STRING)
 90 |     )
 91 | 
 92 |     seed_group = global_parser.add_mutually_exclusive_group()
 93 |     seed_group.add_argument(
 94 |         '--seed',
 95 |         '-s',
 96 |         dest    = 'seed',
 97 |         metavar = 'S',
 98 |         type    = int,
 99 |         default = DEFAULT_SEED,
100 |         help    = 'seed for random number generator (default: {})'.format(DEFAULT_SEED)
101 |     )
102 |     seed_group.add_argument(
103 |         '--random',
104 |         '-r',
105 |         dest    = 'random',
106 |         action  = 'store_true',
107 |         default = DEFAULT_RANDOM,
108 |         help    = 'seed the random number generator with the current time (default: {})'.format(DEFAULT_RANDOM)
109 |     )
110 | 
111 |     # get subparsers
112 |     subparsers = global_parser.add_subparsers(dest='transformer', help='transformer choice')
113 |     subparsers.required = True
114 | 
115 |     # fuzz transformer
116 |     fuzz_parser = subparsers.add_parser(FUZZ, help='fuzz transformer')
117 |     fuzz_parser.add_argument(
118 |         '--re-range',
119 |         dest    = 'skip_re_range',
120 |         action  = 'store_false',
121 |         default = DEFAULT_SKIP_RE_RANGE,
122 |         help    = 'Include re_range nodes in multiplication (default: {})'.format(DEFAULT_SKIP_RE_RANGE)
123 |     )
124 |     # graft transformer
125 |     graft_parser = subparsers.add_parser(GRAFT, help='graft transformer')
126 |     graft_parser.add_argument(
127 |         '--str-to-re',
128 |         dest    = 'skip_str_to_re',
129 |         action  = 'store_false',
130 |         default = DEFAULT_SKIP_STR_TO_RE,
131 |         help    = 'Include str_to_re nodes in grafting (default: {})'.format(DEFAULT_SKIP_STR_TO_RE)
132 |     )
133 | 
134 |     # multiply transformer
135 |     multiply_parser = subparsers.add_parser(MULTIPLY, help='multiply transformer')
136 |     multiply_parser.add_argument(
137 |         '--factor',
138 |         dest    = 'factor',
139 |         metavar = 'N',
140 |         type    = int,
141 |         default = DEFAULT_FACTOR,
142 |         help    = 'number to multiply literals by (default: {})'.format(DEFAULT_FACTOR)
143 |     )
144 |     multiply_parser.add_argument(
145 |         '--re-range',
146 |         dest    = 'skip_re_range',
147 |         action  = 'store_false',
148 |         default = DEFAULT_SKIP_RE_RANGE,
149 |         help    = 'Include re_range nodes in multiplication (default: {})'.format(DEFAULT_SKIP_RE_RANGE)
150 |     )
151 | 
152 |     # nop transformer
153 |     nop_parser = subparsers.add_parser(NOP, help='nop transformer')
154 | 
155 |     # reverse transformer
156 |     reverse_parser = subparsers.add_parser(REVERSE, help='reverse transformer')
157 | 
158 |     # rotate transformer
159 |     rotate_parser = subparsers.add_parser(ROTATE, help='rotate transformer')
160 | 
161 |     # translate transformer
162 |     translate_parser = subparsers.add_parser(TRANSLATE, help='translate transformer')
163 |     translate_parser.add_argument(
164 |         '--integers',
165 |         dest    = 'integer_flag',
166 |         action  = 'store_true',
167 |         default = DEFAULT_INTEGER_FLAG,
168 |         help    = 'Include integers in translation (default: {})'.format(DEFAULT_INTEGER_FLAG)
169 |     )
170 |     translate_parser.add_argument(
171 |         '--re-range',
172 |         dest    = 'skip_re_range',
173 |         action  = 'store_false',
174 |         default = DEFAULT_SKIP_RE_RANGE,
175 |         help    = 'Include re_range nodes in translation (default: {})'.format(DEFAULT_SKIP_RE_RANGE)
176 |     )
177 | 
178 |     # unprintable transformer
179 |     unprintable_parser = subparsers.add_parser(UNPRINTABLE, help='unprintable transformer')
180 | 
181 |     # parse args
182 |     args = global_parser.parse_args()
183 | 
184 |     # get the transformer function based on args
185 |     transformer_name = args.transformer
186 |     transformer      = TRANSFORMERS[transformer_name]
187 | 
188 |     # get some flags that will get popped from args before they're used
189 |     input_file      = args.input_file
190 |     input_language  = args.input_language
191 |     output_language = args.output_language
192 | 
193 |     # seed the RNG
194 |     if args.random is True:
195 |         random.seed()
196 |     else:
197 |         random.seed(args.seed)
198 | 
199 |     # read input
200 |     raw_in = args.input_file.read()
201 | 
202 |     # parse input
203 |     try:
204 |         ast = parse(raw_in, input_language)
205 |     except ParsingError as e:
206 |         print(e, file=sys.stderr)
207 |         return 1
208 | 
209 |     # the nop transformer should not modify anything
210 |     if transformer != nop:
211 | 
212 |         # filter out suppressed expressions
213 |         ast = list(filter(should_keep, ast))
214 | 
215 |     # get args as a dict
216 |     transformer_args = vars(args)
217 | 
218 |     # pop arguments that are specific to this script because
219 |     # they shouldn't be passed on to the transformer
220 |     transformer_args.pop('input_file')
221 |     transformer_args.pop('input_language')
222 |     transformer_args.pop('output_language')
223 |     transformer_args.pop('seed')
224 |     transformer_args.pop('random')
225 |     transformer_args.pop('transformer')
226 | 
227 |     # run the transformer with the args
228 |     transformed = transformer(ast, **transformer_args)
229 | 
230 |     # transformers produce ASTs
231 |     print(generate(transformed, output_language))
232 | 
233 | if __name__ == '__main__':
234 |     main()
235 | 


--------------------------------------------------------------------------------
/bin/stringmerge:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | '''
  4 | Merge two existing problems.
  5 | '''
  6 | 
  7 | import sys
  8 | import argparse
  9 | import random
 10 | 
 11 | from stringfuzz.constants import LANGUAGES, SMT_25_STRING
 12 | from stringfuzz.generator import generate
 13 | from stringfuzz.parser import parse, ParsingError
 14 | from stringfuzz.smt import smt_string_logic, smt_check_sat
 15 | from stringfuzz.ast import SettingNode, ExpressionNode, MetaCommandNode, GenericExpressionNode
 16 | from stringfuzz.mergers import simple
 17 | 
 18 | #constants
 19 | SIMPLE = 'simple'
 20 | 
 21 | MERGERS = {
 22 |     SIMPLE: simple
 23 | }
 24 | 
 25 | # defaults
 26 | DEFAULT_RENAME_IDS = False
 27 | DEFAULT_SEED       = 0
 28 | DEFAULT_RANDOM     = False
 29 | 
 30 | GET_MODEL     = "get-model"
 31 | GET_INFO      = "get-info"
 32 | EXPR_TO_STRIP = [GET_MODEL, GET_INFO]
 33 | CHECK_SAT     = "check-sat"
 34 | GEN_TO_STRIP  = [CHECK_SAT]
 35 | 
 36 | def should_keep(node):
 37 |     if isinstance(node, SettingNode):
 38 |         return False
 39 |     if isinstance(node, MetaCommandNode):
 40 |         return False
 41 |     if isinstance(node, ExpressionNode):
 42 |         if node.symbol in EXPR_TO_STRIP:
 43 |             return False
 44 |     if isinstance(node, GenericExpressionNode):
 45 |         if node.symbol.name in GEN_TO_STRIP:
 46 |             return False
 47 |     return True
 48 | 
 49 | # entry point
 50 | def main():
 51 | 
 52 |     # create arg parser
 53 |     global_parser = argparse.ArgumentParser(description='SMTLIB 2.* problem merger.')
 54 |     global_parser.add_argument(
 55 |         'files',
 56 |         nargs    = '+',
 57 |         metavar  = 'F',
 58 |         type     = argparse.FileType('r'),
 59 |         help     = 'input files'
 60 |     )
 61 |     global_parser.add_argument(
 62 |         '--in-lang',
 63 |         '-i',
 64 |         dest    = 'input_language',
 65 |         type    = str,
 66 |         choices = LANGUAGES,
 67 |         default = SMT_25_STRING,
 68 |         help    = 'input language (default: {})'.format(SMT_25_STRING)
 69 |     )
 70 |     global_parser.add_argument(
 71 |         '--out-lang',
 72 |         '-o',
 73 |         dest    = 'output_language',
 74 |         type    = str,
 75 |         choices = LANGUAGES,
 76 |         default = SMT_25_STRING,
 77 |         help    = 'output language (default: {})'.format(SMT_25_STRING)
 78 |     )
 79 |     seed_group = global_parser.add_mutually_exclusive_group()
 80 |     seed_group.add_argument(
 81 |         '--seed',
 82 |         '-s',
 83 |         dest    = 'seed',
 84 |         metavar = 'S',
 85 |         type    = int,
 86 |         default = DEFAULT_SEED,
 87 |         help    = 'seed for random number generator (default: {})'.format(DEFAULT_SEED)
 88 |     )
 89 |     seed_group.add_argument(
 90 |         '--random',
 91 |         '-r',
 92 |         dest    = 'random',
 93 |         action  = 'store_true',
 94 |         default = DEFAULT_RANDOM,
 95 |         help    = 'seed the random number generator with the current time (default: {})'.format(DEFAULT_RANDOM)
 96 |     )
 97 | 
 98 |     # get subparsers
 99 |     subparsers = global_parser.add_subparsers(dest='merger', help='merger choice')
100 |     subparsers.required = True
101 | 
102 |     # simple transformer
103 |     simple_parser = subparsers.add_parser(SIMPLE, help='simple transformer')
104 |     simple_parser.add_argument(
105 |         '--rename',
106 |         dest    = 'rename_ids',
107 |         action  = 'store_true',
108 |         default = DEFAULT_RENAME_IDS,
109 |         help    = 'Rename identifiers to avoid conflicts (default: {})'.format(DEFAULT_RENAME_IDS)
110 |     )
111 | 
112 |     # parse args
113 |     args = global_parser.parse_args()
114 | 
115 |     # get the merger function based on args
116 |     merger_name = args.merger
117 |     merger      = MERGERS[merger_name]
118 | 
119 |     # get general args
120 |     files           = args.files
121 |     input_language  = args.input_language
122 |     output_language = args.output_language
123 | 
124 |     # seed the RNG
125 |     if args.random is True:
126 |         random.seed()
127 |     else:
128 |         random.seed(args.seed)
129 | 
130 |     # get args as a dict and pop general args
131 |     merge_args = vars(args)
132 |     merge_args.pop('files')
133 |     merge_args.pop('input_language')
134 |     merge_args.pop('output_language')
135 |     merge_args.pop('seed')
136 |     merge_args.pop('random')
137 |     merge_args.pop('merger')
138 | 
139 |     # read input
140 |     raw_in = [f.read() for f in files]
141 | 
142 |     # parse input
143 |     try:
144 |         asts = [parse(raw, input_language) for raw in raw_in]
145 |     except ParsingError as e:
146 |         print(e, file=sys.stderr)
147 |         return 1
148 | 
149 |     # filter out suppressed expressions
150 |     asts = [list(filter(should_keep, ast)) for ast in asts]
151 |     # merge the two ASTs into a new AST
152 |     merged = merger(asts, **merge_args)
153 |     # add back the logic and get-sat
154 |     merged = [smt_string_logic()] + merged + [smt_check_sat()]
155 | 
156 |     # transformers produce ASTs
157 |     print(generate(merged, output_language))    
158 | 
159 | if __name__ == '__main__':
160 |     main()
161 | 


--------------------------------------------------------------------------------
/bin/stringstats:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | Prints stats about problems.
 5 | '''
 6 | 
 7 | import sys
 8 | import argparse
 9 | 
10 | from stringfuzz.constants import LANGUAGES, SMT_20_STRING, SMT_25_STRING
11 | from stringfuzz.parser import parse
12 | from stringfuzz.analyser import analyse
13 | from stringfuzz.ast import StringLitNode, ConcatNode
14 | 
15 | def main():
16 | 
17 |     # create arg parser
18 |     parser = argparse.ArgumentParser(description='Analyse an SMT 2.* file.')
19 |     parser.add_argument(
20 |         'file',
21 |         nargs   = '?',
22 |         default = sys.stdin,
23 |         type    = argparse.FileType('r'),
24 |         help    = 'input file (default: stdin)'
25 |     )
26 |     parser.add_argument(
27 |         '--language',
28 |         '-l',
29 |         dest    = 'language',
30 |         type    = str,
31 |         choices = LANGUAGES,
32 |         default = SMT_25_STRING,
33 |         help    = 'input language (default: {})'.format(SMT_25_STRING)
34 |     )
35 | 
36 |     # parse args
37 |     args = parser.parse_args()
38 | 
39 |     # parse input
40 |     try:
41 |         expressions = parse(args.file.read(), args.language)
42 | 
43 |     # handle errors
44 |     except IndexError as e:
45 |         print(e, file=sys.stderr)
46 |         return 1
47 | 
48 |     # get stats
49 |     points, variables, literals = analyse(expressions)
50 |     str_literals = [l for l in literals if isinstance(l, StringLitNode)]
51 |     concat_points = [p for p in points if isinstance(p.expression, ConcatNode)]
52 | 
53 |     # compute stats
54 |     if len(str_literals) > 1:
55 |         avg_literal_length = sum(map(len, str_literals)) / len(str_literals)
56 |     else:
57 |         avg_literal_length = 0
58 | 
59 |     if len(points) > 1:
60 |         max_depth = max(p.depth for p in points)
61 |     else:
62 |         max_depth = 0
63 | 
64 |     if len(concat_points) > 1:
65 |         max_nesting = max(p.nesting for p in points if isinstance(p.expression, ConcatNode))
66 |     else:
67 |         max_nesting = 0
68 | 
69 |     # print stats
70 |     print('stats')
71 |     print('=========')
72 |     print('num. of expressions:      ', len(points))
73 |     print('num. of variables:        ', len(variables))
74 |     print('num. of literals:         ', len(literals))
75 |     print('num. of string literals:  ', len(str_literals))
76 |     print('avg. length of literals:  ', '{:.4f}'.format(avg_literal_length))
77 |     print('max expression depth:     ', max_depth)
78 |     print('max concat nesting level: ', max_nesting)
79 | 
80 |     return 0
81 | 
82 | if __name__ == '__main__':
83 |     main()
84 | 


--------------------------------------------------------------------------------
/bin/tryparse:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | '''
  4 | Try parsing all files in given problem lists, and print out the ones that failed.
  5 | '''
  6 | 
  7 | import os
  8 | import sys
  9 | import multiprocessing as mp
 10 | import time
 11 | import datetime
 12 | import traceback
 13 | import queue
 14 | import ctypes
 15 | 
 16 | from collections import deque
 17 | 
 18 | from stringfuzz.constants import SMT_20_STRING, SMT_25_STRING, LANGUAGES
 19 | from stringfuzz.scanner import scan, ScanningError
 20 | from stringfuzz.parser import parse_file, parse_tokens, ParsingError
 21 | from stringfuzz.generator import generate, NotSupported
 22 | 
 23 | # constants
 24 | DEFAULT_NUM_WORKERS = 8
 25 | BATCH_SIZE          = 50
 26 | MAX_HISTORY         = 500
 27 | ESC                 = '\033'
 28 | BACK_ONE_LINE       = ESC + '[1A'
 29 | ERASE_LINE          = ESC + '[2K'
 30 | FRAME_DURATION      = 0.05 # in seconds
 31 | POISON_PILL         = None
 32 | 
 33 | SMT_25_PATTERNS = [
 34 |     'dumpCVC4',
 35 |     'smt25',
 36 |     '/cvc4/',
 37 |     'kaluza25',
 38 | ]
 39 | 
 40 | # globals
 41 | io_lock = mp.Lock()
 42 | 
 43 | # helpers
 44 | def reset_cursor():
 45 |     return BACK_ONE_LINE + ERASE_LINE
 46 | 
 47 | def now():
 48 |     return datetime.datetime.now()
 49 | 
 50 | def sec2minsec(seconds):
 51 |     return (seconds // 60, seconds % 60)
 52 | 
 53 | def show_failure(message):
 54 |     with io_lock:
 55 |         print(reset_cursor() + message + '\n')
 56 | 
 57 | def show_progress(*args):
 58 |     with io_lock:
 59 |         print(*args, file=sys.stderr)
 60 | 
 61 | def is_smt25(file_path):
 62 |     return any(pattern in file_path for pattern in SMT_25_PATTERNS)
 63 | 
 64 | # functions
 65 | def consumer(q, num_done, i, crash):
 66 | 
 67 |     # run forever
 68 |     while True:
 69 | 
 70 |         # try to get a task
 71 |         batch = q.get()
 72 | 
 73 |         # stop running if got poison pill
 74 |         if batch is POISON_PILL:
 75 |             break
 76 | 
 77 |         # go through problem batch
 78 |         for problem_path in batch:
 79 | 
 80 |             try:
 81 |                 parse_problem(problem_path)
 82 | 
 83 |             # on uncaught exceptions
 84 |             except Exception as e:
 85 | 
 86 |                 # print stack trace
 87 |                 with io_lock:
 88 |                     traceback.print_exc()
 89 | 
 90 |                 # signal crash to parent process
 91 |                 with crash.get_lock():
 92 |                     crash.value = 1
 93 | 
 94 |             # signal completion
 95 |             with num_done.get_lock():
 96 |                 num_done.value += 1
 97 | 
 98 | def parse_problem(input_path):
 99 | 
100 |     # get start time
101 |     start_time = now()
102 | 
103 |     # figure out input language
104 |     if is_smt25(input_path):
105 |         language = SMT_25_STRING
106 |     else:
107 |         language = SMT_20_STRING
108 | 
109 |     # read in file
110 |     with open(input_path, 'r') as file:
111 |         text = file.read()
112 | 
113 |     # try to scan
114 |     try:
115 |         tokens = scan(text, language)
116 |     except ScanningError as e:
117 |         show_failure('{language:<5} failed scanning {problem}\n{error}'.format(
118 |             path     = input_path,
119 |             language = language,
120 |             error    = e
121 |         ))
122 | 
123 |     # if scanned, try to parse
124 |     else:
125 |         try:
126 |             expressions = parse_tokens(tokens, language, text)
127 |         except ParsingError as e:
128 |             show_failure('{language:<5} failed parsing {path}\n{error}'.format(
129 |                 path     = input_path,
130 |                 language = language,
131 |                 error    = e
132 |             ))
133 | 
134 |         # if parsed, try to generate
135 |         else:
136 |             for output_language in [SMT_25_STRING, SMT_20_STRING]:
137 |                 try:
138 |                     translated = generate(expressions, output_language)
139 |                 except NotSupported as e:
140 |                     show_failure('{language:<5} failed generating {path}\n{error}'.format(
141 |                         path     = input_path,
142 |                         language = language,
143 |                         error    = e
144 |                     ))
145 | 
146 |     # measure run time
147 |     run_time = now() - start_time
148 | 
149 | def usage():
150 |     print('Usage', sys.argv[0], 'problem_list [problem_list [...]]', file=sys.stderr)
151 | 
152 | def add_record(history, last_sample, current_sample):
153 | 
154 |     # create new record
155 |     new_record = current_sample - last_sample
156 | 
157 |     # discard last record if needed
158 |     if len(history) >= MAX_HISTORY:
159 |         history.popleft()
160 | 
161 |     history.append(new_record)
162 | 
163 | def print_status(history, done_so_far, num_problems):
164 | 
165 |     # get rate
166 |     history_size     = len(history)
167 |     history_problems = sum(history)
168 |     history_time     = history_size * FRAME_DURATION
169 | 
170 |     if history_problems > 0:
171 |         sec_per_problem = history_time / history_problems
172 |     else:
173 |         sec_per_problem = FRAME_DURATION
174 | 
175 |     # calculate progress
176 |     num_left           = num_problems - done_so_far
177 |     percent_done       = (float(done_so_far) / float(num_problems)) * 100.0
178 |     time_left          = int(float(sec_per_problem) * float(num_left))
179 |     min_left, sec_left = sec2minsec(time_left)
180 | 
181 |     # format progress
182 |     seconds_progress = '{:.0f}s'.format(sec_left)
183 |     minutes_progress = '{:.0f}m'.format(min_left)
184 |     time_progress    = seconds_progress
185 | 
186 |     if min_left > 0:
187 |         time_progress = minutes_progress + ' ' + time_progress
188 | 
189 |     progress = '{} / {} ({:.2f}%) done; {} left ({:.6f} s per)'.format(
190 |         done_so_far,
191 |         num_problems,
192 |         percent_done,
193 |         time_progress,
194 |         sec_per_problem
195 |     )
196 | 
197 |     # show progress
198 |     show_progress(reset_cursor() + progress)
199 | 
200 | def main():
201 | 
202 |     # record start time
203 |     start_time = now()
204 | 
205 |     # get args
206 |     list_paths = sys.argv[1:]
207 | 
208 |     # check args
209 |     if len(list_paths) < 1:
210 |         usage()
211 |         exit(1)
212 | 
213 |     # read input lists
214 |     problems = []
215 |     for list_path in list_paths:
216 |         with open(list_path, 'r') as list_file:
217 |             problems += [line.strip() for line in list_file.readlines()]
218 |     num_problems = len(problems)
219 | 
220 |     # create shared values
221 |     num_done = mp.Value(ctypes.c_ulong, 0)
222 |     crash    = mp.Value(ctypes.c_bool, 0)
223 |     q        = mp.Queue()
224 | 
225 |     # populate queue
226 |     for i in range(0, num_problems, BATCH_SIZE):
227 |         batch = problems[i:i + BATCH_SIZE]
228 |         q.put_nowait(batch)
229 | 
230 |     # calculate number of workers
231 |     num_workers = os.cpu_count()
232 |     if num_workers is None:
233 |         num_workers = DEFAULT_NUM_WORKERS
234 | 
235 |     # add poison pills to the end of the queue
236 |     for i in range(0, num_workers):
237 |         q.put_nowait(POISON_PILL)
238 | 
239 |     # create workers
240 |     workers = [mp.Process(target=consumer, args=(q, num_done, i, crash)) for i in range(num_workers)]
241 | 
242 |     # start workers
243 |     for worker in workers:
244 |         worker.start()
245 | 
246 |     # print newline to start update line
247 |     show_progress('')
248 | 
249 |     # set up bookkeeping
250 |     last_sample = num_done.value
251 |     history     = deque()
252 | 
253 |     # run until done
254 |     while num_done.value < num_problems:
255 | 
256 |         # sample number of done problems
257 |         current_sample = num_done.value
258 |         add_record(history, last_sample, current_sample)
259 | 
260 |         # print status
261 |         print_status(history, current_sample, num_problems)
262 | 
263 |         # update bookkeeping
264 |         last_sample = current_sample
265 | 
266 |         # sleep for a frame
267 |         time.sleep(FRAME_DURATION)
268 | 
269 |         # check for crash condition
270 |         if crash.value != 0:
271 | 
272 |             # terminate workers
273 |             for worker in workers:
274 |                 worker.terminate()
275 | 
276 |             exit(1)
277 | 
278 |     # wait for the workers to finish
279 |     for worker in workers:
280 |         worker.join()
281 | 
282 |     # print final results
283 |     end_time             = now()
284 |     run_time             = end_time - start_time
285 |     min_total, sec_total = sec2minsec(run_time.seconds)
286 |     sec_per_problem      = run_time.seconds / num_done.value
287 |     show_progress('finished in {}m {}s, {:.4f}s per run'.format(min_total, sec_total, sec_per_problem))
288 | 
289 | if __name__ == '__main__':
290 |     main()
291 | 


--------------------------------------------------------------------------------
/bin/unprintable:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | '''
 4 | Obsolete 'unprintable' transformer. Kept for reference.
 5 | '''
 6 | 
 7 | import sys
 8 | import re
 9 | import random
10 | 
11 | # constants
12 | EXCLUDED    = map(ord, '\n\t\x00')
13 | UNPRINTABLE = [i for i in range(32) if i not in EXCLUDED]
14 | 
15 | LITERAL_PATTERN = r'"((?:[^"]|"")*)"'
16 | 
17 | # globals
18 | unprintable_chars = None
19 | 
20 | # functions
21 | def _gen_unprintable():
22 |     while True:
23 |         yield random.choice(UNPRINTABLE)
24 | 
25 | def get_char():
26 |     global unprintable_chars
27 |     char = next(unprintable_chars)
28 |     return '\\x{:0>2x}'.format(char)
29 | 
30 | def get_string(n):
31 |     return ''.join(get_char() for i in range(n))
32 | 
33 | def replace_unprintable(match):
34 |     string = match.group(1)
35 |     replacement = '"' + get_string(len(string)) + '"'
36 |     return replacement
37 | 
38 | def main():
39 | 
40 |     global unprintable_chars
41 | 
42 |     # create generators
43 |     unprintable_chars = _gen_unprintable()
44 | 
45 |     # process input
46 |     program = sys.stdin.read()
47 |     program = re.sub(LITERAL_PATTERN, replace_unprintable, program)
48 |     sys.stdout.write(program)
49 | 
50 | if __name__ == '__main__':
51 |     main()
52 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | 
 5 | from setuptools import setup, find_packages
 6 | 
 7 | setup(
 8 |     name         = 'stringfuzz',
 9 |     version      = '0.1',
10 |     description  = 'Fuzzer for SMTLIB 2.x solvers.',
11 |     author       = 'Dmitry Blotsky, Federico Mora',
12 |     author_email = 'dmitry.blotsky@gmail.com, fmora@cs.toronto.edu',
13 |     url          = 'https://github.com/dblotsky/stringfuzz',
14 |     scripts      = [
15 |         'bin/stringfuzzx',
16 |         'bin/stringfuzzg',
17 |         'bin/stringstats',
18 |         'bin/stringmerge',
19 |         'bin/stringbreak'
20 |     ],
21 |     packages     = find_packages(),
22 |     package_dir  = {
23 |         'stringfuzz': 'stringfuzz',
24 |     },
25 | )
26 | 


--------------------------------------------------------------------------------
/stringfuzz/__init__.py:
--------------------------------------------------------------------------------
1 | from stringfuzz.scanner import ALPHABET, WHITESPACE
2 | 
3 | ALL_CHARS = ALPHABET + WHITESPACE


--------------------------------------------------------------------------------
/stringfuzz/analyser.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from collections import namedtuple
  4 | from stringfuzz.ast_walker import ASTWalker
  5 | 
  6 | __all__ = [
  7 |     'analyse',
  8 | ]
  9 | 
 10 | ZERO_DEPTH = 1
 11 | 
 12 | # NOTE:
 13 | #      depth   - depth in tree
 14 | #      nesting - nesting of the same expression in tree
 15 | Point = namedtuple('Point', ('expression', 'parent', 'depth', 'nesting'))
 16 | 
 17 | class StatsWalker(ASTWalker):
 18 | 
 19 |     def __init__(self, ast):
 20 |         super().__init__(ast)
 21 | 
 22 |         # bookkeeping
 23 |         self.expr_stack    = []
 24 |         self.point_stack   = []
 25 |         self.nesting_stack = []
 26 | 
 27 |         self.depth = ZERO_DEPTH
 28 | 
 29 |         # results
 30 |         self.points    = []
 31 |         self.variables = set()
 32 |         self.literals  = []
 33 | 
 34 |     def make_point(self, expression):
 35 |         return Point(
 36 |             expression = expression,
 37 |             parent     = self.parent,
 38 |             depth      = self.depth,
 39 |             nesting    = self.nesting,
 40 |         )
 41 | 
 42 |     @property
 43 |     def expression(self):
 44 |         assert len(self.expr_stack) > 0
 45 |         return self.expr_stack[-1]
 46 | 
 47 |     @property
 48 |     def point(self):
 49 |         assert len(self.point_stack) > 0
 50 |         return self.point_stack[-1]
 51 | 
 52 |     @property
 53 |     def parent(self):
 54 |         if len(self.expr_stack) > 1:
 55 |             return self.expr_stack[-2]
 56 |         return None
 57 | 
 58 |     @property
 59 |     def nesting(self):
 60 |         assert len(self.nesting_stack) > 0
 61 |         return self.nesting_stack[-1]
 62 | 
 63 |     def enter_expression(self, expression, parent):
 64 | 
 65 |         # push nesting if we're at least one expression deep
 66 |         if self.depth > 1:
 67 | 
 68 |             if self.expression.symbol == expression.symbol:
 69 |                 new_nesting = self.nesting + 1
 70 |             else:
 71 |                 new_nesting = ZERO_DEPTH
 72 | 
 73 |             self.nesting_stack.append(new_nesting)
 74 | 
 75 |         # otherwise, start off with no nesting
 76 |         else:
 77 |             self.nesting_stack.append(ZERO_DEPTH)
 78 | 
 79 |         # create a new point
 80 |         point = self.make_point(expression)
 81 |         self.points.append(point)
 82 | 
 83 |         # push point and expression
 84 |         self.point_stack.append(point)
 85 |         self.expr_stack.append(expression)
 86 | 
 87 |         # increase depth
 88 |         self.depth += 1
 89 | 
 90 |     def exit_expression(self, expression, parent):
 91 | 
 92 |         # decrease depth
 93 |         self.depth -= 1
 94 | 
 95 |         # pop all stacks
 96 |         self.point_stack.pop()
 97 |         self.expr_stack.pop()
 98 |         self.nesting_stack.pop()
 99 | 
100 |     def enter_literal(self, literal, parent):
101 |         assert self.point is not None
102 |         self.literals.append(literal)
103 | 
104 |     def enter_identifier(self, variable, parent):
105 |         assert self.point is not None
106 |         self.variables.add(variable.name)
107 | 
108 | def analyse(ast):
109 |     walker = StatsWalker(ast)
110 |     walker.walk()
111 |     return walker.points, walker.variables, walker.literals
112 | 


--------------------------------------------------------------------------------
/stringfuzz/ast.py:
--------------------------------------------------------------------------------
  1 | import string
  2 | import numbers
  3 | 
  4 | '''
  5 | The AST is a list of ASTNodes.
  6 | '''
  7 | 
  8 | __all__ = [
  9 |     'STRING_SORT',
 10 |     'INT_SORT',
 11 |     'BOOL_SORT',
 12 |     'REGEX_SORT',
 13 |     'UNIT_SORT',
 14 |     'ANY_SORT',
 15 |     'DECLARABLE_SORTS',
 16 | 
 17 |     'LiteralNode',
 18 |     'BoolLitNode',
 19 |     'IntLitNode',
 20 |     'StringLitNode',
 21 |     'AtomicSortNode',
 22 |     'CompoundSortNode',
 23 |     'SettingNode',
 24 |     'MetaDataNode',
 25 |     'IdentifierNode',
 26 |     'FunctionDeclarationNode',
 27 |     'FunctionDefinitionNode',
 28 |     'ConstantDeclarationNode',
 29 |     'SortedVarNode',
 30 |     'BracketsNode',
 31 |     'ExpressionNode',
 32 |     'GenericExpressionNode',
 33 |     'MetaCommandNode',
 34 |     'AssertNode',
 35 |     'CheckSatNode',
 36 |     'GetModelNode',
 37 |     'AndNode',
 38 |     'OrNode',
 39 |     'NotNode',
 40 |     'EqualNode',
 41 |     'GtNode',
 42 |     'LtNode',
 43 |     'GteNode',
 44 |     'LteNode',
 45 |     'ConcatNode',
 46 |     'ContainsNode',
 47 |     'AtNode',
 48 |     'LengthNode',
 49 |     'IndexOfNode',
 50 |     'IndexOf2Node',
 51 |     'PrefixOfNode',
 52 |     'SuffixOfNode',
 53 |     'StringReplaceNode',
 54 |     'SubstringNode',
 55 |     'FromIntNode',
 56 |     'ToIntNode',
 57 |     'InReNode',
 58 |     'StrToReNode',
 59 |     'ReConcatNode',
 60 |     'ReStarNode',
 61 |     'RePlusNode',
 62 |     'ReRangeNode',
 63 |     'ReUnionNode',
 64 |     'ReInterNode',
 65 |     'ReAllCharNode',
 66 | ]
 67 | 
 68 | # constants
 69 | STRING_SORT = 'String'
 70 | INT_SORT    = 'Int'
 71 | BOOL_SORT   = 'Bool'
 72 | REGEX_SORT  = 'Regex'
 73 | UNIT_SORT   = 'Unit'
 74 | ANY_SORT    = '*'
 75 | 
 76 | UNIT_SIGNATURE      = []
 77 | UNCHECKED_SIGNATURE = None
 78 | 
 79 | DECLARABLE_SORTS = [
 80 |     STRING_SORT,
 81 |     INT_SORT,
 82 |     BOOL_SORT,
 83 | ]
 84 | 
 85 | SORT_TYPE      = str
 86 | SIGNATURE_TYPE = list
 87 | 
 88 | # helpers
 89 | def with_spaces(terms):
 90 |     return ' '.join(map(repr, terms))
 91 | 
 92 | # data structures
 93 | class _ASTNode(object):
 94 |     def __eq__(self, other):
 95 |         return repr(self) == repr(other)
 96 | 
 97 |     def __hash__(self):
 98 |         return hash(repr(self))
 99 | 
100 | # "atoms"
101 | class SortNode(_ASTNode):
102 |     pass
103 | 
104 | class AtomicSortNode(SortNode):
105 |     def __init__(self, name):
106 |         self.name = name
107 | 
108 |     def __repr__(self):
109 |         return 'Sort<{}>'.format(self.name)
110 | 
111 | class CompoundSortNode(SortNode):
112 |     def __init__(self, constructor, sorts):
113 |         self.constructor = constructor
114 |         self.sorts = sorts
115 | 
116 |     def __repr__(self):
117 |         return 'Sort<{} {}>'.format(self.symbol, with_spaces(self.sorts))
118 | 
119 | class SettingNode(_ASTNode):
120 |     def __init__(self, name):
121 |         self.name = name
122 | 
123 |     def __repr__(self):
124 |         return 'Setting<{}>'.format(self.name)
125 | 
126 | class MetaDataNode(_ASTNode):
127 |     def __init__(self, value):
128 |         self.value = value
129 | 
130 |     def __repr__(self):
131 |         return 'MetaData<{}>'.format(self.value)
132 | 
133 | class IdentifierNode(_ASTNode):
134 |     def __init__(self, name):
135 |         self.name = name
136 | 
137 |     def __repr__(self):
138 |         return 'Id<{}>'.format(self.name)
139 | 
140 | class SortedVarNode(_ASTNode):
141 |     def __init__(self, var_name, var_sort):
142 |         self.var_name = var_name
143 |         self.var_sort = var_sort
144 | 
145 |     def __repr__(self):
146 |         return 'Decl<{} {}>'.format(self.var_name, self.var_sort)
147 | 
148 | class ReAllCharNode(_ASTNode):
149 |     def __repr__(self):
150 |         return 'ReAllChar<.>'
151 | 
152 | class BracketsNode(_ASTNode):
153 |     def __init__(self, body):
154 |         self.body = body
155 | 
156 |     def __repr__(self):
157 |         return '({})'.format(with_spaces(self.body))
158 | 
159 | # NOTE:
160 | #      sort-wise, we're treating everything as a function; even literals
161 | class _SortedASTNode(_ASTNode):
162 |     _signature = NotImplemented
163 |     _sort      = NotImplemented
164 | 
165 |     def __init__(self):
166 |         assert isinstance(self._sort, SORT_TYPE)
167 |         assert self._signature == UNCHECKED_SIGNATURE or isinstance(self._signature, SIGNATURE_TYPE)
168 | 
169 |     @classmethod
170 |     def get_signature(cls):
171 |         return cls._signature
172 | 
173 |     @classmethod
174 |     def get_sort(cls):
175 |         return cls._sort
176 | 
177 |     @classmethod
178 |     def is_terminal(cls):
179 |         return cls._signature == UNIT_SIGNATURE
180 | 
181 |     @classmethod
182 |     def accepts(cls, sort):
183 |         if cls._signature == UNCHECKED_SIGNATURE:
184 |             return False
185 |         return sort in cls._signature
186 | 
187 |     @classmethod
188 |     def returns(cls, sort):
189 |         return sort == cls._sort
190 | 
191 | # literals
192 | class LiteralNode(_SortedASTNode):
193 |     _signature = UNIT_SIGNATURE
194 | 
195 |     def __init__(self, value):
196 |         super().__init__()
197 |         self.value = value
198 | 
199 |     def __repr__(self):
200 |         return '{}<{}>'.format(self.get_sort(), self.value)
201 | 
202 | class BoolLitNode(LiteralNode):
203 |     _sort = BOOL_SORT
204 | 
205 |     def __init__(self, value):
206 |         assert isinstance(value, bool)
207 |         super().__init__(value)
208 | 
209 | class IntLitNode(LiteralNode):
210 |     _sort = INT_SORT
211 | 
212 |     def __init__(self, value):
213 |         assert isinstance(value, numbers.Real) and not isinstance(value, bool)
214 |         super().__init__(value)
215 | 
216 | class StringLitNode(LiteralNode):
217 |     _sort = STRING_SORT
218 | 
219 |     def __init__(self, value):
220 |         assert isinstance(value, str)
221 |         super().__init__(value)
222 | 
223 |     def __len__(self):
224 |         return len(self.value)
225 | 
226 | # expressions
227 | class ExpressionNode(_ASTNode):
228 |     _symbol = NotImplemented
229 | 
230 |     def __init__(self, body):
231 |         if isinstance(self._symbol, str):
232 |             self._symbol = IdentifierNode(self._symbol)
233 |         self.body = body
234 | 
235 |     @property
236 |     def symbol(self):
237 |         return self._symbol
238 | 
239 |     def __repr__(self):
240 |         return '(\'{}\' {})'.format(self.symbol, with_spaces(self.body))
241 | 
242 | class _SortedExpressionNode(ExpressionNode, _SortedASTNode):
243 |     def __init__(self, body):
244 |         # TODO:
245 |         #      enforce that the arguments are of correct types
246 |         _SortedASTNode.__init__(self)
247 |         ExpressionNode.__init__(self, body)
248 | 
249 | class _NullaryExpression(_SortedExpressionNode):
250 |     def __init__(self):
251 |         super().__init__([])
252 | 
253 | class _UnaryExpression(_SortedExpressionNode):
254 |     def __init__(self, a):
255 |         super().__init__([a])
256 | 
257 | class _BinaryExpression(_SortedExpressionNode):
258 |     def __init__(self, a, b):
259 |         super().__init__([a, b])
260 | 
261 | class _TernaryExpression(_SortedExpressionNode):
262 |     def __init__(self, a, b, c):
263 |         super().__init__([a, b, c])
264 | 
265 | class _QuaternaryExpression(_SortedExpressionNode):
266 |     def __init__(self, a, b, c, d):
267 |         super().__init__([a, b, c, d])
268 | 
269 | class _NaryExpression(_SortedExpressionNode):
270 |     def __init__(self, *args):
271 |         super().__init__(list(args))
272 | 
273 | class _RelationExpressionNode(_BinaryExpression):
274 |     _signature = [INT_SORT, INT_SORT]
275 |     _sort      = BOOL_SORT
276 | 
277 | class GenericExpressionNode(_NaryExpression):
278 |     _signature = UNCHECKED_SIGNATURE
279 |     _sort      = UNIT_SORT
280 | 
281 |     def __init__(self, symbol, *args):
282 |         self._symbol = symbol
283 |         super().__init__(*args)
284 | 
285 | # commands
286 | class _CommandNode(_SortedASTNode):
287 |     _sort = UNIT_SORT
288 | 
289 | class MetaCommandNode(_CommandNode, _NaryExpression):
290 |     _signature = UNCHECKED_SIGNATURE
291 | 
292 |     def __init__(self, symbol, *args):
293 |         self._symbol = symbol
294 |         super().__init__(*args)
295 | 
296 | class AssertNode(_CommandNode, _UnaryExpression):
297 |     _signature = [BOOL_SORT]
298 |     _symbol    = 'assert'
299 | 
300 | class CheckSatNode(_CommandNode, _NullaryExpression):
301 |     _signature = UNIT_SIGNATURE
302 |     _symbol    = 'check-sat'
303 | 
304 | class GetModelNode(_CommandNode, _NullaryExpression):
305 |     _signature = UNIT_SIGNATURE
306 |     _symbol    = 'get-model'
307 | 
308 | class FunctionDeclarationNode(_CommandNode, _TernaryExpression):
309 |     _signature = UNCHECKED_SIGNATURE
310 |     _symbol    = 'declare-fun'
311 | 
312 | class FunctionDefinitionNode(_CommandNode, _QuaternaryExpression):
313 |     _signature = UNCHECKED_SIGNATURE
314 |     _symbol    = 'define-fun'
315 | 
316 | class ConstantDeclarationNode(_CommandNode, _BinaryExpression):
317 |     _signature = UNCHECKED_SIGNATURE
318 |     _symbol    = 'declare-const'
319 | 
320 | # boolean expressions
321 | class AndNode(_BinaryExpression):
322 |     _signature = [BOOL_SORT, BOOL_SORT]
323 |     _sort      = BOOL_SORT
324 |     _symbol    = 'and'
325 | 
326 | class OrNode(_BinaryExpression):
327 |     _signature = [BOOL_SORT, BOOL_SORT]
328 |     _sort      = BOOL_SORT
329 |     _symbol    = 'or'
330 | 
331 | class NotNode(_UnaryExpression):
332 |     _signature = [BOOL_SORT]
333 |     _sort      = BOOL_SORT
334 |     _symbol    = 'not'
335 | 
336 | # relations
337 | class EqualNode(_RelationExpressionNode):
338 |     _signature = [ANY_SORT, ANY_SORT]
339 |     _symbol    = '='
340 | 
341 | class GtNode(_RelationExpressionNode):
342 |     _symbol = '>'
343 | 
344 | class LtNode(_RelationExpressionNode):
345 |     _symbol = '<'
346 | 
347 | class GteNode(_RelationExpressionNode):
348 | 
349 |     _symbol = '>='
350 | 
351 | class LteNode(_RelationExpressionNode):
352 | 
353 |     _symbol = '<='
354 | 
355 | # functions
356 | class ConcatNode(_BinaryExpression):
357 |     _signature = [STRING_SORT, STRING_SORT]
358 |     _sort      = STRING_SORT
359 |     _symbol    = 'Concat'
360 | 
361 | class ContainsNode(_BinaryExpression):
362 |     _signature = [STRING_SORT, STRING_SORT]
363 |     _sort      = BOOL_SORT
364 |     _symbol    = 'Contains'
365 | 
366 | class AtNode(_BinaryExpression):
367 |     _signature = [STRING_SORT, INT_SORT]
368 |     _sort      = STRING_SORT
369 |     _symbol    = 'At'
370 | 
371 | class LengthNode(_UnaryExpression):
372 |     _signature = [STRING_SORT]
373 |     _sort      = INT_SORT
374 |     _symbol    = 'Length'
375 | 
376 | class IndexOfNode(_BinaryExpression):
377 |     _signature = [STRING_SORT, STRING_SORT]
378 |     _sort      = INT_SORT
379 |     _symbol    = 'IndexOf'
380 | 
381 | class IndexOf2Node(_TernaryExpression):
382 |     _signature = [STRING_SORT, STRING_SORT, INT_SORT]
383 |     _sort      = INT_SORT
384 |     _symbol    = 'IndexOf2'
385 | 
386 | class PrefixOfNode(_BinaryExpression):
387 |     _signature = [STRING_SORT, STRING_SORT]
388 |     _sort      = BOOL_SORT
389 |     _symbol    = 'PrefixOf'
390 | 
391 | class SuffixOfNode(_BinaryExpression):
392 |     _signature = [STRING_SORT, STRING_SORT]
393 |     _sort      = BOOL_SORT
394 |     _symbol    = 'SuffixOf'
395 | 
396 | class StringReplaceNode(_TernaryExpression):
397 |     _signature = [STRING_SORT, STRING_SORT, STRING_SORT]
398 |     _sort      = STRING_SORT
399 |     _symbol    = 'Replace'
400 | 
401 | class SubstringNode(_TernaryExpression):
402 |     _signature = [STRING_SORT, INT_SORT, INT_SORT]
403 |     _sort      = STRING_SORT
404 |     _symbol    = 'Substring'
405 | 
406 | class FromIntNode(_UnaryExpression):
407 |     _signature = [INT_SORT]
408 |     _sort      = STRING_SORT
409 |     _symbol    = 'FromInt'
410 | 
411 | class ToIntNode(_UnaryExpression):
412 |     _signature = [STRING_SORT]
413 |     _sort      = INT_SORT
414 |     _symbol    = 'ToInt'
415 | 
416 | class InReNode(_BinaryExpression):
417 |     _signature = [STRING_SORT, REGEX_SORT]
418 |     _sort      = BOOL_SORT
419 |     _symbol    = 'InRegex'
420 | 
421 | class StrToReNode(_UnaryExpression):
422 |     _signature = [STRING_SORT]
423 |     _sort      = REGEX_SORT
424 |     _symbol    = 'Str2Re'
425 | 
426 | class ReConcatNode(_BinaryExpression):
427 |     _signature = [REGEX_SORT, REGEX_SORT]
428 |     _sort      = REGEX_SORT
429 |     _symbol    = 'ReConcat'
430 | 
431 | class ReStarNode(_UnaryExpression):
432 |     _signature = [REGEX_SORT]
433 |     _sort      = REGEX_SORT
434 |     _symbol    = 'ReStar'
435 | 
436 | class RePlusNode(_UnaryExpression):
437 |     _signature = [REGEX_SORT]
438 |     _sort      = REGEX_SORT
439 |     _symbol    = 'RePlus'
440 | 
441 | class ReRangeNode(_BinaryExpression):
442 |     _signature = [STRING_SORT, STRING_SORT]
443 |     _sort      = REGEX_SORT
444 |     _symbol    = 'ReRange'
445 | 
446 |     def __init__(self, a, b):
447 |         # TODO:
448 |         #      assert that arguments are literals
449 |         super().__init__(a, b)
450 | 
451 | class ReUnionNode(_BinaryExpression):
452 |     _signature = [REGEX_SORT, REGEX_SORT]
453 |     _sort      = REGEX_SORT
454 |     _symbol    = 'ReUnion'
455 | 
456 | class ReInterNode(_BinaryExpression):
457 |     _signature = [REGEX_SORT, REGEX_SORT]
458 |     _sort      = REGEX_SORT
459 |     _symbol    = 'ReInter'
460 | 


--------------------------------------------------------------------------------
/stringfuzz/ast_walker.py:
--------------------------------------------------------------------------------
 1 | from stringfuzz.ast import *
 2 | 
 3 | __all__ = [
 4 |     'ASTWalker'
 5 | ]
 6 | 
 7 | class ASTWalker(object):
 8 | 
 9 |     def __init__(self, ast):
10 |         super().__init__()
11 |         self.__ast = ast
12 | 
13 |     # public API
14 |     def walk(self):
15 |         for expression in self.__ast:
16 |             self.walk_expression(expression, None)
17 | 
18 |         return self.__ast
19 | 
20 |     # walks
21 |     def walk_expression(self, expression, parent):
22 | 
23 |         self.enter_expression(expression, parent)
24 | 
25 |         for sub_expression in expression.body:
26 |             if isinstance(sub_expression, ExpressionNode):
27 |                 self.walk_expression(sub_expression, expression)
28 | 
29 |             if isinstance(sub_expression, IdentifierNode):
30 |                 self.walk_identifier(sub_expression, expression)
31 | 
32 |             if isinstance(sub_expression, LiteralNode):
33 |                 self.walk_literal(sub_expression, expression)
34 | 
35 |         self.exit_expression(expression, parent)
36 | 
37 |     def walk_literal(self, literal, parent):
38 |         self.enter_literal(literal, parent)
39 |         self.exit_literal(literal, parent)
40 | 
41 |     def walk_identifier(self, identifier, parent):
42 |         self.enter_identifier(identifier, parent)
43 |         self.exit_identifier(identifier, parent)
44 | 
45 |     # enters/exits
46 |     def enter_expression(self, expression, parent):
47 |         pass
48 | 
49 |     def exit_expression(self, expression, parent):
50 |         pass
51 | 
52 |     def enter_literal(self, literal, parent):
53 |         pass
54 | 
55 |     def exit_literal(self, literal, parent):
56 |         pass
57 | 
58 |     def enter_identifier(self, identifier, parent):
59 |         pass
60 | 
61 |     def exit_identifier(self, identifier, parent):
62 |         pass
63 | 
64 | 


--------------------------------------------------------------------------------
/stringfuzz/constants.py:
--------------------------------------------------------------------------------
 1 | SMT_20        = 'smt2'
 2 | SMT_20_STRING = 'smt20'
 3 | SMT_25_STRING = 'smt25'
 4 | 
 5 | LANGUAGES = [
 6 |     SMT_20,
 7 |     SMT_20_STRING,
 8 |     SMT_25_STRING,
 9 | ]
10 | 


--------------------------------------------------------------------------------
/stringfuzz/fuzzers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dblotsky/stringfuzz/5507894ed5d94ed36098753357d33adee182b298/stringfuzz/fuzzers/__init__.py


--------------------------------------------------------------------------------
/stringfuzz/fuzzers/genetic.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import os
  3 | import sys
  4 | import subprocess
  5 | import threading
  6 | import datetime
  7 | import signal
  8 | import statistics
  9 | 
 10 | from heapq import heappush, heappop
 11 | 
 12 | from stringfuzz.transformers import fuzz, graft
 13 | from stringfuzz.generators import random_ast
 14 | from stringfuzz.generator import generate
 15 | from stringfuzz.ast import AssertNode, CheckSatNode
 16 | from stringfuzz.util import coin_toss
 17 | 
 18 | __all__ = [
 19 |     'simulate'
 20 | ]
 21 | 
 22 | # constants
 23 | DEFAULT_MUTATION_ROUNDS = 4
 24 | DEFAULT_TIMEOUT         = 5
 25 | MAX_NUM_ASSERTS         = 20
 26 | NUM_RUNS                = 8
 27 | 
 28 | # globals
 29 | _language = None
 30 | _timeout  = DEFAULT_TIMEOUT
 31 | 
 32 | # helpers
 33 | def mutate_fuzz(ast):
 34 |     return ast
 35 |     # return fuzz(ast, skip_re_range=False)
 36 | 
 37 | def decompose(ast):
 38 |     head    = []
 39 |     asserts = []
 40 |     tail    = []
 41 |     for e in ast:
 42 |         if isinstance(e, AssertNode):
 43 |             asserts.append(e)
 44 |         elif isinstance(e, CheckSatNode):
 45 |             tail.append(e)
 46 |         else:
 47 |             head.append(e)
 48 |     return head, asserts, tail
 49 | 
 50 | def mutate_add(ast):
 51 | 
 52 |     if len(ast) >= MAX_NUM_ASSERTS:
 53 |         return ast
 54 | 
 55 |     # decompose existing AST
 56 |     head, asserts, tail = decompose(ast)
 57 | 
 58 |     # create random AST with one assert
 59 |     new_ast = random_ast(
 60 |         num_vars            = 1,
 61 |         num_asserts         = 1,
 62 |         depth               = 5,
 63 |         max_terms           = 5,
 64 |         max_str_lit_length  = 10,
 65 |         max_int_lit         = 30,
 66 |         literal_probability = 0.5,
 67 |         semantically_valid  = True
 68 |     )
 69 | 
 70 |     # isolate just the new assert
 71 |     _, new_asserts, _ = decompose(new_ast)
 72 | 
 73 |     # return with the new asserts added
 74 |     return head + asserts + new_asserts + tail
 75 | 
 76 | def mutate_pop(ast):
 77 |     head, asserts, tail = decompose(ast)
 78 |     return head + asserts[:-1] + tail
 79 | 
 80 | def mutate_graft(ast):
 81 |     return ast
 82 |     # return graft(ast, skip_str_to_re=False)
 83 | 
 84 | def mutate(ast):
 85 |     choice = random.randint(1, 4)
 86 | 
 87 |     if choice == 1:
 88 |         return mutate_fuzz(ast)
 89 | 
 90 |     if choice == 2:
 91 |         return mutate_pop(ast)
 92 | 
 93 |     if choice == 3:
 94 |         return mutate_add(ast)
 95 | 
 96 |     if choice == 4:
 97 |         return mutate_graft(ast)
 98 | 
 99 | def vegetative_mate(parent, num_mutation_rounds=DEFAULT_MUTATION_ROUNDS):
100 |     child = parent
101 |     for i in range(num_mutation_rounds):
102 |         child = mutate(child)
103 |     return child
104 | 
105 | def mate(parents):
106 |     return vegetative_mate(random.choice(parents))
107 | 
108 | def time_solver(command, problem, timeout, verbose=False, debug=False):
109 | 
110 |     # print command that will be run
111 |     if verbose is True or debug is True:
112 |         print('RUNNING:', repr(command), file=sys.stderr)
113 | 
114 |     # get start time
115 |     start = datetime.datetime.now().timestamp()
116 | 
117 |     # run command
118 |     process = subprocess.Popen(
119 |         command,
120 |         shell              = True,
121 |         stdin              = subprocess.PIPE,
122 |         stdout             = subprocess.PIPE,
123 |         stderr             = subprocess.PIPE,
124 |         preexec_fn         = os.setsid,
125 |         universal_newlines = True
126 |     )
127 | 
128 |     # feed it the problem and wait for it to complete
129 |     try:
130 |         stdout, stderr = process.communicate(input=problem, timeout=timeout)
131 | 
132 |     # if it times out ...
133 |     except subprocess.TimeoutExpired as e:
134 | 
135 |         # if verbose is True:
136 |         print('TIMED OUT:', repr(command), '... killing', process.pid, file=sys.stderr)
137 | 
138 |         # kill it
139 |         os.killpg(os.getpgid(process.pid), signal.SIGINT)
140 | 
141 |         # set timeout result
142 |         elapsed = timeout
143 | 
144 |         # print output
145 |         # if verbose is True:
146 |         print('STDOUT:', process.stdout.read(), file=sys.stderr, end='')
147 |         print('STDERR:', process.stderr.read(), file=sys.stderr, end='')
148 | 
149 |     # if it completes in time ...
150 |     else:
151 | 
152 |         # measure run time
153 |         end     = datetime.datetime.now().timestamp()
154 |         elapsed = end - start
155 | 
156 |         if stderr != '':
157 |             print('STDERR IS NOT EMPTY!:', stderr, file=sys.stderr, end='')
158 |             print('PROBLEM: \n', problem, file=sys.stderr, end='')
159 | 
160 |         # print output
161 |         if debug is True:
162 |             print('STDOUT:', stdout, file=sys.stderr, end='')
163 |             print('STDERR:', stderr, file=sys.stderr, end='')
164 | 
165 |     return elapsed
166 | 
167 | def reproduce(survivors, world_size):
168 | 
169 |     # create offspring
170 |     num_offspring = world_size - len(survivors)
171 |     offspring     = [mate(survivors) for i in range(num_offspring)]
172 | 
173 |     # return new population
174 |     new_population = survivors + offspring
175 |     return new_population
176 | 
177 | def generate_problem(problem):
178 |     global _language
179 |     return generate(problem, _language)
180 | 
181 | def normalise(bottom, top, value):
182 |     width = top - bottom
183 |     return value / width
184 | 
185 | def time_in_thread(index, times, **kwargs):
186 |     time         = time_solver(**kwargs)
187 |     times[index] = time
188 | 
189 | def get_score(organism, saint_peter):
190 |     global _timeout
191 | 
192 |     # get average run time
193 |     times   = [0 for i in range(NUM_RUNS)]
194 |     threads = []
195 |     for i in range(NUM_RUNS):
196 |         thread = threading.Thread(
197 |             target = time_in_thread,
198 |             args   = (i, times),
199 |             kwargs = {
200 |                 'command': saint_peter,
201 |                 'timeout': _timeout,
202 |                 'problem': generate_problem(organism)
203 |             }
204 |         )
205 |         threads.append(thread)
206 | 
207 |     # run experiments in parallel
208 |     for thread in threads:
209 |         thread.start()
210 | 
211 |     for thread in threads:
212 |         thread.join()
213 | 
214 |     # return median run time
215 |     score = statistics.median(times)
216 |     return score
217 | 
218 | def judge(population, saint_peter):
219 |     for organism in population:
220 |         yield get_score(organism, saint_peter)
221 | 
222 | def cull(population, scores):
223 | 
224 |     # annotate specimens with their scores
225 |     global _timeout
226 |     indices   = range(len(population))
227 |     annotated = zip([(_timeout - s) for s in scores], indices)
228 | 
229 |     # create a min-heap out of annotated specimens
230 |     heap = []
231 |     for entry in annotated:
232 |         heappush(heap, entry)
233 | 
234 |     # get best specimens
235 |     print('population', ' '.join(['p[{i}]={s}'.format(s=len(e), i=i) for i, e in enumerate(population)]))
236 |     best_entries = [heappop(heap) for i in range(3)]
237 |     print('best entries', best_entries)
238 |     best_indices = [entry[1] for entry in best_entries]
239 |     print('best indices', best_indices)
240 |     best         = [population[i] for i in best_indices]
241 |     print('best:', ' '.join(['p[{i}]={s} for {t}'.format(t=e[0], s=len(population[e[1]]), i=e[1]) for e in best_entries]))
242 |     print('')
243 | 
244 |     return best
245 | 
246 | def time_to_log(generation, resolution):
247 |     return (generation % resolution) == 0
248 | 
249 | # public API
250 | def simulate(progenitor, language, saint_peter, num_generations, world_size, log_resolution):
251 | 
252 |     # set global config
253 |     global _language
254 |     _language = language
255 | 
256 |     # create initial population
257 |     population = [progenitor]
258 | 
259 |     # run simulation
260 |     for g in range(num_generations):
261 | 
262 |         # log generation progress
263 |         if time_to_log(g, log_resolution):
264 |             print('generation {}'.format(g))
265 | 
266 |         # sanity check: there should be organisms
267 |         assert len(population) > 0
268 | 
269 |         # populate world
270 |         population = reproduce(population, world_size)
271 | 
272 |         # measure performance of each organism
273 |         scores = judge(population, saint_peter)
274 | 
275 |         # keep only the "best" organisms
276 |         population = cull(population, scores)
277 | 
278 |     # return final population
279 |     return population
280 | 


--------------------------------------------------------------------------------
/stringfuzz/generator.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from stringfuzz.constants import SMT_20, SMT_20_STRING, SMT_25_STRING
  4 | from stringfuzz.scanner import scan, ALPHABET, WHITESPACE
  5 | from stringfuzz.ast import *
  6 | 
  7 | __all__ = [
  8 |     'generate',
  9 |     'generate_file',
 10 |     'NotSupported',
 11 | ]
 12 | 
 13 | # exceptions
 14 | class NotSupported(ValueError):
 15 |     def __init__(self, e, language):
 16 |         message = 'can\'t generate {!r} in language {!r}'.format(e, language)
 17 |         super().__init__(message)
 18 | 
 19 | # functions
 20 | def needs_encoding(c):
 21 |     return c not in ALPHABET
 22 | 
 23 | def encode_char(c, language):
 24 |     if c == '"':
 25 |         if language == SMT_25_STRING:
 26 |             return '""'
 27 |         else:
 28 |             return '\\"'
 29 |     elif c == '\\':
 30 |         return '\\\\'
 31 |     elif c in WHITESPACE:
 32 |         return repr(c)
 33 |     elif needs_encoding(c):
 34 |         return '\\x{:0>2x}'.format(ord(c))
 35 |     return c
 36 | 
 37 | def encode_string(s, language):
 38 |     encoded = ''.join(encode_char(c, language) for c in s)
 39 |     return '"' + encoded + '"'
 40 | 
 41 | def generate_node(node, language):
 42 | 
 43 |     # generate each known node
 44 |     if isinstance(node, ExpressionNode):
 45 |         return generate_expr(node, language)
 46 | 
 47 |     if isinstance(node, SortedVarNode):
 48 |         return '({} {})'.format(generate_node(node.name, language), generate_node(node.sort, language))
 49 | 
 50 |     if isinstance(node, LiteralNode):
 51 |         return generate_lit(node, language)
 52 | 
 53 |     if isinstance(node, IdentifierNode):
 54 |         return node.name
 55 | 
 56 |     if isinstance(node, AtomicSortNode):
 57 |         return node.name
 58 | 
 59 |     if isinstance(node, CompoundSortNode):
 60 |         return '({} {})'.format(generate_node(node.symbol, language), ' '.join(generate_node(s, language) for s in node.sorts))
 61 | 
 62 |     if isinstance(node, BracketsNode):
 63 |         return '({})'.format(' '.join(generate_node(s, language) for s in node.body))
 64 | 
 65 |     if isinstance(node, SettingNode):
 66 |         return '{}'.format(generate_node(node.name, language))
 67 | 
 68 |     if isinstance(node, MetaDataNode):
 69 |         return node.value
 70 | 
 71 |     if isinstance(node, ReAllCharNode):
 72 |         if language == SMT_25_STRING:
 73 |             return 're.allchar'
 74 |         else:
 75 |             raise NotSupported(node, language)
 76 | 
 77 |     if isinstance(node, str):
 78 |         return node
 79 | 
 80 |     # error out on all others
 81 |     raise NotImplementedError('no generator for {}'.format(type(node)))
 82 | 
 83 | def generate_lit(lit, language):
 84 |     if isinstance(lit, StringLitNode):
 85 |         return encode_string(lit.value, language)
 86 | 
 87 |     if isinstance(lit, BoolLitNode):
 88 |         return str(lit.value).lower()
 89 | 
 90 |     if isinstance(lit, IntLitNode):
 91 |         if (lit.value < 0):
 92 |             return '(- {})'.format(lit.value)
 93 |         return str(lit.value)
 94 | 
 95 |     raise NotImplementedError('unknown literal type {!r}'.format(lit))
 96 | 
 97 | def generate_expr(e, language):
 98 |     components = []
 99 | 
100 |     # special expressions
101 |     if isinstance(e, ConcatNode):
102 |         if language == SMT_20_STRING:
103 |             components.append('Concat')
104 |         elif language == SMT_25_STRING:
105 |             components.append('str.++')
106 |         else:
107 |             raise NotSupported(e, language)
108 | 
109 |     elif isinstance(e, ContainsNode):
110 |         if language == SMT_20_STRING:
111 |             components.append('Contains')
112 |         elif language == SMT_25_STRING:
113 |             components.append('str.contains')
114 |         else:
115 |             raise NotSupported(e, language)
116 | 
117 |     elif isinstance(e, AtNode):
118 |         if language == SMT_20_STRING:
119 |             components.append('CharAt')
120 |         elif language == SMT_25_STRING:
121 |             components.append('str.at')
122 |         else:
123 |             raise NotSupported(e, language)
124 | 
125 |     elif isinstance(e, LengthNode):
126 |         if language == SMT_20_STRING:
127 |             components.append('Length')
128 |         elif language == SMT_25_STRING:
129 |             components.append('str.len')
130 |         else:
131 |             raise NotSupported(e, language)
132 | 
133 |     elif isinstance(e, IndexOfNode):
134 |         if language == SMT_20_STRING:
135 |             components.append('IndexOf')
136 |         elif language == SMT_25_STRING:
137 |             components.append('str.indexof')
138 |         else:
139 |             raise NotSupported(e, language)
140 | 
141 |     elif isinstance(e, IndexOf2Node):
142 |         if language == SMT_20_STRING:
143 |             components.append('IndexOf2')
144 |         elif language == SMT_25_STRING:
145 |             components.append('str.indexof')
146 |         else:
147 |             raise NotSupported(e, language)
148 | 
149 |     elif isinstance(e, PrefixOfNode):
150 |         if language == SMT_20_STRING:
151 |             components.append('StartsWith')
152 |         elif language == SMT_25_STRING:
153 |             components.append('str.prefixof')
154 |         else:
155 |             raise NotSupported(e, language)
156 | 
157 |     elif isinstance(e, SuffixOfNode):
158 |         if language == SMT_20_STRING:
159 |             components.append('EndsWith')
160 |         elif language == SMT_25_STRING:
161 |             components.append('str.suffixof')
162 |         else:
163 |             raise NotSupported(e, language)
164 | 
165 |     elif isinstance(e, StringReplaceNode):
166 |         if language == SMT_20_STRING:
167 |             components.append('Replace')
168 |         elif language == SMT_25_STRING:
169 |             components.append('str.replace')
170 |         else:
171 |             raise NotSupported(e, language)
172 | 
173 |     elif isinstance(e, SubstringNode):
174 |         if language == SMT_20_STRING:
175 |             components.append('Substring')
176 |         elif language == SMT_25_STRING:
177 |             components.append('str.substr')
178 |         else:
179 |             raise NotSupported(e, language)
180 | 
181 |     elif isinstance(e, FromIntNode):
182 |         if language == SMT_25_STRING:
183 |             components.append('str.from.int')
184 |         else:
185 |             raise NotSupported(e, language)
186 | 
187 |     elif isinstance(e, ToIntNode):
188 |         if language == SMT_25_STRING:
189 |             components.append('str.to.int')
190 |         else:
191 |             raise NotSupported(e, language)
192 | 
193 |     elif isinstance(e, StrToReNode):
194 |         if language == SMT_20_STRING:
195 |             components.append('Str2Reg')
196 |         elif language == SMT_25_STRING:
197 |             components.append('str.to.re')
198 |         else:
199 |             raise NotSupported(e, language)
200 | 
201 |     elif isinstance(e, InReNode):
202 |         if language == SMT_20_STRING:
203 |             components.append('RegexIn')
204 |         elif language == SMT_25_STRING:
205 |             components.append('str.in.re')
206 |         else:
207 |             raise NotSupported(e, language)
208 | 
209 |     elif isinstance(e, ReConcatNode):
210 |         if language == SMT_20_STRING:
211 |             components.append('RegexConcat')
212 |         elif language == SMT_25_STRING:
213 |             components.append('re.++')
214 |         else:
215 |             raise NotSupported(e, language)
216 | 
217 |     elif isinstance(e, ReStarNode):
218 |         if language == SMT_20_STRING:
219 |             components.append('RegexStar')
220 |         elif language == SMT_25_STRING:
221 |             components.append('re.*')
222 |         else:
223 |             raise NotSupported(e, language)
224 | 
225 |     elif isinstance(e, RePlusNode):
226 |         if language == SMT_20_STRING:
227 |             components.append('RegexPlus')
228 |         elif language == SMT_25_STRING:
229 |             components.append('re.+')
230 |         else:
231 |             raise NotSupported(e, language)
232 | 
233 |     elif isinstance(e, ReRangeNode):
234 |         if language == SMT_20_STRING:
235 |             components.append('RegexCharRange')
236 |         elif language == SMT_25_STRING:
237 |             components.append('re.range')
238 |         else:
239 |             raise NotSupported(e, language)
240 | 
241 |     elif isinstance(e, ReUnionNode):
242 |         if language == SMT_20_STRING:
243 |             components.append('RegexUnion')
244 |         elif language == SMT_25_STRING:
245 |             components.append('re.union')
246 |         else:
247 |             raise NotSupported(e, language)
248 | 
249 |     elif isinstance(e, ReInterNode):
250 |         if language == SMT_25_STRING:
251 |             components.append('re.inter')
252 |         else:
253 |             raise NotSupported(e, language)
254 | 
255 |     # all other expressions
256 |     else:
257 |         components.append(generate_node(e.symbol, language))
258 | 
259 |     # generate args
260 |     components.extend(generate_node(n, language) for n in e.body)
261 | 
262 |     return '({})'.format(' '.join(components))
263 | 
264 | # public API
265 | def generate_file(ast, language, path):
266 |     with open(path, 'w+') as file:
267 |         file.write(generate(ast, language))
268 | 
269 | def generate(ast, language):
270 |     return '\n'.join(generate_node(e, language) for e in ast)
271 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/__init__.py:
--------------------------------------------------------------------------------
1 | from stringfuzz.generators.concats import *
2 | from stringfuzz.generators.lengths import *
3 | from stringfuzz.generators.overlaps import *
4 | from stringfuzz.generators.random_ast import *
5 | from stringfuzz.generators.random_text import *
6 | from stringfuzz.generators.regex import *
7 | from stringfuzz.generators.equality import *
8 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/concats.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | 
  3 | from stringfuzz.scanner import ALPHABET
  4 | from stringfuzz.smt import *
  5 | 
  6 | __all__ = [
  7 |     'concats',
  8 |     'SYNTACTIC_DEPTH',
  9 |     'SEMANTIC_DEPTH',
 10 | ]
 11 | 
 12 | # constants
 13 | SYNTACTIC_DEPTH = 'syntactic'
 14 | SEMANTIC_DEPTH  = 'semantic'
 15 | 
 16 | # functions
 17 | def set_equal(a, b):
 18 |     return smt_assert(smt_equal(a, b))
 19 | 
 20 | def set_concat(result, a, b):
 21 |     return set_equal(result, smt_concat(a, b))
 22 | 
 23 | def extract(character, string, index):
 24 |     return set_equal(character, smt_at(string, index))
 25 | 
 26 | def make_semantic_concats(depth, balanced):
 27 | 
 28 |     if balanced is True:
 29 |         raise ValueError('balanced trees with semantic concats are unsupported')
 30 | 
 31 |     # compute number of variables
 32 |     num_vars = (depth * 2) + 1
 33 | 
 34 |     # make variable names
 35 |     variables = [smt_var(i) for i in range(num_vars)]
 36 | 
 37 |     # make concats
 38 |     expressions = []
 39 |     for i in range(0, len(variables) - 2, 2):
 40 |         expression = set_concat(variables[i], variables[i + 1], variables[i + 2])
 41 |         expressions.append(expression)
 42 | 
 43 |     return variables, [], expressions
 44 | 
 45 | def make_syntactic_concats(depth, balanced):
 46 | 
 47 |     def concats_helper(depth, balanced):
 48 | 
 49 |         # base case
 50 |         if depth < 1:
 51 |             new_var = smt_new_var()
 52 |             return [new_var], new_var
 53 | 
 54 |         # make right side
 55 |         right_vars, right_expr = concats_helper(depth - 1, balanced)
 56 | 
 57 |         # make left side
 58 |         if balanced is True:
 59 |             left_vars, left_expr = concats_helper(depth - 1, balanced)
 60 |         else:
 61 |             left_vars, left_expr = concats_helper(0, balanced)
 62 | 
 63 |         # build return value
 64 |         all_vars = left_vars + right_vars
 65 |         concat   = smt_concat(left_expr, right_expr)
 66 | 
 67 |         return all_vars, concat
 68 | 
 69 |     # make first variable
 70 |     first_var = smt_new_var()
 71 | 
 72 |     # create return values
 73 |     variables   = [first_var]
 74 |     constants   = []
 75 |     expressions = []
 76 | 
 77 |     # make deep concat
 78 |     if depth > 0:
 79 |         concat_variables, concat_expr = concats_helper(depth, balanced)
 80 | 
 81 |         variables  += concat_variables
 82 |         expressions = [set_equal(first_var, concat_expr)]
 83 | 
 84 |     return variables, constants, expressions
 85 | 
 86 | def make_concats(depth, depth_type, solution, balanced, num_extracts, max_extract_index):
 87 | 
 88 |     # generate concats
 89 |     if depth_type == SEMANTIC_DEPTH:
 90 |         variables, constants, expressions = make_semantic_concats(depth, balanced)
 91 | 
 92 |     else:
 93 |         variables, constants, expressions = make_syntactic_concats(depth, balanced)
 94 | 
 95 |     # get first variable
 96 |     first_var = variables[0]
 97 | 
 98 |     # validate args
 99 |     max_num_extracts      = max_extract_index + 1
100 |     num_chars_in_vars     = max_num_extracts * len(variables)
101 |     num_chars_in_consts   = sum(map(len, constants))
102 |     num_possible_extracts = num_chars_in_vars + num_chars_in_consts
103 |     if num_extracts > num_possible_extracts:
104 |         raise ValueError('number of requested extracts exceeds number of possible unique extracts')
105 | 
106 |     # set first variable to expected solution if one was given
107 |     if solution is not None:
108 |         expressions.append(set_equal(first_var, smt_str_lit(solution)))
109 | 
110 |     # add extracts if required
111 |     if num_extracts > 0:
112 | 
113 |         # create model to avoid contradictions
114 |         extract_model  = {var : list(range(max_num_extracts)) for var in variables}
115 |         remaining_vars = list(variables)
116 | 
117 |         # shuffle indices in model
118 |         for indices in extract_model.values():
119 |             random.shuffle(indices)
120 | 
121 |         # create the extracts
122 |         for i in range(num_extracts):
123 | 
124 |             # randomly pick a variable and a char to extract from it
125 |             var_index = random.randrange(len(remaining_vars))
126 |             var       = remaining_vars[var_index]
127 |             char      = smt_str_lit(random.choice(ALPHABET))
128 | 
129 |             # pop the first index from which to extract, without replacement
130 |             index = smt_int_lit(extract_model[var].pop())
131 | 
132 |             # remove the variable if it can no longer be extracted from
133 |             num_remaining_indices = len(extract_model[var])
134 |             if num_remaining_indices < 1:
135 |                 remaining_vars.pop(var_index)
136 | 
137 |             # add extract
138 |             expressions.append(extract(char, var, index))
139 | 
140 |     # create definitions
141 |     definitions = []
142 |     definitions.extend([smt_declare_var(v) for v in variables])
143 |     definitions.extend([smt_declare_const(v) for v in constants])
144 | 
145 |     # add sat-check
146 |     expressions.append(smt_check_sat())
147 | 
148 |     return definitions + expressions
149 | 
150 | # public API
151 | def concats(*args, **kwargs):
152 |     smt_reset_counters()
153 |     return make_concats(*args, **kwargs)
154 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/equality.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | from stringfuzz.scanner import ALPHABET
 4 | from stringfuzz.smt import *
 5 | from stringfuzz.util import join_terms_with, random_string
 6 | 
 7 | __all__ = [
 8 |     'equality',
 9 | ]
10 | 
11 | def get_length(max_length, randomise):
12 |     if randomise is False:
13 |         return max_length
14 |     return random.randint(0, max_length)
15 | 
16 | def randomly_add_infix(probability):
17 |     return random.random() < probability
18 | 
19 | def make_equality(num_expressions, num_terms, prefix_length, suffix_length, add_infixes, infix_length, randomise_lengths, infix_probability):
20 | 
21 |     # check args
22 |     if num_expressions < 1:
23 |         raise ValueError('the number of expressions must be at least 1')
24 | 
25 |     if num_terms < 2:
26 |         raise ValueError('the number of terms per expression must be at least 2')
27 | 
28 |     if infix_probability < 0.0 or 1.0 < infix_probability:
29 |         raise ValueError('the probability of infixes must be between 0.0 and 1.0')
30 | 
31 |     # result values
32 |     expressions = []
33 |     variables   = []
34 | 
35 |     # create root variable
36 |     root = smt_new_var()
37 |     variables.append(root)
38 | 
39 |     # create expressions
40 |     for i in range(num_expressions):
41 | 
42 |         # prefix and suffix
43 |         prefix = smt_str_lit(random_string(get_length(prefix_length, randomise_lengths)))
44 |         suffix = smt_str_lit(random_string(get_length(suffix_length, randomise_lengths)))
45 | 
46 |         # keep track of new variables
47 |         new_variables = []
48 | 
49 |         # create middle
50 |         middle = []
51 |         for i in range(num_terms - 2):
52 | 
53 |             # if infixes are enabled, add them with the given probability
54 |             if add_infixes is True and randomly_add_infix(infix_probability) is True:
55 |                 new_term = smt_str_lit(random_string(get_length(infix_length, randomise_lengths)))
56 | 
57 |             # otherwise, just add variables
58 |             else:
59 |                 new_term = smt_new_var()
60 |                 new_variables.append(new_term)
61 | 
62 |             middle.append(new_term)
63 | 
64 |         # compose full expression
65 |         terms    = [prefix] + middle + [suffix]
66 |         concat   = join_terms_with(terms, smt_concat)
67 |         equality = smt_assert(smt_equal(root, concat))
68 | 
69 |         # remember variables and expressions
70 |         variables += new_variables
71 |         expressions.append(equality)
72 | 
73 |     # add check sat
74 |     expressions.append(smt_check_sat())
75 | 
76 |     # create variable declarations
77 |     declarations = []
78 |     for v in variables:
79 |         declarations.append(smt_declare_var(v))
80 | 
81 |     return declarations + expressions
82 | 
83 | # public API
84 | def equality(*args, **kwargs):
85 |     smt_reset_counters()
86 |     return make_equality(*args, **kwargs)
87 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/lengths.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | from collections import namedtuple
 4 | 
 5 | from stringfuzz.smt import *
 6 | 
 7 | __all__ = [
 8 |     'lengths',
 9 | ]
10 | 
11 | # data structures
12 | Variable = namedtuple('Variable', ['length'])
13 | 
14 | # functions
15 | def new_model(min_length, max_length):
16 |     length = random.randint(min_length, max_length)
17 |     return Variable(length)
18 | 
19 | def set_equal(a, b):
20 |     return smt_assert(smt_equal(a, b))
21 | 
22 | def make_lengths(num_vars, min_length, max_length, num_concats, random_relations):
23 | 
24 |     # make list of possible relations to use in constraints
25 |     if random_relations is True:
26 |         def choose_relation():
27 |             return random.choice([smt_equal, smt_gt, smt_lt])
28 |     else:
29 |         def choose_relation():
30 |             return smt_equal
31 | 
32 |     # create variables
33 |     variables = [smt_new_var() for i in range(num_vars)]
34 | 
35 |     # create model
36 |     model = {v : new_model(min_length, max_length) for v in variables}
37 | 
38 |     # create length constraints
39 |     expressions = []
40 |     for v in variables:
41 | 
42 |         # pick a relation
43 |         chosen_relation = choose_relation()
44 | 
45 |         # build constraint
46 |         model_length  = smt_int_lit(model[v].length)
47 |         actual_length = smt_len(v)
48 |         constraint    = smt_assert(chosen_relation(model_length, actual_length))
49 | 
50 |         # add constraint
51 |         expressions.append(constraint)
52 | 
53 |     # validate args
54 |     max_num_concats = num_vars // 2
55 |     if num_concats > max_num_concats:
56 |         raise ValueError('can\'t add more concats than the number of variables divided by 2 (that is, {})'.format(max_num_concats))
57 | 
58 |     # if concats are required, add them
59 |     if num_concats > 0:
60 | 
61 |         # copy and shuffle variable list to use in concats
62 |         unused_variables = list(variables)
63 |         random.shuffle(unused_variables)
64 | 
65 |         # generate the concats
66 |         for i in range(num_concats):
67 | 
68 |             # pick operands
69 |             a          = unused_variables.pop()
70 |             b          = unused_variables.pop()
71 |             concat     = smt_concat(a, b)
72 |             sum_length = model[a].length + model[b].length
73 | 
74 |             # pick a relation
75 |             chosen_relation = choose_relation()
76 | 
77 |             # build constraint
78 |             sum_length_lit = smt_int_lit(sum_length)
79 |             actual_length  = smt_len(concat)
80 |             constraint     = smt_assert(chosen_relation(sum_length_lit, actual_length))
81 | 
82 |             # add constraint
83 |             expressions.append(constraint)
84 | 
85 |     # add sat-check
86 |     expressions.append(smt_check_sat())
87 | 
88 |     # create declarations
89 |     declarations = [smt_declare_var(v) for v in variables]
90 | 
91 |     return declarations + expressions
92 | 
93 | # public API
94 | def lengths(*args, **kwargs):
95 |     smt_reset_counters()
96 |     return make_lengths(*args, **kwargs)
97 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/overlaps.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | from stringfuzz.scanner import ALPHABET
 4 | from stringfuzz.smt import *
 5 | from stringfuzz.util import join_terms_with, random_string
 6 | 
 7 | __all__ = [
 8 |     'overlaps',
 9 | ]
10 | 
11 | def make_overlaps(num_vars, length_of_consts):
12 | 
13 |     # check args
14 |     if num_vars < 1:
15 |         raise ValueError('the number of variables must be at least 1')
16 | 
17 |     # create constants
18 |     left  = smt_str_lit(random_string(length_of_consts))
19 |     right = smt_str_lit(random_string(length_of_consts))
20 | 
21 |     # create middle variables
22 |     middle_vars = [smt_new_var() for i in range(num_vars)]
23 |     middle      = join_terms_with(middle_vars, smt_concat)
24 | 
25 |     # create overlapping constraint
26 |     left_concat     = smt_concat(left, middle)
27 |     right_concat    = smt_concat(middle, right)
28 |     concat_equality = smt_assert(smt_equal(left_concat, right_concat))
29 | 
30 |     # add constraint and sat-check
31 |     expressions = [
32 |         concat_equality,
33 |         smt_check_sat()
34 |     ]
35 | 
36 |     # create variable declarations
37 |     declarations = []
38 |     for v in middle_vars:
39 |         declarations.append(smt_declare_var(v))
40 | 
41 |     return declarations + expressions
42 | 
43 | # public API
44 | def overlaps(*args, **kwargs):
45 |     smt_reset_counters()
46 |     return make_overlaps(*args, **kwargs)
47 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/random_ast.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import inspect
  3 | 
  4 | from stringfuzz.ast import *
  5 | from stringfuzz.smt import smt_new_var, smt_reset_counters, smt_declare_var
  6 | from stringfuzz.util import random_string, coin_toss
  7 | 
  8 | __all__ = [
  9 |     'random_ast'
 10 | ]
 11 | 
 12 | # constants
 13 | # nodes that have no inputs
 14 | TERMINALS = [
 15 |     ReAllCharNode,
 16 | ]
 17 | 
 18 | # nodes that can take expressions
 19 | NONTERMINALS = [
 20 |     NotNode,
 21 |     GtNode,
 22 |     LtNode,
 23 |     GteNode,
 24 |     LteNode,
 25 |     ContainsNode,
 26 |     AtNode,
 27 |     LengthNode,
 28 |     # IndexOfNode,
 29 |     IndexOf2Node,
 30 |     PrefixOfNode,
 31 |     SuffixOfNode,
 32 |     StringReplaceNode,
 33 |     SubstringNode,
 34 |     InReNode,
 35 |     ReStarNode,
 36 |     RePlusNode,
 37 |     # FromIntNode,
 38 |     # ToIntNode,
 39 | ]
 40 | 
 41 | # nodes that can take only terminals
 42 | ALMOST_TERMINALS = [
 43 |     StrToReNode,
 44 |     ReRangeNode,
 45 | ]
 46 | 
 47 | N_ARY_NONTERMINALS = [
 48 |     ConcatNode,
 49 |     ReConcatNode,
 50 |     AndNode,
 51 |     OrNode,
 52 |     EqualNode,
 53 |     ReUnionNode,
 54 |     ReInterNode,
 55 | ]
 56 | 
 57 | EXPRESSION_SORTS = DECLARABLE_SORTS + [REGEX_SORT]
 58 | 
 59 | # global config
 60 | _max_terms           = 0
 61 | _max_str_lit_length  = 0
 62 | _max_int_lit         = 0
 63 | _literal_probability = 0.0
 64 | _semantically_valid  = False
 65 | 
 66 | # helpers
 67 | def get_all_returning_a(sort, nodes):
 68 |     return list(filter(lambda node: node.returns(sort), nodes))
 69 | 
 70 | def get_terminals(nodes):
 71 |     return filter(lambda node: node.is_terminal(), nodes)
 72 | 
 73 | def make_random_literal(sort):
 74 |     if sort == STRING_SORT:
 75 |         return StringLitNode(random_string(_max_str_lit_length))
 76 | 
 77 |     if sort == INT_SORT:
 78 |         return IntLitNode(random.randint(0, _max_int_lit))
 79 | 
 80 |     if sort == BOOL_SORT:
 81 |         return BoolLitNode(coin_toss())
 82 | 
 83 |     raise ValueError('unknown sort {}'.format(sort))
 84 | 
 85 | def should_choose_literal():
 86 |     global _literal_probability
 87 |     return random.random() < _literal_probability
 88 | 
 89 | def make_random_terminal(variables, sort):
 90 | 
 91 |     if sort == REGEX_SORT:
 92 |         return ReAllCharNode()
 93 | 
 94 |     # randomly choose between a variable or a literal
 95 |     if should_choose_literal():
 96 |         return make_random_literal(sort)
 97 | 
 98 |     return random.choice(variables[sort])
 99 | 
100 | def make_random_expression(variables, sort, depth):
101 |     global _semantically_valid
102 | 
103 |     # if semantics are going to hell, then randomly reinvent the sort
104 |     if _semantically_valid is False:
105 |         sort = random.choice(EXPRESSION_SORTS)
106 | 
107 |     # at depth 0, make a terminal
108 |     if depth < 1:
109 |         return make_random_terminal(variables, sort)
110 | 
111 |     # randomly shrink the depth
112 |     shrunken_depth = random.randint(0, depth - 1)
113 | 
114 |     # get random expression generator
115 |     candidate_nodes = get_all_returning_a(sort, NONTERMINALS)
116 |     expression_node = random.choice(candidate_nodes)
117 |     signature       = expression_node.get_signature()
118 |     num_args        = len(signature)
119 | 
120 |     # if the expression takes any sort, pick one
121 |     if expression_node.accepts(ANY_SORT):
122 |         collapsed_sort = random.choice(EXPRESSION_SORTS)
123 |         signature      = [collapsed_sort for i in range(num_args)]
124 | 
125 |     # generate random arguments
126 |     random_args = [make_random_expression(variables, arg_sort, shrunken_depth) for arg_sort in signature]
127 | 
128 |     # build expression
129 |     expression = expression_node(*random_args)
130 | 
131 |     return expression
132 | 
133 | def generate_assert(variables, depth):
134 |     expression = make_random_expression(variables, BOOL_SORT, depth)
135 |     return AssertNode(expression)
136 | 
137 | def make_random_ast(num_vars, num_asserts, depth, max_terms, max_str_lit_length, max_int_lit, literal_probability, semantically_valid):
138 |     global _max_terms
139 |     global _max_str_lit_length
140 |     global _max_int_lit
141 |     global _literal_probability
142 |     global _semantically_valid
143 | 
144 |     # set global config
145 |     _max_terms           = max_terms
146 |     _max_str_lit_length  = max_str_lit_length
147 |     _max_int_lit         = max_int_lit
148 |     _literal_probability = literal_probability
149 |     _semantically_valid  = semantically_valid
150 | 
151 |     # create variables
152 |     variables = {s: [smt_new_var() for i in range(num_vars)] for s in DECLARABLE_SORTS}
153 | 
154 |     # create declarations
155 |     declarations = []
156 |     for s in DECLARABLE_SORTS:
157 |         new_declarations = [smt_declare_var(v, sort=s) for v in variables[s]]
158 |         declarations.extend(new_declarations)
159 | 
160 |     # create asserts
161 |     asserts = [generate_assert(variables, depth) for i in range(num_asserts)]
162 | 
163 |     # add check-sat
164 |     expressions = asserts + [CheckSatNode()]
165 | 
166 |     return declarations + expressions
167 | 
168 | # public API
169 | def random_ast(*args, **kwargs):
170 |     smt_reset_counters()
171 |     return make_random_ast(*args, **kwargs)
172 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/random_text.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | from stringfuzz.scanner import ALPHABET, WHITESPACE
 4 | 
 5 | __all__ = [
 6 |     'random_text',
 7 | ]
 8 | 
 9 | # constants
10 | ALL_CHARS = ALPHABET + WHITESPACE
11 | 
12 | # functions
13 | def make_random_text(length):
14 |     return ''.join(random.choice(ALL_CHARS) for i in range(length))
15 | 
16 | # public API
17 | def random_text(*args, **kwargs):
18 |     return make_random_text(*args, **kwargs)
19 | 


--------------------------------------------------------------------------------
/stringfuzz/generators/regex.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import re
  3 | 
  4 | from stringfuzz.scanner import ALPHABET
  5 | from stringfuzz.smt import *
  6 | from stringfuzz.util import join_terms_with, random_string, coin_toss
  7 | 
  8 | __all__ = [
  9 |     'regex',
 10 |     'INCREASING_LITERALS',
 11 |     'RANDOM_LITERALS',
 12 |     'MEMBER_IN',
 13 |     'MEMBER_NOT_IN',
 14 |     'MEMBER_ALTERNATING',
 15 |     'MEMBER_RANDOM',
 16 |     'OPERATOR_STAR',
 17 |     'OPERATOR_PLUS',
 18 |     'OPERATOR_UNION',
 19 |     'OPERATOR_INTER',
 20 |     'OPERATOR_CONCAT',
 21 |     'OPERATOR_ALTERNATING',
 22 |     'OPERATOR_RANDOM',
 23 | ]
 24 | 
 25 | # constants
 26 | INCREASING_LITERALS = 'increasing'
 27 | RANDOM_LITERALS     = 'random'
 28 | 
 29 | LITERAL_TYPES = [
 30 |     INCREASING_LITERALS,
 31 |     RANDOM_LITERALS,
 32 | ]
 33 | 
 34 | MEMBER_IN          = 'in'
 35 | MEMBER_NOT_IN      = 'not-in'
 36 | MEMBER_ALTERNATING = 'alternating'
 37 | MEMBER_RANDOM      = 'random'
 38 | 
 39 | MEMBERSHIP_TYPES = [
 40 |     MEMBER_IN,
 41 |     MEMBER_NOT_IN,
 42 |     MEMBER_ALTERNATING,
 43 |     MEMBER_RANDOM,
 44 | ]
 45 | 
 46 | OPERATOR_STAR   = 's'
 47 | OPERATOR_PLUS   = 'p'
 48 | OPERATOR_UNION  = 'u'
 49 | OPERATOR_INTER  = 'i'
 50 | OPERATOR_CONCAT = 'c'
 51 | 
 52 | OPERATOR_LIST = [
 53 |     OPERATOR_STAR,
 54 |     OPERATOR_PLUS,
 55 |     OPERATOR_UNION,
 56 |     OPERATOR_INTER,
 57 |     OPERATOR_CONCAT,
 58 | ]
 59 | 
 60 | OPERATOR_ALTERNATING = 'alternating'
 61 | OPERATOR_RANDOM      = 'random'
 62 | 
 63 | OPERATOR_TYPES = [
 64 |     OPERATOR_ALTERNATING,
 65 |     OPERATOR_RANDOM,
 66 | ]
 67 | 
 68 | # global config
 69 | # NOTE:
 70 | #      using globals because it's annoying to pass around a bunch of variables
 71 | _cursor       = 0
 72 | _literal_type = None
 73 | _literal_min  = 1
 74 | _literal_max  = 1
 75 | 
 76 | # helpers
 77 | def fill_string(character, length):
 78 |     return character * length
 79 | 
 80 | def get_char_and_advance():
 81 |     global _cursor
 82 |     character = ALPHABET[_cursor]
 83 |     _cursor   = (_cursor + 1) % len(ALPHABET)
 84 |     return character
 85 | 
 86 | def make_regex_string(min_length, max_length):
 87 |     global _literal_type
 88 | 
 89 |     chosen_length = random.randint(min_length, max_length)
 90 | 
 91 |     # use a fixed-length string of one character, each time using
 92 |     # the next character from the alphabet
 93 |     if _literal_type == INCREASING_LITERALS:
 94 |         filler = get_char_and_advance()
 95 |         string = fill_string(filler, chosen_length)
 96 | 
 97 |     # generate a random string
 98 |     elif _literal_type == RANDOM_LITERALS:
 99 |         string = random_string(chosen_length)
100 | 
101 |     return smt_str_to_re(smt_str_lit(string))
102 | 
103 | def make_random_term(depth, operator_index):
104 |     if depth == 0:
105 |         return make_regex_string(_literal_min, _literal_max)
106 | 
107 |     if _operator_type == OPERATOR_ALTERNATING:
108 |         next_operator_index = operator_index + 1
109 |     else:
110 |         next_operator_index = random.randrange(len(_operator_list))
111 | 
112 |     operator = get_operator_at_index(operator_index)
113 |     subterm = make_random_term(depth - 1, next_operator_index)
114 | 
115 |     if operator == OPERATOR_STAR:
116 |         return smt_regex_star(subterm)
117 | 
118 |     if operator == OPERATOR_PLUS:
119 |         return smt_regex_plus(subterm)
120 | 
121 |     if operator == OPERATOR_UNION:
122 |         second_subterm = make_random_term(depth - 1, next_operator_index)
123 |         return smt_regex_union(subterm, second_subterm)
124 | 
125 |     if operator == OPERATOR_INTER:
126 |         second_subterm = make_random_term(depth - 1, next_operator_index)
127 |         return smt_regex_inter(subterm, second_subterm)
128 | 
129 |     if operator == OPERATOR_CONCAT:
130 |         second_subterm = make_random_term(depth - 1, next_operator_index)
131 |         return smt_regex_concat(subterm, second_subterm)
132 | 
133 | def make_random_terms(num_terms, depth):
134 |     if _operator_type == OPERATOR_ALTERNATING:
135 |         terms = [make_random_term(depth, 0) for i in range(num_terms)]
136 |     else:
137 |         terms = [make_random_term(depth, random.randrange(len(_operator_list))) for i in range(num_terms)]
138 | 
139 |     regex = join_terms_with(terms, smt_regex_concat)
140 |     return regex
141 | 
142 | def toggle_membership_type(t):
143 |     if t == MEMBER_IN:
144 |         return MEMBER_NOT_IN
145 |     return MEMBER_IN
146 | 
147 | def get_operator_at_index(index):
148 |     global _operator_list
149 | 
150 |     return _operator_list[index % len(_operator_list)]
151 | 
152 | def make_constraint(variable, r):
153 |     global _configured_membership
154 |     global _current_membership
155 | 
156 |     # if random, set the membership type randomly
157 |     if _configured_membership == MEMBER_RANDOM:
158 |         if coin_toss():
159 |             _current_membership = MEMBER_IN
160 |         else:
161 |             _current_membership = MEMBER_NOT_IN
162 | 
163 |     # if toggle, toggle membership type
164 |     elif _configured_membership == MEMBER_ALTERNATING:
165 |         _current_membership = toggle_membership_type(_current_membership)
166 | 
167 |     # create constraint
168 |     constraint = smt_regex_in(variable, r)
169 | 
170 |     # negate it if required
171 |     if _current_membership == MEMBER_NOT_IN:
172 |         constraint = smt_not(constraint)
173 | 
174 |     return constraint
175 | 
176 | def make_regex(
177 |     num_regexes,
178 |     num_terms,
179 |     literal_min,
180 |     literal_max,
181 |     term_depth,
182 |     literal_type,
183 |     membership_type,
184 |     reset_alphabet,
185 |     max_var_length,
186 |     min_var_length,
187 |     operators,
188 |     operator_type,
189 | ):
190 | 
191 |     # check args
192 |     if num_regexes < 1:
193 |         raise ValueError('number of regexes must be greater than 0')
194 | 
195 |     if num_terms < 1:
196 |         raise ValueError('number of terms must be greater than 0')
197 | 
198 |     if literal_min < 1:
199 |         raise ValueError('min literal length must be greater than 0')
200 | 
201 |     if literal_max < 1:
202 |         raise ValueError('max literal length must be greater than 0')
203 | 
204 |     if literal_max < literal_min:
205 |         raise ValueError('max literal length must not be less than min literal length')
206 | 
207 |     if term_depth < 0:
208 |         raise ValueError('depths of terms must not be less than 0')
209 | 
210 |     if literal_type not in LITERAL_TYPES:
211 |         raise ValueError('unknown literal type: {!r}'.format(literal_type))
212 | 
213 |     if membership_type not in MEMBERSHIP_TYPES:
214 |         raise ValueError('unknown membership type: {!r}'.format(membership_type))
215 | 
216 |     if min_var_length is not None and min_var_length < 0:
217 |         raise ValueError('min variable length must not be less than 0')
218 | 
219 |     if max_var_length is not None and max_var_length < 0:
220 |         raise ValueError('max variable length must not be less than 0')
221 | 
222 |     if len(operators) < 1 or any(map(lambda x: x not in OPERATOR_LIST, operators)):
223 |         raise ValueError('invalid operators: {!r}'.format(operators))
224 | 
225 |     if operator_type not in OPERATOR_TYPES:
226 |         raise ValueError('unknown operator type: {!r}'.format(operator_type))
227 | 
228 |     # set globals
229 |     global _cursor
230 |     global _literal_type
231 |     global _configured_membership
232 |     global _current_membership
233 |     global _literal_min
234 |     global _literal_max
235 |     global _operator_list
236 |     global _operator_type
237 | 
238 |     _cursor                = 0
239 |     _literal_type          = literal_type
240 |     _configured_membership = membership_type
241 |     _current_membership    = _configured_membership
242 |     _literal_min           = literal_min
243 |     _literal_max           = literal_max
244 |     _operator_list         = []
245 |     _operator_type         = operator_type
246 | 
247 |     # parse operator list in order, in case user wants a custom alternation order
248 |     for c in operators:
249 |         if c not in _operator_list:
250 |             _operator_list.append(c)
251 | 
252 |     # create variable
253 |     matched = smt_new_var()
254 | 
255 |     # create regexes
256 |     regexes = []
257 |     for i in range(num_regexes):
258 | 
259 |         # reset alphabet for every regex if required
260 |         if reset_alphabet is True:
261 |             _cursor = 0
262 | 
263 |         new_regex = make_random_terms(num_terms, term_depth)
264 |         regexes.append(new_regex)
265 | 
266 |     # create regex constraints
267 |     expressions = []
268 |     for r in regexes:
269 |         constraint = make_constraint(matched, r)
270 |         expressions.append(smt_assert(constraint))
271 | 
272 |     # create length constraints if required
273 |     if min_var_length is not None:
274 |         min_bound = smt_int_lit(min_var_length)
275 |         equality  = smt_lte(min_bound, smt_len(matched))
276 |         expressions.append(smt_assert(equality))
277 | 
278 |     if max_var_length is not None:
279 |         max_bound = smt_int_lit(max_var_length)
280 |         equality  = smt_lte(smt_len(matched), max_bound)
281 |         expressions.append(smt_assert(equality))
282 | 
283 |     # add sat check
284 |     expressions.append(smt_check_sat())
285 | 
286 |     # create declarations
287 |     declarations = [
288 |         smt_declare_var(matched)
289 |     ]
290 | 
291 |     return declarations + expressions
292 | 
293 | # public API
294 | def regex(*args, **kwargs):
295 |     smt_reset_counters()
296 |     return make_regex(*args, **kwargs)
297 | 


--------------------------------------------------------------------------------
/stringfuzz/mergers/__init__.py:
--------------------------------------------------------------------------------
1 | from stringfuzz.mergers.simple import *
2 | 


--------------------------------------------------------------------------------
/stringfuzz/mergers/simple.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | 
 3 | from stringfuzz.ast import ExpressionNode, SortNode, IdentifierNode, FunctionDeclarationNode, SortedVarNode
 4 | from stringfuzz.ast_walker import ASTWalker
 5 | 
 6 | __all__ = [
 7 |     'simple'
 8 | ]
 9 | 
10 | def alternate_merge(asts, merged):
11 |     for ast in asts:
12 |         if ast:
13 |             node = ast.pop(0)
14 |             if not node in merged:
15 |                 merged.append(node)
16 |     if any(asts):
17 |         merged = alternate_merge(asts, merged)
18 |     return merged
19 | 
20 | class RenameIDWalker(ASTWalker):
21 |     def __init__(self, ast, suffix):
22 |         super(RenameIDWalker, self).__init__(ast)
23 |         self.suffix = suffix
24 | 
25 |     def exit_identifier(self, identifier, parent):
26 |         identifier.name += "_{}".format(self.suffix)
27 | 
28 | def simple(asts, rename_ids):
29 |     if rename_ids:
30 |         for i in range(len(asts)):
31 |             asts[i] = RenameIDWalker(asts[i], i).walk()
32 |     merged = alternate_merge(asts, [])
33 |     return merged
34 | 


--------------------------------------------------------------------------------
/stringfuzz/parser.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from stringfuzz.scanner import scan
  4 | from stringfuzz.ast import *
  5 | from stringfuzz.util import join_terms_with
  6 | 
  7 | __all__ = [
  8 |     'parse',
  9 |     'parse_file',
 10 |     'parse_tokens',
 11 |     'ParsingError',
 12 | ]
 13 | 
 14 | # constants
 15 | MAX_ERROR_SIZE = 200
 16 | UNDERLINE      = '-'
 17 | 
 18 | MESSAGE_FORMAT = '''Parsing error on line {number}:
 19 | 
 20 | {context}{actual_value}
 21 | {underline}^
 22 | {filler}expected {expected}, got {actual_type} {actual_value!r}'''
 23 | 
 24 | # data structures
 25 | class Stream(object):
 26 | 
 27 |     def __init__(self, tokens, text):
 28 |         self.text = text
 29 |         self.current_token = None
 30 |         self.stream = (t for t in tokens)
 31 | 
 32 |     def advance(self):
 33 |         self.current_token = next(self.stream, None)
 34 | 
 35 |     def accept(self, name):
 36 |         if self.current_token is not None and self.current_token.name == name:
 37 |             self.advance()
 38 |             return True
 39 |         return False
 40 | 
 41 |     def peek(self):
 42 |         return self.current_token
 43 | 
 44 |     def expect(self, expected):
 45 |         previous = self.current_token
 46 |         if self.accept(expected):
 47 |             return previous
 48 |         raise ParsingError(expected, self)
 49 | 
 50 | class ParsingError(IndexError):
 51 |     def __init__(self, expected, stream):
 52 | 
 53 |         # compute actual value
 54 |         actual_token = stream.current_token
 55 |         if actual_token is not None:
 56 |             actual_type  = actual_token.name
 57 |             actual_value = actual_token.value
 58 |             error_index  = actual_token.position
 59 |         else:
 60 |             actual_type  = 'nothing'
 61 |             actual_value = ''
 62 |             error_index  = len(stream.text) - 1
 63 | 
 64 |         # get error context
 65 |         parsed_text = stream.text[0:error_index]
 66 |         context     = parsed_text[-MAX_ERROR_SIZE:]
 67 | 
 68 |         if len(context) < len(parsed_text):
 69 |             context = '... ' + context
 70 | 
 71 |         # find row and column of error
 72 |         try:
 73 |             latest_newline_index = parsed_text.rindex('\n')
 74 |         except ValueError as e:
 75 |             latest_newline_index = 0
 76 | 
 77 |         error_row    = parsed_text.count('\n') + 1
 78 |         error_column = error_index - latest_newline_index - 1
 79 | 
 80 |         # compose message
 81 |         message = MESSAGE_FORMAT.format(
 82 |             number       = error_row,
 83 |             context      = context,
 84 |             underline    = (UNDERLINE * error_column),
 85 |             filler       = (' ' * error_column),
 86 |             expected     = expected,
 87 |             actual_type  = actual_type,
 88 |             actual_value = actual_value,
 89 |         )
 90 | 
 91 |         # pass message to superclass
 92 |         super().__init__(message)
 93 | 
 94 | # parsers
 95 | def accept_arg(s):
 96 |     token = s.peek()
 97 | 
 98 |     # nested expression
 99 |     if s.accept('LPAREN'):
100 |         expression = expect_expression(s)
101 |         s.expect('RPAREN')
102 |         return expression
103 | 
104 |     # literal
105 |     if s.accept('BOOL_LIT'):
106 |         if token.value == 'true':
107 |             return BoolLitNode(True)
108 |         elif token.value == 'false':
109 |             return BoolLitNode(False)
110 | 
111 |     if s.accept('INT_LIT'):
112 |         return IntLitNode(int(token.value))
113 | 
114 |     if s.accept('STRING_LIT'):
115 |         return StringLitNode(token.value)
116 | 
117 |     # others
118 |     if s.accept('RE_ALLCHAR'):
119 |         return ReAllCharNode()
120 | 
121 |     if s.accept('IDENTIFIER'):
122 |         return IdentifierNode(token.value)
123 | 
124 |     if s.accept('SETTING'):
125 |         return SettingNode(token.value)
126 | 
127 |     return None
128 | 
129 | def accept_meta_arg(s):
130 |     arg = s.peek()
131 | 
132 |     if (
133 |         s.accept('BOOL_LIT') or
134 |         s.accept('INT_LIT') or
135 |         s.accept('STRING_LIT') or
136 |         s.accept('IDENTIFIER')
137 |     ):
138 |         return MetaDataNode(arg.value)
139 | 
140 |     if s.accept('SETTING'):
141 |         return SettingNode(arg.value)
142 | 
143 |     return None
144 | 
145 | def expect_identifier(s):
146 |     token = s.expect('IDENTIFIER')
147 |     return IdentifierNode(token.value)
148 | 
149 | def expect_arg(s):
150 |     result = accept_arg(s)
151 | 
152 |     if result is None:
153 |         raise ParsingError('an argument', s)
154 | 
155 |     return result
156 | 
157 | def expect_sort(s):
158 |     result = accept_sort(s)
159 | 
160 |     if result is None:
161 |         raise ParsingError('a sort', s)
162 | 
163 |     return result
164 | 
165 | def repeat_star(s, getter):
166 |     terms = []
167 | 
168 |     while True:
169 |         term = getter(s)
170 | 
171 |         # break on no term
172 |         if term is None:
173 |             break
174 | 
175 |         terms.append(term)
176 | 
177 |     return terms
178 | 
179 | def accept_sort(s):
180 | 
181 |     # compound sort
182 |     if s.accept('LPAREN'):
183 |         symbol = expect_identifier(s)
184 |         sorts  = [expect_sort(s)]
185 |         sorts += repeat_star(s, accept_sort)
186 |         s.expect('RPAREN')
187 |         return CompoundSortNode(symbol, sorts)
188 | 
189 |     # atomic sort
190 |     token = s.peek()
191 |     if s.accept('IDENTIFIER'):
192 |         return AtomicSortNode(token.value)
193 | 
194 |     return None
195 | 
196 | def accept_sorted_var(s):
197 |     if s.accept('LPAREN'):
198 |         name = expect_identifier(s)
199 |         sort = expect_sort(s)
200 |         s.expect('RPAREN')
201 |         return SortedVarNode(name, sort)
202 | 
203 |     return None
204 | 
205 | def expect_expression(s):
206 | 
207 |     if s.accept('ASSERT'):
208 |         assertion = expect_arg(s)
209 |         return AssertNode(assertion)
210 | 
211 |     # declarations and definitions
212 |     if s.accept('DECLARE_FUN'):
213 |         name = expect_identifier(s)
214 | 
215 |         s.expect('LPAREN')
216 |         signature = repeat_star(s, accept_sort)
217 |         s.expect('RPAREN')
218 | 
219 |         return_sort = expect_sort(s)
220 | 
221 |         return FunctionDeclarationNode(name, BracketsNode(signature), return_sort)
222 | 
223 |     if s.accept('DEFINE_FUN'):
224 |         name = expect_identifier(s)
225 | 
226 |         s.expect('LPAREN')
227 |         signature = repeat_star(s, accept_sorted_var)
228 |         s.expect('RPAREN')
229 | 
230 |         return_sort = expect_sort(s)
231 | 
232 |         s.expect('LPAREN')
233 |         body = expect_expression(s)
234 |         s.expect('RPAREN')
235 | 
236 |         return FunctionDefinitionNode(name, BracketsNode(signature), return_sort, body)
237 | 
238 |     if s.accept('DECLARE_CONST'):
239 |         name        = expect_identifier(s)
240 |         return_sort = expect_sort(s)
241 |         return ConstantDeclarationNode(name, return_sort)
242 | 
243 |     # special expression cases
244 |     if s.accept('CONCAT'):
245 | 
246 |         # first two args are mandatory
247 |         a = expect_arg(s)
248 |         b = expect_arg(s)
249 | 
250 |         # more args are optional
251 |         other_args = repeat_star(s, accept_arg)
252 | 
253 |         # re-format n-ary concats into binary concats
254 |         concat = join_terms_with([a, b] + other_args, ConcatNode)
255 | 
256 |         return concat
257 | 
258 |     if s.accept('CONTAINS'):
259 |         a = expect_arg(s)
260 |         b = expect_arg(s)
261 |         return ContainsNode(a, b)
262 | 
263 |     if s.accept('AT'):
264 |         a = expect_arg(s)
265 |         b = expect_arg(s)
266 |         return AtNode(a, b)
267 | 
268 |     if s.accept('LENGTH'):
269 |         a = expect_arg(s)
270 |         return LengthNode(a)
271 | 
272 |     if s.accept('INDEXOFVAR'):
273 | 
274 |         # two arguments are expected
275 |         a = expect_arg(s)
276 |         b = expect_arg(s)
277 | 
278 |         # the third argument may or may not be there
279 |         c = accept_arg(s)
280 | 
281 |         if c is not None:
282 |             return IndexOf2Node(a, b, c)
283 | 
284 |         return IndexOfNode(a, b)
285 | 
286 |     if s.accept('INDEXOF'):
287 |         a = expect_arg(s)
288 |         b = expect_arg(s)
289 |         return IndexOfNode(a, b)
290 | 
291 |     if s.accept('INDEXOF2'):
292 |         a = expect_arg(s)
293 |         b = expect_arg(s)
294 |         c = expect_arg(s)
295 |         return IndexOf2Node(a, b, c)
296 | 
297 |     if s.accept('PREFIXOF'):
298 |         a = expect_arg(s)
299 |         b = expect_arg(s)
300 |         return PrefixOfNode(a, b)
301 | 
302 |     if s.accept('SUFFIXOF'):
303 |         a = expect_arg(s)
304 |         b = expect_arg(s)
305 |         return SuffixOfNode(a, b)
306 | 
307 |     if s.accept('REPLACE'):
308 |         a = expect_arg(s)
309 |         b = expect_arg(s)
310 |         c = expect_arg(s)
311 |         return StringReplaceNode(a, b, c)
312 | 
313 |     if s.accept('SUBSTRING'):
314 |         a = expect_arg(s)
315 |         b = expect_arg(s)
316 |         c = expect_arg(s)
317 |         return SubstringNode(a, b, c)
318 | 
319 |     if s.accept('FROM_INT'):
320 |         a = expect_arg(s)
321 |         return FromIntNode(a)
322 | 
323 |     if s.accept('TO_INT'):
324 |         a = expect_arg(s)
325 |         return ToIntNode(a)
326 | 
327 |     if s.accept('IN_RE'):
328 |         a = expect_arg(s)
329 |         b = expect_arg(s)
330 |         return InReNode(a, b)
331 | 
332 |     if s.accept('STR_TO_RE'):
333 |         a = expect_arg(s)
334 |         return StrToReNode(a)
335 | 
336 |     if s.accept('RE_CONCAT'):
337 |         # first two args are mandatory
338 |         a = expect_arg(s)
339 |         b = expect_arg(s)
340 | 
341 |         # more args are optional
342 |         other_args = repeat_star(s, accept_arg)
343 | 
344 |         # re-format n-ary concats into binary concats
345 |         concat = join_terms_with([a, b] + other_args, ReConcatNode)
346 | 
347 |         return concat
348 | 
349 |     if s.accept('RE_STAR'):
350 |         a = expect_arg(s)
351 |         return ReStarNode(a)
352 | 
353 |     if s.accept('RE_PLUS'):
354 |         a = expect_arg(s)
355 |         return RePlusNode(a)
356 | 
357 |     if s.accept('RE_RANGE'):
358 |         a = expect_arg(s)
359 |         b = expect_arg(s)
360 |         return ReRangeNode(a, b)
361 | 
362 |     if s.accept('RE_UNION'):
363 | 
364 |         # first two args are mandatory
365 |         a = expect_arg(s)
366 |         b = expect_arg(s)
367 | 
368 |         # more args are optional
369 |         other_args = repeat_star(s, accept_arg)
370 | 
371 |         # re-format n-ary unions into binary unions
372 |         union = join_terms_with([a, b] + other_args, ReUnionNode)
373 | 
374 |         return union
375 | 
376 |     if s.accept('RE_INTER'):
377 | 
378 |         # first two args are mandatory
379 |         a = expect_arg(s)
380 |         b = expect_arg(s)
381 | 
382 |         # more args are optional
383 |         other_args = repeat_star(s, accept_arg)
384 | 
385 |         # re-format n-ary intersections into binary intersections
386 |         inter = join_terms_with([a, b] + other_args, ReInterNode)
387 | 
388 |         return inter
389 | 
390 |     token = s.peek()
391 |     if s.accept('META_COMMAND'):
392 |         body = repeat_star(s, accept_meta_arg)
393 |         return MetaCommandNode(token.value, *body)
394 | 
395 |     # generic expression case
396 |     name = expect_identifier(s)
397 |     body = repeat_star(s, accept_arg)
398 | 
399 |     return GenericExpressionNode(name, *body)
400 | 
401 | def get_expressions(s):
402 | 
403 |     expressions = []
404 |     s.advance()
405 | 
406 |     while s.peek() is not None:
407 |         s.expect('LPAREN')
408 |         expressions.append(expect_expression(s))
409 |         s.expect('RPAREN')
410 | 
411 |     return expressions
412 | 
413 | # public API
414 | def parse_file(path, language):
415 |     with open(path, 'r') as file:
416 |         return parse(file.read(), language)
417 | 
418 | def parse(text, language):
419 |     return parse_tokens(scan(text, language), language, text)
420 | 
421 | def parse_tokens(tokens, language, text):
422 |     return get_expressions(Stream(tokens, text))
423 | 


--------------------------------------------------------------------------------
/stringfuzz/scanner.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import string
  3 | 
  4 | from stringfuzz.constants import *
  5 | 
  6 | __all__ = [
  7 |     'scan',
  8 |     'scan_file',
  9 |     'ScanningError',
 10 |     'ALPHABET',
 11 |     'WHITESPACE',
 12 | ]
 13 | 
 14 | # data structures
 15 | class ScanningError(ValueError):
 16 |     pass
 17 | 
 18 | class Token(object):
 19 | 
 20 |     def __init__(self, name, value, position):
 21 |         self.name = name
 22 |         self.value = value
 23 |         self.position = position
 24 | 
 25 |     def __str__(self):
 26 |         return self.value
 27 | 
 28 |     def __repr__(self):
 29 |         return '{} {!r} @ {}'.format(self.name, self.value, self.position)
 30 | 
 31 | # helpers
 32 | def strip_quotes(string_literal):
 33 |     return string_literal[1:-1]
 34 | 
 35 | def unescape(string_literal):
 36 |     return string_literal.encode().decode('unicode_escape')
 37 | 
 38 | def replace_double_quotes(string_literal):
 39 |     return string_literal.replace("\"\"", "\"")
 40 | 
 41 | # token functions
 42 | def make_whitespace(s, w): return Token('WHITESPACE', w,       s.match.start())
 43 | def make_identifier(s, w): return Token('IDENTIFIER', w,       s.match.start())
 44 | def make_lparen(s, w):     return Token('LPAREN',     w,       s.match.start())
 45 | def make_rparen(s, w):     return Token('RPAREN',     w,       s.match.start())
 46 | def make_setting(s, w):    return Token('SETTING',    w,       s.match.start())
 47 | def make_bool_lit(s, w):   return Token('BOOL_LIT',   w,       s.match.start())
 48 | def make_int_lit(s, w):    return Token('INT_LIT',    w,       s.match.start())
 49 | def make_sym(s, w):        return Token('IDENTIFIER', w,       s.match.start())
 50 | 
 51 | def make_string_lit(s, w):
 52 |     literal = unescape(strip_quotes(w))
 53 |     return Token('STRING_LIT', literal, s.match.start())
 54 | 
 55 | def make_string_lit_25(s, w):
 56 |     literal = replace_double_quotes(unescape(strip_quotes(w)))
 57 |     return Token('STRING_LIT', literal, s.match.start())
 58 | 
 59 | # specific symbol tokens
 60 | def make_meta_command(s, w):     return Token('META_COMMAND',  w, s.match.start())
 61 | def make_declare_fun(s, w):      return Token('DECLARE_FUN',   w, s.match.start())
 62 | def make_define_fun(s, w):       return Token('DEFINE_FUN',    w, s.match.start())
 63 | def make_declare_const(s, w):    return Token('DECLARE_CONST', w, s.match.start())
 64 | def make_assert(s, w):           return Token('ASSERT',        w, s.match.start())
 65 | def make_contains(s, w):         return Token('CONTAINS',      w, s.match.start())
 66 | def make_concat(s, w):           return Token('CONCAT',        w, s.match.start())
 67 | def make_at(s, w):               return Token('AT',            w, s.match.start())
 68 | def make_indexof_var_args(s, w): return Token('INDEXOFVAR',    w, s.match.start())
 69 | def make_indexof_2_args(s, w):   return Token('INDEXOF',       w, s.match.start())
 70 | def make_indexof_3_args(s, w):   return Token('INDEXOF2',      w, s.match.start())
 71 | def make_prefixof(s, w):         return Token('PREFIXOF',      w, s.match.start())
 72 | def make_suffixof(s, w):         return Token('SUFFIXOF',      w, s.match.start())
 73 | def make_replace(s, w):          return Token('REPLACE',       w, s.match.start())
 74 | def make_substring(s, w):        return Token('SUBSTRING',     w, s.match.start())
 75 | def make_str_from_int(s, w):     return Token('FROM_INT',      w, s.match.start())
 76 | def make_str_to_int(s, w):       return Token('TO_INT',        w, s.match.start())
 77 | def make_length(s, w):           return Token('LENGTH',        w, s.match.start())
 78 | def make_in_re(s, w):            return Token('IN_RE',         w, s.match.start())
 79 | def make_str_to_re(s, w):        return Token('STR_TO_RE',     w, s.match.start())
 80 | def make_re_allchar(s, w):       return Token('RE_ALLCHAR',    w, s.match.start())
 81 | def make_re_concat(s, w):        return Token('RE_CONCAT',     w, s.match.start())
 82 | def make_re_star(s, w):          return Token('RE_STAR',       w, s.match.start())
 83 | def make_re_plus(s, w):          return Token('RE_PLUS',       w, s.match.start())
 84 | def make_re_range(s, w):         return Token('RE_RANGE',      w, s.match.start())
 85 | def make_re_union(s, w):         return Token('RE_UNION',      w, s.match.start())
 86 | def make_re_inter(s, w):         return Token('RE_INTER',      w, s.match.start())
 87 | 
 88 | # constants
 89 | ALPHABET     = string.digits + string.ascii_letters + string.punctuation
 90 | WHITESPACE   = string.whitespace
 91 | ID_CHAR      = r'[\w._\+\-\*\=%?!$_~&^<>@/|:\\]'
 92 | SETTING_CHAR = r'[\w._\+\-\*\=%?!$_~&^<>@/|:]'
 93 | 
 94 | # token lists
 95 | # NOTE:
 96 | #      more specific patterns (e.g. reserved words) have to come before more
 97 | #      general patterns (e.g. identifiers) because otherwise the more general
 98 | #      pattern will match before the more specific one
 99 | SMT_20_TOKENS = [
100 | 
101 |     # Boolean functions
102 |     (r'ite', make_sym),
103 |     (r'not', make_sym),
104 |     (r'and', make_sym),
105 |     (r'or',  make_sym),
106 | 
107 |     # commands
108 |     (r'set-logic',        make_meta_command),
109 |     (r'set-option',       make_meta_command),
110 |     (r'set-info',         make_meta_command),
111 |     (r'declare-sort',     make_sym),
112 |     (r'define-sort',      make_sym),
113 |     (r'declare-fun',      make_declare_fun),
114 |     (r'define-fun',       make_define_fun),
115 |     (r'declare-const',    make_sym),
116 |     (r'define-const',     make_declare_const),
117 |     (r'declare-variable', make_sym),
118 |     (r'define-variable',  make_sym),
119 |     (r'push',             make_sym),
120 |     (r'pop',              make_sym),
121 |     (r'assert',           make_assert),
122 |     (r'check-sat',        make_sym),
123 |     (r'get-assertions',   make_sym),
124 |     (r'get-proof',        make_sym),
125 |     (r'get-model',        make_sym),
126 |     (r'get-unsat-core',   make_sym),
127 |     (r'get-value',        make_sym),
128 |     (r'get-assignment',   make_sym),
129 |     (r'get-option',       make_sym),
130 |     (r'get-info',         make_sym),
131 |     (r'exit',             make_sym),
132 | 
133 |     # math operators
134 |     (r'\+',  make_sym),
135 |     (r'-',   make_sym),
136 |     (r'\*',  make_sym),
137 |     (r'=',   make_sym),
138 |     (r'<=',  make_sym),
139 |     (r'<',   make_sym),
140 |     (r'>=',  make_sym),
141 |     (r'>',   make_sym),
142 |     (r'div', make_sym),
143 | 
144 |     # whitespace
145 |     (r'\s+', make_whitespace),
146 | 
147 |     # parens
148 |     (r'\(', make_lparen),
149 |     (r'\)', make_rparen),
150 | 
151 |     # boolean literals
152 |     (r'true',  make_bool_lit),
153 |     (r'false', make_bool_lit),
154 | 
155 |     # int literals: digits not followed by identifier characters
156 |     (r'\d+(?!' + ID_CHAR + r')', make_int_lit),
157 | 
158 |     # comments
159 |     (r';[^\n]*', make_whitespace),
160 |     (r'//[^\n]*', make_whitespace),
161 | 
162 |     # settings: can use most characters, and start with colons
163 |     (r':' + SETTING_CHAR + r'+', make_setting),
164 | 
165 |     # identifiers: can use most characters, but can't start with digits
166 |     (ID_CHAR + r'(?<![\d])' + ID_CHAR + r'*', make_identifier),
167 | ]
168 | 
169 | SMT_20_STRING_TOKENS = [
170 | 
171 |     # string
172 |     (r'Concat',     make_concat),
173 |     (r'CharAt',     make_at),
174 |     (r'Contains',   make_contains),
175 |     (r'Length',     make_length),
176 |     (r'Indexof2',   make_indexof_3_args),
177 |     (r'IndexOf2',   make_indexof_3_args),
178 |     (r'StartsWith', make_prefixof),
179 |     (r'EndsWith',   make_suffixof),
180 |     (r'Replace',    make_replace),
181 |     (r'Substring',  make_substring),
182 | 
183 |     # regex
184 |     (r'Str2Reg',        make_str_to_re),
185 |     (r'RegexIn',        make_in_re),
186 |     (r'RegexStar',      make_re_star),
187 |     (r'RegexConcat',    make_re_concat),
188 |     (r'RegexPlus',      make_re_plus),
189 |     (r'RegexCharRange', make_re_range),
190 |     (r'RegexUnion',     make_re_union),
191 |     (r'RegexInter',     make_re_inter),
192 | 
193 |     # unique
194 |     (r'Indexof',     make_indexof_2_args),
195 |     (r'IndexOf',     make_indexof_2_args),
196 |     (r'RegexDigit',  make_sym),
197 |     (r'LastIndexOf', make_sym),
198 |     (r'LastIndexof', make_sym),
199 | 
200 |     # quotes
201 |     (r'"(?:\\.|[^\\"])*"', make_string_lit),
202 | ]
203 | 
204 | SMT_25_STRING_TOKENS = [
205 | 
206 |     # string
207 |     (r'str\.\+\+',     make_concat),
208 |     (r'str\.at',       make_at),
209 |     (r'str\.contains', make_contains),
210 |     (r'str\.len',      make_length),
211 |     (r'str\.indexof',  make_indexof_var_args),
212 |     (r'str\.prefixof', make_prefixof),
213 |     (r'str\.suffixof', make_suffixof),
214 |     (r'str\.replace',  make_replace),
215 |     (r'str\.substr',   make_substring),
216 | 
217 |     # regex
218 |     (r'str\.to\.re',   make_str_to_re),
219 |     (r'str\.in\.re',   make_in_re),
220 |     (r're\.\*',        make_re_star),
221 |     (r're\.\+\+',      make_re_concat),
222 |     (r're\.\+',        make_re_plus),
223 |     (r're\.range',     make_re_range),
224 |     (r're\.union',     make_re_union),
225 |     (r're\.inter',     make_re_inter),
226 |     (r're\.allchar',   make_re_allchar),
227 |     (r're\.all',       make_re_allchar),
228 | 
229 |     # integer
230 |     (r'str\.from\.int', make_str_from_int),
231 |     (r'str\.to\.int',   make_str_to_int),
232 | 
233 |     # quotes
234 |     (r'"(?:""|[^"])*"', make_string_lit_25),
235 | ]
236 | 
237 | # lexicons
238 | SMT_20_LEXICON        = SMT_20_TOKENS
239 | SMT_20_STRING_LEXICON = SMT_20_STRING_TOKENS + SMT_20_LEXICON
240 | SMT_25_STRING_LEXICON = SMT_25_STRING_TOKENS + SMT_20_LEXICON
241 | 
242 | # scanners
243 | smt_20_scanner        = re.Scanner(SMT_20_LEXICON)
244 | smt_20_string_scanner = re.Scanner(SMT_20_STRING_LEXICON)
245 | smt_25_string_scanner = re.Scanner(SMT_25_STRING_LEXICON)
246 | 
247 | # public API
248 | def scan(string, language):
249 |     if language == SMT_20:
250 |         tokens, remainder = smt_20_scanner.scan(string)
251 |     elif language == SMT_20_STRING:
252 |         tokens, remainder = smt_20_string_scanner.scan(string)
253 |     elif language == SMT_25_STRING:
254 |         tokens, remainder = smt_25_string_scanner.scan(string)
255 |     else:
256 |         raise ScanningError('invalid language: {!r}'.format(language))
257 | 
258 |     if len(remainder) > 0:
259 |         token_context = '\n'.join('    {} {!r}'.format(t.name, t.value) for t in tokens[-5:])
260 |         text_context  = remainder[:100]
261 |         raise ScanningError('scanning error:\n{}\n    {!r}...'.format(token_context, text_context))
262 | 
263 |     return [t for t in tokens if t.name != 'WHITESPACE']
264 | 
265 | def scan_file(path, language):
266 |     with open(path, 'r') as file:
267 |         return scan(file.read(), language)
268 | 


--------------------------------------------------------------------------------
/stringfuzz/smt.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Functions for creating ASTs.
  3 | '''
  4 | 
  5 | from stringfuzz.ast import *
  6 | 
  7 | __all__ = [
  8 |     'smt_var',
  9 |     'smt_const',
 10 |     'smt_new_var',
 11 |     'smt_new_const',
 12 |     'smt_str_lit',
 13 |     'smt_int_lit',
 14 |     'smt_bool_lit',
 15 |     'smt_assert',
 16 |     'smt_equal',
 17 |     'smt_gt',
 18 |     'smt_lt',
 19 |     'smt_gte',
 20 |     'smt_lte',
 21 |     'smt_concat',
 22 |     'smt_at',
 23 |     'smt_len',
 24 |     'smt_declare_var',
 25 |     'smt_declare_const',
 26 |     'smt_check_sat',
 27 |     'smt_get_model',
 28 |     'smt_reset_counters',
 29 |     'smt_str_to_re',
 30 |     'smt_regex_in',
 31 |     'smt_regex_concat',
 32 |     'smt_regex_star',
 33 |     'smt_regex_plus',
 34 |     'smt_regex_range',
 35 |     'smt_regex_union',
 36 |     'smt_regex_inter',
 37 |     'smt_and',
 38 |     'smt_or',
 39 |     'smt_not',
 40 |     'smt_string_logic',
 41 |     'smt_is_sat',
 42 |     'smt_is_unsat',
 43 | ]
 44 | 
 45 | # constants
 46 | VAR_PREFIX   = 'var'
 47 | CONST_PREFIX = 'const'
 48 | 
 49 | # globals
 50 | var_counter   = 0
 51 | const_counter = 0
 52 | 
 53 | # helper functions
 54 | def smt_var(suffix):
 55 |     return IdentifierNode('{}{}'.format(VAR_PREFIX, suffix))
 56 | 
 57 | def smt_const(suffix):
 58 |     return IdentifierNode('{}{}'.format(CONST_PREFIX, suffix))
 59 | 
 60 | def smt_new_var():
 61 |     global var_counter
 62 |     returned = var_counter
 63 |     var_counter += 1
 64 |     return smt_var(returned)
 65 | 
 66 | def smt_new_const():
 67 |     global const_counter
 68 |     returned = const_counter
 69 |     const_counter += 1
 70 |     return smt_const(returned)
 71 | 
 72 | def smt_reset_counters():
 73 |     global const_counter
 74 |     global var_counter
 75 |     const_counter = 0
 76 |     var_counter = 0
 77 | 
 78 | # leaf expressions
 79 | def smt_str_lit(value):
 80 |     return StringLitNode(value)
 81 | 
 82 | def smt_int_lit(value):
 83 |     return IntLitNode(value)
 84 | 
 85 | def smt_bool_lit(value):
 86 |     return BoolLitNode(value)
 87 | 
 88 | # node expressions
 89 | def smt_and(a, b):
 90 |     return AndNode(a, b)
 91 | 
 92 | def smt_or(a, b):
 93 |     return OrNode(a, b)
 94 | 
 95 | def smt_not(a):
 96 |     return NotNode(a)
 97 | 
 98 | def smt_equal(a, b):
 99 |     return EqualNode(a, b)
100 | 
101 | def smt_gt(a, b):
102 |     return GtNode(a, b)
103 | 
104 | def smt_lt(a, b):
105 |     return LtNode(a, b)
106 | 
107 | def smt_gte(a, b):
108 |     return GteNode(a, b)
109 | 
110 | def smt_lte(a, b):
111 |     return LteNode(a, b)
112 | 
113 | def smt_concat(a, b):
114 |     return ConcatNode(a, b)
115 | 
116 | def smt_at(s, i):
117 |     return AtNode(s, i)
118 | 
119 | def smt_len(a):
120 |     return LengthNode(a)
121 | 
122 | def smt_str_to_re(s):
123 |     return StrToReNode(s)
124 | 
125 | def smt_regex_in(s, r):
126 |     return InReNode(s, r)
127 | 
128 | def smt_regex_concat(a, b):
129 |     return ReConcatNode(a, b)
130 | 
131 | def smt_regex_plus(a):
132 |     return RePlusNode(a)
133 | 
134 | def smt_regex_range(a, b):
135 |     return ReRangeNode(a, b)
136 | 
137 | def smt_regex_star(a):
138 |     return ReStarNode(a)
139 | 
140 | def smt_regex_union(a, b):
141 |     return ReUnionNode(a, b)
142 | 
143 | def smt_regex_inter(a, b):
144 |     return ReInterNode(a, b)
145 | 
146 | # commands
147 | def smt_assert(exp):
148 |     return AssertNode(exp)
149 | 
150 | def smt_declare_var(identifier, sort='String'):
151 |     return FunctionDeclarationNode(identifier, BracketsNode([]), AtomicSortNode(sort))
152 | 
153 | def smt_declare_const(identifier, sort='String'):
154 |     return ConstantDeclarationNode(identifier, AtomicSortNode(sort))
155 | 
156 | def smt_check_sat():
157 |     return CheckSatNode()
158 | 
159 | def smt_get_model():
160 |     return GetModelNode()
161 | 
162 | def _smt_status(status):
163 |     return MetaCommandNode(IdentifierNode('set-info'), SettingNode('status'), MetaDataNode(status))
164 | 
165 | def smt_is_sat():
166 |     return _smt_status('sat')
167 | 
168 | def smt_is_unsat():
169 |     return _smt_status('unsat')
170 | 
171 | def smt_string_logic():
172 |     return MetaCommandNode(IdentifierNode('set-logic'), IdentifierNode('QF_S'))
173 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | from stringfuzz.transformers.unprintable import *
2 | from stringfuzz.transformers.nop import *
3 | from stringfuzz.transformers.rotate import *
4 | from stringfuzz.transformers.fuzz import *
5 | from stringfuzz.transformers.graft import *
6 | from stringfuzz.transformers.translate import *
7 | from stringfuzz.transformers.reverse import *
8 | from stringfuzz.transformers.multiply import *
9 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/fuzz.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | The Fuzz transformer performs two types of transformations.
 3 | The first is for literals. The second is for operators.
 4 | 
 5 | Literals are fuzzed to similar literals. For example,
 6 | an integer literal x will be replaced with x+r where
 7 | r is a random number between -x and x. String literals
 8 | are processed character by character. Each character can either
 9 | remain in the updated string, be replaced by a random string,
10 | or be deleted with equal probability.
11 | 
12 | Operators are fuzzed, with 50% probability, to a new operator
13 | with the same function type. For example, regex * can be fuzzed
14 | to regex +.
15 | '''
16 | 
17 | import random
18 | 
19 | from stringfuzz.ast import IntLitNode, StringLitNode, ReRangeNode
20 | from stringfuzz.types import REPLACEABLE_OPS
21 | from stringfuzz.ast_walker import ASTWalker
22 | from stringfuzz.generators import random_text
23 | 
24 | __all__ = [
25 |     'fuzz',
26 | ]
27 | 
28 | def fuzz_char(c):
29 | 
30 |     # with equal probability: replace, keep, add, or delete a character
31 |     operation = random.randint(1,4)
32 | 
33 |     # replace it
34 |     if operation == 1:
35 |         return random_text(1)
36 | 
37 |     # keep it the same
38 |     if operation == 2:
39 |         return c
40 | 
41 |     # add a new character
42 |     if operation == 3:
43 |         return c + random_text(1)
44 | 
45 |     # delete it
46 |     return ''
47 | 
48 | def fuzz_string(string):
49 |     return ''.join(fuzz_char(c) for c in string)
50 | 
51 | class LitTransformer(ASTWalker):
52 |     def __init__(self, ast, skip_re_range):
53 |         super().__init__(ast)
54 |         self.skip_re_range = skip_re_range
55 | 
56 |     def exit_literal(self, literal, parent):
57 | 
58 |         # int literal
59 |         if isinstance(literal, IntLitNode):
60 | 
61 |             # maintain sign of literal
62 |             literal.value += random.randint(-literal.value, literal.value)
63 | 
64 |         # string literal
65 |         elif isinstance(literal, StringLitNode):
66 | 
67 |             # skip children of regex range if required
68 |             if isinstance(parent, ReRangeNode) and self.skip_re_range:
69 |                 return
70 | 
71 |             # create new value for literal
72 |             new_val = fuzz_string(literal.value)
73 | 
74 |             # replace old value with new value
75 |             literal.value = new_val
76 | 
77 |     def exit_expression(self, expr, parent):
78 |         for type_list in REPLACEABLE_OPS:
79 |             for i in range(len(expr.body)):
80 | 
81 |                 # check if it's a replaceable type; if so, randomly replace it
82 |                 replaceable = [isinstance(expr.body[i], C) for C in type_list]
83 |                 if any(replaceable):
84 |                     choice       = random.choice(type_list)
85 |                     expr.body[i] = choice(*expr.body[i].body)
86 | 
87 | # public API
88 | def fuzz(ast, skip_re_range):
89 |     transformed = LitTransformer(ast, skip_re_range).walk()
90 |     return transformed
91 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/graft.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | The graft transform picks a subtree and a leaf at random
  3 | and swaps them for each type.
  4 | '''
  5 | 
  6 | import random
  7 | 
  8 | from stringfuzz.ast import StringLitNode, BoolLitNode, IntLitNode, StrToReNode
  9 | from stringfuzz.types import STR_RET, INT_RET, BOOL_RET, RX_RET
 10 | from stringfuzz.ast_walker import ASTWalker
 11 | 
 12 | __all__ = [
 13 |     'graft',
 14 | ]
 15 | 
 16 | class GraftTransformer(ASTWalker):
 17 |     def __init__(self, ast, pairs):
 18 |         super().__init__(ast)
 19 |         self.pairs = pairs
 20 | 
 21 |     def enter_expression(self, expr, parent):
 22 |         for i in range(len(expr.body)):
 23 |             for pair in self.pairs:
 24 |                 if expr.body[i] == pair[0]:
 25 |                     expr.body[i] = pair[1]
 26 |                 elif expr.body[i] == pair[1]:
 27 |                     expr.body[i] = pair[0]
 28 | 
 29 | class GraftFinder(ASTWalker):
 30 |     def __init__(self, ast, skip_str_to_re):
 31 |         super().__init__(ast)
 32 |         self.skip_str_to_re = skip_str_to_re
 33 |         #            expr, lit
 34 |         self.str  = [None, None]
 35 |         self.bool = [None, None]
 36 |         self.int  = [None, None]
 37 |         self.rx   = [None, None]
 38 | 
 39 |     @property
 40 |     def pairs(self):
 41 |         pairs = []
 42 |         if all(self.str):
 43 |             pairs.append(self.str)
 44 |         if all(self.bool):
 45 |             pairs.append(self.bool)
 46 |         if all(self.int):
 47 |             pairs.append(self.int)
 48 |         if all(self.rx):
 49 |             pairs.append(self.rx)
 50 |         return pairs
 51 | 
 52 |     def enter_literal(self, literal, parent):
 53 |         replace = random.choice([True, False])
 54 |         if isinstance(literal, StringLitNode):
 55 |             if isinstance(parent, StrToReNode) and self.skip_str_to_re:
 56 |                 return
 57 |             if self.str[1]:
 58 |                 if replace:
 59 |                     self.str[1] = literal
 60 |             else:
 61 |                 self.str[1] = literal
 62 |         elif isinstance(literal, BoolLitNode):
 63 |             if self.bool[1]:
 64 |                 if replace:
 65 |                     self.bool[1] = literal
 66 |             else:
 67 |                 self.bool[1] = literal
 68 |         elif isinstance(literal, IntLitNode):
 69 |             if self.int[1]:
 70 |                 if replace:
 71 |                     self.int[1] = literal
 72 |             else:
 73 |                 self.int[1] = literal
 74 | 
 75 |     def enter_identifier(self, ident, parent):
 76 |         #TODO How to check type of identifiers?
 77 |         # if self.str[1]:
 78 |         #     if random.random() < 0.5:
 79 |         #         self.str[1] = ident
 80 |         # else:
 81 |         #     self.str[1] = ident
 82 |         pass
 83 | 
 84 |     def enter_expression(self, expr, parent):
 85 |         replace = random.choice([True, False])
 86 |         if isinstance(expr, StrToReNode):
 87 |             # take StrToReNode's to be literals for RX
 88 |             if self.rx[1]:
 89 |                 if replace:
 90 |                     self.rx[1] = expr
 91 |             else:
 92 |                 self.rx[1] = expr
 93 | 
 94 |         # assign expr part of pair
 95 |         elif any([isinstance(expr, C) for C in STR_RET]):
 96 |             if self.str[0]:
 97 |                 if replace:
 98 |                     self.str[0] = expr
 99 |             else:
100 |                 self.str[0] = expr
101 |         elif any([isinstance(expr, C) for C in INT_RET]):
102 |             if self.int[0]:
103 |                 if replace:
104 |                     self.int[0] = expr
105 |             else:
106 |                 self.int[0] = expr
107 |         elif any([isinstance(expr, C) for C in BOOL_RET]):
108 |             if self.bool[0]:
109 |                 if replace:
110 |                     self.bool[0] = expr
111 |             else:
112 |                 self.bool[0] = expr
113 |         elif any([isinstance(expr, C) for C in RX_RET]):
114 |             if self.rx[0]:
115 |                 if replace:
116 |                     self.rx[0] = expr
117 |             else:
118 |                 self.rx[0] = expr
119 | 
120 | 
121 | # public API
122 | def graft(ast, skip_str_to_re):
123 |     finder = GraftFinder(ast, skip_str_to_re)
124 |     finder.walk()
125 |     transformed = GraftTransformer(ast, finder.pairs).walk()
126 |     return transformed
127 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/multiply.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Multiplying every integer literal by n and repeating
 3 | every character in a string literal n times for some n
 4 | '''
 5 | 
 6 | from stringfuzz.ast import StringLitNode, IntLitNode, ReRangeNode
 7 | from stringfuzz.ast_walker import ASTWalker
 8 | 
 9 | __all__ = [
10 |     'multiply',
11 | ]
12 | 
13 | class MultiplyTransformer(ASTWalker):
14 |     def __init__(self, ast, factor, skip_re_range):
15 |         super().__init__(ast)
16 |         self.factor = factor
17 |         self.skip_re_range = skip_re_range
18 | 
19 |     def exit_literal(self, literal, parent):
20 |         if isinstance(literal, StringLitNode):
21 |             if isinstance(parent, ReRangeNode) and self.skip_re_range:
22 |                 return
23 |             new_val = ""
24 |             for char in literal.value:
25 |                 new_val += char * self.factor
26 |             literal.value = new_val
27 |         elif isinstance(literal, IntLitNode):
28 |             literal.value = literal.value * self.factor
29 | 
30 | # public API
31 | def multiply(ast, factor, skip_re_range):
32 |     transformed = MultiplyTransformer(ast, factor, skip_re_range).walk()
33 |     return transformed
34 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/nop.py:
--------------------------------------------------------------------------------
 1 | from stringfuzz.parser import parse
 2 | 
 3 | __all__ = [
 4 |     'nop',
 5 | ]
 6 | 
 7 | # public API
 8 | def nop(ast):
 9 |     return ast
10 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/reverse.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Reversing every string literal
 3 | '''
 4 | 
 5 | from stringfuzz.ast import StringLitNode, ConcatNode, ReConcatNode
 6 | from stringfuzz.ast_walker import ASTWalker
 7 | 
 8 | __all__ = [
 9 |     'reverse',
10 | ]
11 | 
12 | class ReverseTransformer(ASTWalker):
13 |     def __init__(self, ast):
14 |         super().__init__(ast)
15 | 
16 |     def exit_literal(self, literal, parent):
17 |         if isinstance(literal, StringLitNode):
18 |             literal.value = literal.value[::-1]
19 | 
20 |     def exit_expression(self, expr, parent):
21 |         if isinstance(expr, (ConcatNode, ReConcatNode)):
22 |             expr.body = reversed(expr.body)
23 | 
24 | # public API
25 | def reverse(ast):
26 |     transformed = ReverseTransformer(ast).walk()
27 |     return transformed
28 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/rotate.py:
--------------------------------------------------------------------------------
 1 | from stringfuzz.types import ALL_INT_ARGS, ALL_RX_ARGS, ALL_STR_ARGS
 2 | from stringfuzz.ast_walker import ASTWalker
 3 | 
 4 | __all__ = [
 5 |     'rotate',
 6 | ]
 7 | 
 8 | class RotateTransformer(ASTWalker):
 9 |     def __init__(self, ast):
10 |         super().__init__(ast)
11 | 
12 |     def exit_expression(self, expr, parent):
13 |         for uniform in [ALL_INT_ARGS, ALL_RX_ARGS, ALL_STR_ARGS]:
14 |             # need at least two top level children
15 |             uniform_expr = [isinstance(expr, C) for C in uniform]
16 |             if any(uniform_expr) and len(expr.body) > 1:
17 |                 for i in range(len(expr.body)):
18 |                     uniform_child = [isinstance(expr.body[i], C) for C in uniform]
19 |                     if any(uniform_child):
20 |                         # rotate clockwise
21 |                         # j is the other top level child
22 |                         if i == len(expr.body)-1:
23 |                             j = 0
24 |                         else:
25 |                             j = len(expr.body)-1
26 |                         temp = expr.body[j]
27 |                         expr.body[j] = expr.body[i].body[0]
28 |                         new_body = expr.body[i].body[1:] + [temp]
29 |                         expr.body[i].body = new_body
30 | 
31 | # public API
32 | def rotate(ast):
33 |     transformed = RotateTransformer(ast).walk()
34 |     return transformed
35 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/translate.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Permuting the alphabet in every string literal.
 3 | '''
 4 | 
 5 | import random
 6 | import copy
 7 | 
 8 | from stringfuzz.ast import StringLitNode, ReRangeNode
 9 | from stringfuzz.ast_walker import ASTWalker
10 | from stringfuzz import ALL_CHARS
11 | 
12 | __all__ = [
13 |     'translate'
14 | ]
15 | 
16 | WITH_INTEGERS    = list(ALL_CHARS)
17 | WITHOUT_INTEGERS = [c for c in ALL_CHARS if not c.isdecimal()]
18 | 
19 | class TranslateTransformer(ASTWalker):
20 |     def __init__(self, ast, character_set, skip_re_range):
21 |         super().__init__(ast)
22 |         self.table = self.make_table(character_set)
23 |         self.skip_re_range = skip_re_range
24 | 
25 |     def make_table(self, character_set):
26 |         shuffled = copy.copy(character_set)
27 |         random.shuffle(shuffled)
28 |         shuffled = ''.join(shuffled)
29 |         character_set = ''.join(character_set)
30 |         return str.maketrans(character_set, shuffled)
31 | 
32 |     def exit_literal(self, literal, parent):
33 |         if isinstance(literal, StringLitNode):
34 |             if isinstance(parent, ReRangeNode) and self.skip_re_range:
35 |                 return
36 |             literal.value = literal.value.translate(self.table)
37 | 
38 | # public API
39 | def translate(ast, integer_flag, skip_re_range):
40 |     if integer_flag:
41 |         character_set = WITH_INTEGERS
42 |     else:
43 |         character_set = WITHOUT_INTEGERS
44 |     transformed = TranslateTransformer(ast, character_set, skip_re_range).walk()
45 |     return transformed
46 | 


--------------------------------------------------------------------------------
/stringfuzz/transformers/unprintable.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import sys
 3 | import random
 4 | import string
 5 | 
 6 | from stringfuzz.ast import ExpressionNode, StringLitNode
 7 | 
 8 | __all__ = [
 9 |     'unprintable',
10 | ]
11 | 
12 | EXCLUDED_CHARS    = '\n\t\x00'
13 | UNPRINTABLE_CHARS = [chr(i) for i in range(32) if chr(i) not in EXCLUDED_CHARS]
14 | ALL_CHARS         = string.printable
15 | 
16 | # TODO:
17 | #      sanity check that the transformation should not inject
18 | # if len(UNPRINTABLE_CHARS) < len(ALL_CHARS):
19 | #     print("REALLY BAD ERROR: 'unprintable' transformation loses data", file=sys.stderr)
20 | #     exit(1)
21 | 
22 | # TODO:
23 | #      fix pick_unprintable to pick without replacement
24 | def pick_unprintable():
25 |     return random.choice(UNPRINTABLE_CHARS)
26 | 
27 | def make_charmap():
28 |     return {c : pick_unprintable() for c in ALL_CHARS}
29 | 
30 | def make_unprintable_string(s, charmap):
31 |     return ''.join(charmap[c] for c in s)
32 | 
33 | def make_unprintable_expression(expression, charmap):
34 | 
35 |     for i in range(len(expression.body)):
36 | 
37 |         arg = expression.body[i]
38 | 
39 |         # recurse down expressions
40 |         if isinstance(arg, ExpressionNode):
41 |             make_unprintable_expression(arg, charmap)
42 | 
43 |         # replace string literals
44 |         elif isinstance(arg, StringLitNode):
45 | 
46 |             # create new string
47 |             old_string = arg.value
48 |             new_string = make_unprintable_string(old_string, charmap)
49 | 
50 |             # assign new literal
51 |             expression.body[i] = StringLitNode(new_string)
52 | 
53 | # public API
54 | def unprintable(ast):
55 |     charmap = make_charmap()
56 | 
57 |     for expression in ast:
58 |         make_unprintable_expression(expression, charmap)
59 | 
60 |     return ast
61 | 


--------------------------------------------------------------------------------
/stringfuzz/types.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Groupings operators by types for transformers.
 3 | '''
 4 | 
 5 | from stringfuzz.ast import *
 6 | 
 7 | # Groups of replaceable_OPS operators by Function Type
 8 | # ARG_...RET
 9 | # e.g. STR_STR_STR means takes two strings and returns a string
10 | STR_STR_STR     = [ConcatNode]
11 | STR_STR_BOOL    = [ContainsNode, PrefixOfNode, SuffixOfNode]
12 | STR_INT_STR     = [AtNode]
13 | STR_INT         = [LengthNode, ToIntNode]
14 | STR_STR_INT     = [IndexOfNode]
15 | STR_STR_INT_INT = [IndexOf2Node]
16 | STR_STR_STR_STR = [StringReplaceNode]
17 | STR_INT_INT_STR = [SubstringNode]
18 | INT_STR         = [FromIntNode]
19 | STR_RX_BOOL     = [InReNode]
20 | STR_RX          = [StrToReNode]
21 | RX_RX_RX        = [ReConcatNode, ReUnionNode, ReInterNode]
22 | RX_RX           = [ReStarNode, RePlusNode]
23 | INT_INT_RX      = [ReRangeNode]
24 | 
25 | # types with more than one inhabitant for fuzzing
26 | REPLACEABLE_OPS = [STR_STR_BOOL, STR_INT, RX_RX_RX, RX_RX]
27 | 
28 | # all the same argument types for rotating
29 | ALL_STR_ARGS    = STR_STR_STR_STR + STR_STR_STR + STR_STR_INT + STR_STR_BOOL + STR_INT + STR_RX
30 | ALL_RX_ARGS     = RX_RX_RX + RX_RX
31 | ALL_INT_ARGS    = INT_STR + INT_INT_RX
32 | 
33 | # all the same return type for cutting
34 | STR_RET         = STR_STR_STR + STR_STR_STR_STR + INT_STR + STR_INT_STR + STR_INT_INT_STR
35 | INT_RET         = STR_INT + STR_STR_INT + STR_STR_INT_INT
36 | BOOL_RET        = STR_STR_BOOL + STR_RX_BOOL
37 | RX_RET          = STR_RX + RX_RX + RX_RX_RX + INT_INT_RX
38 | 


--------------------------------------------------------------------------------
/stringfuzz/util.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | from stringfuzz.scanner import ALPHABET
 4 | from stringfuzz.ast import ConcatNode, ReConcatNode
 5 | 
 6 | __all__ = [
 7 |     'coin_toss',
 8 |     'random_string',
 9 |     'join_terms_with',
10 |     'all_same',
11 | ]
12 | 
13 | # public API
14 | def coin_toss():
15 |     return random.choice([True, False])
16 | 
17 | def random_string(length):
18 |     return ''.join(random.choice(ALPHABET) for i in range(length))
19 | 
20 | def join_terms_with(terms, concatenator):
21 |     assert len(terms) > 0
22 | 
23 |     # initialise result to the last term (i.e. first in reversed list)
24 |     reversed_terms = reversed(terms)
25 |     result         = next(reversed_terms)
26 | 
27 |     # keep appending preceding terms to the result
28 |     for term in reversed_terms:
29 |         result = concatenator(term, result)
30 | 
31 |     return result
32 | 
33 | # CREDIT:
34 | #        https://stackoverflow.com/questions/3844801/check-if-all-elements-in-a-list-are-identical
35 | def all_same(lst):
36 |     return not lst or lst.count(lst[0]) == len(lst)
37 | 


--------------------------------------------------------------------------------
/tests/ast_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from stringfuzz.ast import *
 4 | 
 5 | class TestAST(unittest.TestCase):
 6 | 
 7 |     def test_no_ast_node(self):
 8 |         self.assertRaises(NameError, lambda: ASTNode)
 9 | 
10 |     # literals
11 |     def test_literal_bool(self):
12 |         true = BoolLitNode(True)
13 |         false = BoolLitNode(False)
14 | 
15 |         self.assertIs(true.value, True)
16 |         self.assertIs(false.value, False)
17 |         self.assertRaises(AssertionError, BoolLitNode, 1)
18 |         self.assertRaises(AssertionError, BoolLitNode, 0)
19 |         self.assertRaises(AssertionError, BoolLitNode, 'true')
20 | 
21 |     def test_literal_int(self):
22 |         five = IntLitNode(5)
23 | 
24 |         self.assertEqual(five.value, 5)
25 |         self.assertRaises(AssertionError, IntLitNode, True)
26 |         self.assertRaises(AssertionError, IntLitNode, '5')
27 | 
28 |     def test_literal_string(self):
29 |         hello = StringLitNode('hello')
30 | 
31 |         self.assertEqual(hello.value, 'hello')
32 |         self.assertRaises(AssertionError, StringLitNode, True)
33 |         self.assertRaises(AssertionError, StringLitNode, 5)
34 | 
35 | if __name__ == '__main__':
36 |     unittest.main()
37 | 


--------------------------------------------------------------------------------
/tests/genetic_tests.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | class GeneticTest(unittest.TestCase):
4 |     pass
5 | 
6 | if __name__ == '__main__':
7 |     unittest.main()
8 | 


--------------------------------------------------------------------------------
/tests/parser_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from stringfuzz.scanner import SMT_20, SMT_20_STRING, SMT_25_STRING
 4 | from stringfuzz.parser import parse, parse_file
 5 | 
 6 | class TestParser(unittest.TestCase):
 7 | 
 8 |     def test_no_file(self):
 9 |         self.assertRaises(IOError, parse_file, '', SMT_20)
10 | 
11 |     def test_empty(self):
12 |         self.assertListEqual([], parse('', SMT_20))
13 | 
14 |     def test_bad_language(self):
15 |         self.assertRaises(ValueError, parse, '', '')
16 | 
17 |     def test_good_languages(self):
18 |         self.assertListEqual([], parse('', SMT_20))
19 |         self.assertListEqual([], parse('', SMT_20_STRING))
20 |         self.assertListEqual([], parse('', SMT_25_STRING))
21 | 
22 |     def test_trivial(self):
23 |         expressions = parse('(check-sat)', SMT_20)
24 |         self.assertEqual(len(expressions), 1)
25 |         self.assertEqual(expressions[0].symbol.name, 'check-sat')
26 |         self.assertListEqual(expressions[0].body, [])
27 | 
28 |     def test_simple_smt_20(self):
29 |         expressions = parse('''
30 |             (declare-fun X () String)
31 |             (assert (= X "solution"))
32 |             (check-sat)
33 |         ''', SMT_20_STRING)
34 | 
35 |         self.assertEqual(len(expressions), 3)
36 | 
37 |         self.assertEqual(expressions[0].symbol.name, 'declare-fun')
38 |         self.assertEqual(expressions[0].body[0].name, 'X')
39 |         self.assertEqual(expressions[0].body[2].name, 'String')
40 | 
41 |         self.assertEqual(expressions[1].symbol.name, 'assert')
42 |         self.assertEqual(expressions[1].body[0].symbol.name, '=')
43 |         self.assertEqual(expressions[1].body[0].body[0].name, 'X')
44 |         self.assertEqual(expressions[1].body[0].body[1].value, 'solution')
45 | 
46 |         self.assertEqual(expressions[2].symbol.name, 'check-sat')
47 | 
48 |     def test_simple_smt_25(self):
49 |         expressions = parse('''
50 |             (declare-fun X () String)
51 |             (assert (= X "solution"))
52 |             (check-sat)
53 |         ''', SMT_25_STRING)
54 | 
55 |         self.assertEqual(len(expressions), 3)
56 | 
57 |         self.assertEqual(expressions[0].symbol.name, 'declare-fun')
58 |         self.assertEqual(expressions[0].body[0].name, 'X')
59 |         self.assertEqual(expressions[0].body[2].name, 'String')
60 | 
61 |         self.assertEqual(expressions[1].symbol.name, 'assert')
62 |         self.assertEqual(expressions[1].body[0].symbol.name, '=')
63 |         self.assertEqual(expressions[1].body[0].body[0].name, 'X')
64 |         self.assertEqual(expressions[1].body[0].body[1].value, 'solution')
65 | 
66 |         self.assertEqual(expressions[2].symbol.name, 'check-sat')
67 | 
68 | if __name__ == '__main__':
69 |     unittest.main()
70 | 


--------------------------------------------------------------------------------
/tests/scanner_tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | from stringfuzz.constants import SMT_20, SMT_20_STRING, SMT_25_STRING
 4 | from stringfuzz.scanner import scan, scan_file
 5 | 
 6 | class TestScanner(unittest.TestCase):
 7 | 
 8 |     def test_constants(self):
 9 |         self.assertEqual(SMT_20, 'smt2')
10 |         self.assertEqual(SMT_20_STRING, 'smt20')
11 |         self.assertEqual(SMT_25_STRING, 'smt25')
12 | 
13 |     def test_no_file(self):
14 |         self.assertRaises(IOError, scan_file, '', SMT_20)
15 | 
16 |     def test_empty(self):
17 |         self.assertListEqual([], scan('', SMT_20))
18 | 
19 |     def test_bad_language(self):
20 |         self.assertRaises(ValueError, scan, '', '')
21 | 
22 |     def test_good_languages(self):
23 |         self.assertListEqual([], scan('', SMT_20))
24 |         self.assertListEqual([], scan('', SMT_20_STRING))
25 |         self.assertListEqual([], scan('', SMT_25_STRING))
26 | 
27 |     def test_simple(self):
28 |         tokens = scan('(check-sat)', SMT_20)
29 |         self.assertEqual(len(tokens), 3)
30 |         self.assertEqual(tokens[0].name, 'LPAREN')
31 |         self.assertEqual(tokens[0].value, '(')
32 |         self.assertEqual(tokens[1].name, 'IDENTIFIER')
33 |         self.assertEqual(tokens[1].value, 'check-sat')
34 |         self.assertEqual(tokens[2].name, 'RPAREN')
35 |         self.assertEqual(tokens[2].value, ')')
36 | 
37 | if __name__ == '__main__':
38 |     unittest.main()
39 | 


--------------------------------------------------------------------------------
/tests/walker_tests.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dblotsky/stringfuzz/5507894ed5d94ed36098753357d33adee182b298/tests/walker_tests.py


--------------------------------------------------------------------------------