├── test
│   └── rnn_model
│       └── model.h5
├── requirement.txt
├── AbstractRNNClassifier.py
├── LICENSE.txt
├── evaluation_scripts
│   └── fuzzing
│       ├── check_unique_crash.py
│       └── coverage_analyzer.py
├── .gitignore
├── Abstraction
│   ├── Coder.py
│   ├── GraphWrapper.py
│   ├── StateAbstraction.py
│   └── DTMCGraph.py
├── fuzzer
│   ├── fuzzone.py
│   ├── construct_initial_seeds.py
│   ├── image_queue.py
│   ├── lib
│   │   ├── fuzzer.py
│   │   └── queue.py
│   ├── image_fuzzer.py
│   └── mutators.py
├── abstraction_runner.py
├── coverage.py
├── README.md
└── mnist_demo
    └── mnist_lstm.py

/test/rnn_model/model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoningdu/deepstellar/HEAD/test/rnn_model/model.h5
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | keras==2.3
2 | tensorflow==1.14
3 | sklearn
4 | pandas
5 | matplotlib==3.1
6 | joblib
7 | pillow
8 | opencv-python
9 | xxhash
10 | graphviz
--------------------------------------------------------------------------------
/AbstractRNNClassifier.py:
--------------------------------------------------------------------------------
1 | 
2 | class AbstractRNNClassifier:
3 | 
4 |     def load_hidden_state_model(self, model_path):
5 |         pass
6 | 
7 |     def input_preprocess(self, data):
8 |         return data
9 | 
10 |     def profile_train_data(self, profile_save_path):
11 |         pass
12 | 
13 |     def get_state_profile(self, inputs):
14 |         pass
15 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2020 Xiaoning Du
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 |     http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
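AbstractRNNClassifier.py above is the plug-in point for the rest of the pipeline: abstraction_runner.py, the coverage analyzer and the fuzzer drive a model mainly through `load_hidden_state_model`, `input_preprocess`, `profile_train_data` and `get_state_profile`. The snippet below is a minimal sketch of how a concrete subclass is meant to fill these in; it condenses what `mnist_demo/mnist_lstm.py` does, and the class name `MyRNNClassifier` is only illustrative.

```python
from keras.layers import Input, Lambda, LSTM, Dense
from keras.models import Model

from AbstractRNNClassifier import AbstractRNNClassifier


class MyRNNClassifier(AbstractRNNClassifier):  # illustrative name
    def __init__(self, time_steps=28, n_inputs=28, n_units=128, n_classes=10):
        self.time_steps = time_steps
        self.n_inputs = n_inputs
        self.n_units = n_units
        self.n_classes = n_classes

    def load_hidden_state_model(self, model_path):
        # Rebuild the trained network with return_sequences=True so that
        # predict() yields both the class probabilities and the hidden-state trace.
        inp = Input(shape=(self.time_steps, self.n_inputs))
        states = LSTM(self.n_units, return_sequences=True)(inp)
        last = Lambda(lambda x: x[:, -1, :])(states)
        probs = Dense(self.n_classes, activation='softmax')(last)
        self.model = Model(inputs=inp, outputs=[probs, states])
        self.model.load_weights(model_path)

    def get_state_profile(self, inputs):
        # The abstraction and coverage code only consume the state sequences.
        inputs = self.input_preprocess(inputs)
        return self.model.predict(inputs)[1]
```

`input_preprocess` and `profile_train_data` are overridden in the same spirit; `mnist_demo/mnist_lstm.py` is the complete implementation used throughout the README.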
-------------------------------------------------------------------------------- /evaluation_scripts/fuzzing/check_unique_crash.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import argparse 4 | import xxhash 5 | 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser(description='control experiment') 9 | parser.add_argument('-i', help='crash dir path') 10 | args = parser.parse_args() 11 | 12 | dirs = os.listdir(args.i) 13 | hash_set = set() 14 | for i in dirs: 15 | crash_seed = os.path.join(args.i, i) 16 | seed = np.load(crash_seed) 17 | 18 | h = xxhash.xxh64() 19 | h.update(seed) 20 | q = h.intdigest() 21 | if q not in hash_set: 22 | hash_set.add(q) 23 | 24 | print(len(hash_set)) 25 | print('finish') 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Rope 43 | .ropeproject 44 | 45 | # Django stuff: 46 | *.log 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ -------------------------------------------------------------------------------- /Abstraction/Coder.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Coder(object): 4 | 5 | def __init__(self, bits, dim): 6 | """ 7 | :param bits: each dimension is encoded with how many bits 8 | :param dim: how many dimensions of the vectors 9 | """ 10 | self.bits = bits 11 | self.dim = dim 12 | assert self.bits * self.dim <= 64 13 | 14 | def encode(self, vec): 15 | assert len(vec) == self.dim 16 | d = 0 17 | for i in range(self.dim): 18 | di = vec[i] << (self.bits * i) 19 | d = d | di 20 | return d 21 | 22 | def decode(self, d): 23 | mask = 2 ** self.bits - 1 24 | vec = [] 25 | for i in range(self.dim): 26 | di = d & mask 27 | d = d >> self.bits 28 | vec.append(di) 29 | return vec 30 | 31 | 32 | if __name__ == '__main__': 33 | coder = Coder(8, 3) 34 | en = coder.encode([1, 2, 3]) 35 | print(format(en, '02x')) 36 | de = coder.decode(en) 37 | print(de) 38 | -------------------------------------------------------------------------------- /fuzzer/fuzzone.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | import numpy as np 3 | 4 | 5 | def predict(self, input_data): 6 | inp = self.model.input 7 | functor = K.function([inp] + [K.learning_phase()], self.outputs) 8 | outputs = functor([input_data, 0]) 9 | return outputs 10 | 11 | 12 | def fetch_function(handler, input_batches, preprocess): 13 | _, img_batches, _ = input_batches 14 | if len(img_batches) == 0: 15 | return None, None 16 | preprocessed = preprocess(img_batches) 17 | outputs = handler.predict(preprocessed) 18 | return outputs[1], np.expand_dims(np.argmax(outputs[0], axis=1), axis=0) 19 | 20 | 21 | def 
build_fetch_function(handler, preprocess): 22 | def func(input_batches): 23 | return fetch_function( 24 | handler, 25 | input_batches, 26 | preprocess 27 | ) 28 | 29 | return func 30 | 31 | 32 | def adptive_coverage_function(handler, cov_num): 33 | def func(layerouts): 34 | """The fetch function.""" 35 | ptr = np.zeros(cov_num, dtype=np.uint8) 36 | return handler.update_coverage(layerouts, ptr) 37 | 38 | return func 39 | -------------------------------------------------------------------------------- /fuzzer/construct_initial_seeds.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | from keras.datasets import mnist 5 | 6 | from keras.models import load_model 7 | import numpy as np 8 | 9 | 10 | def mnist_preprocessing(x_test): 11 | temp = np.copy(x_test) 12 | temp = temp.reshape(temp.shape[0], 28, 28) 13 | temp = temp.astype('float32') 14 | temp /= 255 15 | return temp 16 | 17 | 18 | def createBatch(x_batch, batch_size, output_path, prefix): 19 | if not os.path.exists(output_path): 20 | os.makedirs(output_path) 21 | batch_num = len(x_batch) / batch_size 22 | batches = np.split(x_batch, batch_num, axis=0) 23 | for i, batch in enumerate(batches): 24 | test = batch 25 | saved_name = prefix + str(i) + '.npy' 26 | np.save(os.path.join(output_path, saved_name), test) 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | parser = argparse.ArgumentParser(description='control experiment') 32 | 33 | parser.add_argument('-dl_model', help='path to model') 34 | parser.add_argument('-output_path', help='Out path') 35 | parser.add_argument('-batch_size', type=int, help='Number of images in one batch', default=1) 36 | parser.add_argument('-batch_num', type=int, help='Number of batches', default=100) 37 | args = parser.parse_args() 38 | if not os.path.exists(args.output_path): 39 | os.makedirs(args.output_path) 40 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 41 | batch = mnist_preprocessing(x_test) 42 | model = load_model(args.dl_model) 43 | x_test = x_test.reshape(x_test.shape[0], 28, 28) 44 | 45 | num_in_each_class = (args.batch_size * args.batch_num) / 10 46 | 47 | result = np.argmax(model.predict(batch), axis=1) # [0],axis=1 48 | 49 | new_label = np.reshape(y_test, result.shape) 50 | 51 | idx_good = np.where(new_label == result)[0] 52 | 53 | for cl in range(10): 54 | cl_indexes = [i for i in idx_good if new_label[i] == cl] 55 | selected = random.sample(cl_indexes, int(num_in_each_class)) 56 | createBatch(x_test[selected], args.batch_size, args.output_path, str(cl) + '_') 57 | print('finish') 58 | -------------------------------------------------------------------------------- /abstraction_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | from mnist_demo.mnist_lstm import MnistLSTMClassifier 5 | from Abstraction.StateAbstraction import StateAbstraction 6 | from Abstraction.GraphWrapper import GraphWrapper 7 | 8 | 9 | if __name__ == '__main__': 10 | parse = argparse.ArgumentParser("Generate abstract model") 11 | parse.add_argument('-dl_model', help='path of dl model', required=True) 12 | # parse.add_argument('-profile_data', help="path of data to do the profiling") 13 | parse.add_argument('-profile_save_path', help="dir to save profiling raw data", required=True) 14 | parse.add_argument('-comp_num', help="number of component when fitting pca", type=int, required=True) # can select a larger number 15 | 
parse.add_argument('-k', help='number of dimension to keep', type=int, required=True) 16 | parse.add_argument('-m', help="number of intervals on each dimension", type=int, required=True) 17 | parse.add_argument('-bits', help="number of bits for encoding", type=int, required=True) 18 | parse.add_argument('-name_prefix', help="name prefix when save the abstract model", required=True) 19 | parse.add_argument('-abst_save_path', help="path to save abstract model", required=True) 20 | parse.add_argument('-n_step', help="extend the graph to n_step", type=int, default=0) 21 | 22 | args = parse.parse_args() 23 | 24 | lstm_classifier = MnistLSTMClassifier() 25 | lstm_classifier.load_hidden_state_model(args.dl_model) 26 | 27 | if not os.path.exists(args.profile_save_path): 28 | lstm_classifier.profile_train_data(args.profile_save_path) 29 | print("profiling done...") 30 | else: 31 | print("profiling is already done...") 32 | 33 | par_k = [args.m]*args.k 34 | stateAbst = StateAbstraction(args.profile_save_path, args.comp_num, args.bits, [args.m]*args.k, args.n_step) 35 | wrapper = GraphWrapper(stateAbst) 36 | wrapper.build_model() 37 | 38 | save_file = 'wrapper_%s_%s_%s.pkl' % (args.name_prefix, len(par_k), par_k[0]) 39 | save_file = os.path.join(args.abst_save_path, save_file) 40 | os.makedirs(args.abst_save_path, exist_ok=True) 41 | with open(save_file, 'wb') as f: 42 | pickle.dump(wrapper, f) 43 | 44 | print('finish') 45 | 46 | -------------------------------------------------------------------------------- /evaluation_scripts/fuzzing/coverage_analyzer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from coverage import Coverage 4 | import numpy as np 5 | from mnist_demo.mnist_lstm import MnistLSTMClassifier 6 | 7 | 8 | def read_inputs_from_folder(folder, type="queue"): 9 | files = os.listdir(folder) 10 | tests = [] 11 | for file in files: 12 | data = np.load(os.path.join(folder, file)) 13 | if type == "crash": 14 | x_test = np.expand_dims(data, 0) 15 | elif type == "queue": 16 | x_test = data[1:2] 17 | else: 18 | x_test = data 19 | tests.extend(x_test) 20 | 21 | return np.asarray(tests) 22 | 23 | 24 | def fuzzing_analyzer(classifier, folder, dtmc_wrapper_f, type): 25 | if type == "queue": 26 | inputs = read_inputs_from_folder(folder, type="queue") 27 | else: # type == "seeds" 28 | inputs = read_inputs_from_folder(folder, type="seed") 29 | 30 | states = classifier.get_state_profile(inputs) 31 | coverage_handlers = [] 32 | 33 | for criteria, k_step in [("state", 0), ("transition", 0)]: # , ("k-step", 3), ("k-step", 6) 34 | cov = Coverage(dtmc_wrapper_f, criteria, k_step) 35 | coverage_handlers.append(cov) 36 | 37 | for coverage_handler in coverage_handlers: 38 | cov = coverage_handler.get_coverage_criteria(states) 39 | total = coverage_handler.get_total() 40 | print(len(cov) / total) 41 | if coverage_handler.mode != "k-step": # to printout the weighted coverage metrics 42 | weight_dic = coverage_handler.get_weight_dic() 43 | print(sum([weight_dic[e] for e in cov])) 44 | rev_weight_dic = coverage_handler.get_weight_dic(reverse=True) 45 | print(sum([rev_weight_dic[e] for e in cov])) 46 | 47 | 48 | if __name__ == '__main__': 49 | parser = argparse.ArgumentParser(description='analyzing the fuzzing results') 50 | parser.add_argument('-dl_model', help='path to the dl model', required=True) 51 | parser.add_argument('-wrapper', help='path to the abstract graph wrapper', required=True) 52 | parser.add_argument('-inputs_folder', 
help='path to the inputs folder', required=True) 53 | parser.add_argument('-type', choices=['seeds', 'queue'], default='queue') 54 | args = parser.parse_args() 55 | 56 | classifier = MnistLSTMClassifier() 57 | classifier.load_hidden_state_model(args.dl_model) 58 | fuzzing_analyzer(classifier, args.inputs_folder, args.wrapper, args.type) 59 | 60 | -------------------------------------------------------------------------------- /coverage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | from Abstraction.GraphWrapper import GraphWrapper 4 | 5 | 6 | class Coverage(object): 7 | 8 | def __init__(self, pkl_dir, mode, k_step): 9 | self.par_wrap = load_graph_pkl(pkl_dir) 10 | if mode == 'state': 11 | self.total_size = self.par_wrap.graph.get_major_states_num() 12 | print('There are %s major states in total.' % self.total_size) 13 | elif mode == 'k-step': 14 | if k_step > self.par_wrap.stateAbst.n_step: 15 | print('this step is larger than the steps kept, please rebuild the model.') 16 | exit(0) 17 | self.par_wrap.graph.init_k_step_idx(k_step) 18 | self.total_size = self.par_wrap.graph.get_k_step_states_num() 19 | print('There are %s k-step states in total with k = %s.' % (self.total_size, k_step)) 20 | elif mode == 'transition': 21 | self.total_size = self.par_wrap.graph.get_transition_num() 22 | print('There are %s transitions in total.' % self.total_size) 23 | else: 24 | self.total_size = 0 25 | self.mode = mode 26 | 27 | def update_coverage(self, outputs): 28 | seed_num = len(outputs) 29 | ptrs = np.tile(np.zeros(self.total_size, dtype=np.uint8), (seed_num, 1)) 30 | 31 | for i in range(len(ptrs)): 32 | self.par_wrap.visit_graph(outputs[i], ptrs[i], self.mode) 33 | 34 | return ptrs 35 | 36 | def get_coverage(self, outputs): 37 | result = [] 38 | 39 | for i in range(len(outputs)): 40 | tmp = [] 41 | self.par_wrap.visit_graph(outputs[i], tmp, self.mode, return_set=True) 42 | result.append(tmp) 43 | 44 | return result 45 | 46 | def get_coverage_criteria(self, outputs): 47 | result = set() 48 | 49 | for i in range(len(outputs)): 50 | tmp = [] 51 | self.par_wrap.visit_graph(outputs[i], tmp, self.mode, return_set=True) 52 | result = result.union(set(tmp)) 53 | 54 | return result 55 | 56 | def get_total(self): 57 | return self.total_size 58 | 59 | def get_weight_dic(self, reverse=False): 60 | if reverse: 61 | return self.par_wrap.graph.get_index_weight_dic(type=self.mode, reverse=True) 62 | return self.par_wrap.graph.get_index_weight_dic(type=self.mode) 63 | 64 | 65 | def load_graph_pkl(pkl_dir): 66 | with open(pkl_dir, 'rb') as f: 67 | g = pickle.load(f) 68 | return g 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepStellar: Model-Based Quantitative Analysis of Stateful Deep Learning Systems 2 | 3 | ## Prepare Environment 4 | python=3.6 5 | 6 | pip install -r requirement.txt 7 | 8 | 9 | ## To prepare an RNN model 10 | 11 | ```shell script 12 | python -m mnist_demo.mnist_lstm 13 | ``` 14 | 15 | 16 | ## Generate the DTMC abstract model 17 | 18 | ```shell script 19 | python abstraction_runner.py -dl_model test/rnn_model/model.h5 -profile_save_path test/output/profile_save -comp_num 128 -k 3 -m 10 -bits 8 -name_prefix lstm_mnist -abst_save_path test/output/abst_model 20 | 21 | ``` 22 | 23 | 24 | ## Coverage Guided Testing 25 | 26 | ### Construct initial seeds 27 | ```shell script 28 | 
python -m fuzzer.construct_initial_seeds -dl_model test/rnn_model/model.h5 -output_path ../fuzz_data/initialseeds
29 | ```
30 | 
31 | ### Launch the testing process
32 | ```shell script
33 | python -m fuzzer.image_fuzzer -i ../fuzz_data/initialseeds -o ../fuzz_data/fuzzing-out-1/lstm-trans-3-10 -model_type mnist -dl_model test/rnn_model/model.h5 -criteria state -pkl_path test/output/abst_model/wrapper_lstm_mnist_3_10.pkl
34 | ```
35 | 
36 | 
37 | ## Evaluation of the testing
38 | 
39 | ### Check the coverage metrics of the fuzzing output queue:
40 | 
41 | ```shell script
42 | python -m evaluation_scripts.fuzzing.coverage_analyzer -dl_model test/rnn_model/model.h5 -wrapper test/output/abst_model/wrapper_lstm_mnist_3_10.pkl -inputs_folder ../fuzz_data/fuzzing-out-1/lstm-trans-3-10/queue -type queue
43 | ```
44 | 
45 | ### Check the coverage metrics of the initial seeds:
46 | 
47 | ```shell script
48 | python -m evaluation_scripts.fuzzing.coverage_analyzer -dl_model test/rnn_model/model.h5 -wrapper test/output/abst_model/wrapper_lstm_mnist_3_10.pkl -inputs_folder ../fuzz_data/initialseeds -type seeds
49 | ```
50 | 
51 | ### Check the number of unique crashes
52 | 
53 | ```shell script
54 | python -m evaluation_scripts.fuzzing.check_unique_crash -i ../fuzz_data/fuzzing-out-1/lstm-trans-3-10/crashes
55 | ```
56 | 
57 | ### If you would like to use DeepStellar in your research, please cite our FSE'19 paper:
58 | 
59 | ```bibtex
60 | @inproceedings{10.1145/3338906.3338954,
61 | author = {Du, Xiaoning and Xie, Xiaofei and Li, Yi and Ma, Lei and Liu, Yang and Zhao, Jianjun},
62 | title = {DeepStellar: Model-Based Quantitative Analysis of Stateful Deep Learning Systems},
63 | year = {2019},
64 | booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
65 | pages = {477–487},
66 | series = {ESEC/FSE 2019}
67 | }
68 | ```
69 | 
70 | 
71 | 
72 | 
--------------------------------------------------------------------------------
/fuzzer/image_queue.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from fuzzer.lib.queue import FuzzQueue
4 | from fuzzer.lib.queue import Seed
5 | 
6 | 
7 | class ImageInputCorpus(FuzzQueue):
8 |     """Class that holds inputs and associated coverage."""
9 | 
10 |     def __init__(self, outdir, israndom, sample_function, cov_num, criteria):
11 |         """Init the class.
12 | 
13 |         Args:
14 |           seed_corpus: a list of numpy arrays, one for each input tensor in the
15 |             fuzzing process.
16 |           sample_function: a function that looks at the whole current corpus and
17 |             samples the next element to mutate in the fuzzing loop.
18 |         Returns:
19 |           Initialized object.
20 | """ 21 | FuzzQueue.__init__(self, outdir, israndom, sample_function, cov_num, criteria) 22 | 23 | self.loopup = {} 24 | self.loopup[0] = 0 25 | self.loopup[1] = 1 26 | self.loopup.update(self.loopup.fromkeys(range(2, 51), 2)) 27 | self.loopup.update(self.loopup.fromkeys(range(51, 151), 4)) 28 | self.loopup.update(self.loopup.fromkeys(range(151, 256), 128)) 29 | 30 | def save_if_interesting(self, seed, data, crash, dry_run=False, suffix=None): 31 | """Adds item to corpus if it exercises new coverage.""" 32 | 33 | def class_loop_up(x): 34 | return self.loopup[x] 35 | 36 | self.mutations_processed += 1 37 | current_time = time.time() 38 | if dry_run: 39 | coverage = self.compute_cov() 40 | self.dry_run_cov = coverage 41 | if current_time - self.log_time > 2: 42 | self.log_time = current_time 43 | self.log() 44 | describe_op = "src:%06d" % (seed.parent.id) if suffix is None else "src:%s" % (suffix) 45 | 46 | if crash: 47 | fn = "%s/crashes/id:%06d,%s.npy" % (self.out_dir, self.uniq_crashes, describe_op) 48 | self.uniq_crashes += 1 49 | self.last_crash_time = current_time 50 | else: 51 | fn = "%s/queue/id:%06d,%s.npy" % (self.out_dir, self.total_queue, describe_op) 52 | if self.has_new_bits(seed) or dry_run: 53 | self.last_reg_time = current_time 54 | if self.sample_type != 'random2' or dry_run: 55 | seed.queue_time = current_time 56 | seed.id = self.total_queue 57 | seed.fname = fn 58 | seed.probability = self.REG_INIT_PROB 59 | self.queue.append(seed) 60 | del seed.coverage 61 | else: 62 | del seed 63 | self.total_queue += 1 64 | else: 65 | del seed 66 | return False 67 | np.save(fn, data) 68 | return True 69 | -------------------------------------------------------------------------------- /fuzzer/lib/fuzzer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import gc 3 | 4 | 5 | class Fuzzer(object): 6 | """Class representing the fuzzer itself.""" 7 | 8 | def __init__( 9 | self, 10 | corpus, 11 | coverage_function, 12 | metadata_function, 13 | objective_function, 14 | mutation_function, 15 | fetch_function, 16 | iterate_function, 17 | plot=True 18 | ): 19 | """Init the class. 20 | 21 | Args: 22 | corpus: An InputCorpus object. 23 | coverage_function: a function that does CorpusElement -> Coverage. 24 | metadata_function: a function that does CorpusElement -> Metadata. 25 | objective_function: a function that checks if a CorpusElement satisifies 26 | the fuzzing objective (e.g. find a NaN, find a misclassification, etc). 27 | mutation_function: a function that does CorpusElement -> Metadata. 28 | fetch_function: grabs numpy arrays from the TF runtime using the relevant 29 | tensors. 30 | Returns: 31 | Initialized object. 
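      iterate_function: a function that receives the queue, the parent seed and
        the mutated data/coverage/metadata batches, stores the interesting
        mutants, and reports whether a bug or a coverage increase was found.
      plot: if True, a line is appended to the plot log on every iteration.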
32 | """ 33 | self.plot = plot 34 | self.queue = corpus 35 | self.coverage_function = coverage_function 36 | self.metadata_function = metadata_function 37 | self.objective_function = objective_function 38 | self.mutation_function = mutation_function 39 | self.fetch_function = fetch_function 40 | self.iterate_function = iterate_function 41 | 42 | def loop(self, iterations): 43 | """Fuzzes a machine learning model in a loop, making *iterations* steps.""" 44 | iteration = 0 45 | while True: 46 | 47 | if len(self.queue.queue) < 1 or iteration >= iterations: 48 | break 49 | if iteration % 100 == 0: 50 | tf.logging.info("fuzzing iteration: %s", iteration) 51 | gc.collect() 52 | 53 | parent = self.queue.select_next() 54 | # Get a mutated batch for each input tensor 55 | mutated_data_batches = self.mutation_function(parent) 56 | # Grab the coverage and metadata for mutated batch from the TF runtime. 57 | coverage_batches, metadata_batches = self.fetch_function( 58 | mutated_data_batches 59 | ) 60 | if self.plot: 61 | self.queue.plot_log(iteration) 62 | 63 | if coverage_batches is not None and len(coverage_batches) > 0: 64 | # Get the coverage - one from each batch element 65 | mutated_coverage_list = self.coverage_function(coverage_batches) 66 | 67 | # Get the metadata objects - one from each batch element 68 | mutated_metadata_list = self.metadata_function(metadata_batches) 69 | 70 | # Check for new coverage and create new corpus elements if necessary. 71 | # pylint: disable=consider-using-enumerate 72 | 73 | bug_found, cov_inc = self.iterate_function(self.queue, parent.root_seed, parent, mutated_coverage_list, 74 | mutated_data_batches, mutated_metadata_list, 75 | self.objective_function) 76 | del mutated_coverage_list 77 | del mutated_metadata_list 78 | else: 79 | bug_found = False 80 | cov_inc = False 81 | 82 | self.queue.fuzzer_handler(iteration, parent, bug_found, cov_inc) 83 | iteration += 1 84 | 85 | del mutated_data_batches 86 | del coverage_batches 87 | del metadata_batches 88 | 89 | self.queue.write_logs() 90 | return None 91 | -------------------------------------------------------------------------------- /mnist_demo/mnist_lstm.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import os 3 | from AbstractRNNClassifier import AbstractRNNClassifier 4 | from keras.datasets import mnist 5 | from keras.models import Sequential 6 | from keras.layers import Input, Lambda, LSTM, Dense 7 | from keras.models import load_model 8 | from keras.models import Model 9 | import numpy as np 10 | 11 | 12 | class MnistLSTMClassifier(AbstractRNNClassifier): 13 | def __init__(self): 14 | # Classifier 15 | self.time_steps = 28 # timesteps to unroll 16 | self.n_units = 128 # hidden LSTM units 17 | self.n_inputs = 28 # rows of 28 pixels (an mnist img is 28x28) 18 | self.n_classes = 10 # mnist classes/labels (0-9) 19 | self.batch_size = 128 # Size of each batch 20 | self.n_epochs = 20 21 | 22 | def create_model(self): 23 | self.model = Sequential() 24 | self.model.add(LSTM(self.n_units, input_shape=(self.time_steps, self.n_inputs))) 25 | self.model.add(Dense(self.n_classes, activation='softmax')) 26 | 27 | self.model.compile(loss='categorical_crossentropy', 28 | optimizer='rmsprop', 29 | metrics=['accuracy']) 30 | # self.model.summary() 31 | 32 | def load_hidden_state_model(self, model_path): 33 | """ 34 | return the rnn model with return_sequence enabled. 
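        The loaded model has two outputs: the softmax class probabilities and the
        per-timestep hidden states of shape (N, 28, 128). Illustrative use:

            clf = MnistLSTMClassifier()
            clf.load_hidden_state_model('test/rnn_model/model.h5')
            probs, states = clf.model.predict(batch)  # batch: preprocessed, shape (N, 28, 28)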
35 | """ 36 | input = Input(shape=(self.time_steps, self.n_inputs)) 37 | lstm = LSTM(self.n_units, input_shape=(self.time_steps, self.n_inputs), return_sequences=True)(input) 38 | last_timestep = Lambda(lambda x: x[:, -1, :])(lstm) 39 | dense = Dense(10, activation='softmax')(last_timestep) 40 | model = Model(inputs=input, outputs=[dense, lstm]) 41 | model.load_weights(model_path) 42 | self.model = model 43 | 44 | def train(self, save_path): 45 | self.create_model() 46 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 47 | 48 | x_train = self.input_preprocess(x_train) 49 | x_test = self.input_preprocess(x_test) 50 | 51 | y_test = keras.utils.to_categorical(y_test, num_classes=10) 52 | y_train = keras.utils.to_categorical(y_train, num_classes=10) 53 | 54 | self.model.fit(x_train, y_train, validation_data=(x_test, y_test), 55 | batch_size=self.batch_size, epochs=self.n_epochs, shuffle=False) 56 | 57 | os.makedirs(save_path, exist_ok=True) 58 | self.model.save(os.path.join(save_path, "model.h5")) 59 | 60 | def evaluate(self, model=None): 61 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 62 | 63 | x_test = self.input_preprocess(x_test) 64 | y_test = keras.utils.to_categorical(y_test, num_classes=10) 65 | 66 | model = load_model(model) if model else self.model 67 | test_loss = model.evaluate(x_test, y_test) 68 | print(test_loss) 69 | 70 | def input_preprocess(self, data): 71 | data = data.reshape(data.shape[0], self.n_inputs, self.n_inputs) 72 | data = data.astype('float32') 73 | data /= 255 74 | return data 75 | 76 | def profile_train_data(self, profile_save_path): 77 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 78 | x_train = self.input_preprocess(x_train) 79 | output = self.model.predict(x_train) 80 | cls = np.argmax(output[0], axis=1) 81 | correct_idx = np.where(cls == y_train)[0] 82 | os.makedirs(profile_save_path, exist_ok=True) 83 | states_correct = output[1][correct_idx] 84 | np.save(os.path.join(profile_save_path, "states_profile.npy"), states_correct) 85 | 86 | def get_state_profile(self, inputs): 87 | inputs = self.input_preprocess(inputs) 88 | output = self.model.predict(inputs) 89 | return output[1] 90 | 91 | 92 | if __name__ == "__main__": 93 | save_path = "test/rnn_model" 94 | 95 | lstm_classifier = MnistLSTMClassifier() 96 | # train an rnn model 97 | lstm_classifier.create_model() 98 | lstm_classifier.train(save_path) 99 | lstm_classifier.evaluate() 100 | 101 | # Load a trained model with return_sequence enabled. 102 | # profile_path = "test/output/profile_save" 103 | # lstm_classifier.load_hidden_state_model(os.path.join(save_path, "model.h5")) 104 | # lstm_classifier.profile_train_data(profile_path) 105 | -------------------------------------------------------------------------------- /Abstraction/GraphWrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Abstraction.DTMCGraph import DTMCGraph 3 | import json 4 | 5 | 6 | class GraphWrapper: 7 | def __init__(self, stateAbst, fake_initial=-1): 8 | self.graph = DTMCGraph(fake_initial) 9 | self.stateAbst = stateAbst 10 | 11 | def build_model(self, label_dir=None): 12 | """ 13 | Build model for a specific configuration 14 | :label_dir: file of the label profiling, currently not used. 
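        Typical use (mirrors abstraction_runner.py):

            wrapper = GraphWrapper(stateAbst)
            wrapper.build_model()
            # pickle the wrapper; coverage.Coverage later reloads it by file path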
15 | """ 16 | pca_fit = self.stateAbst.get_pca_trans_data() 17 | 18 | if label_dir: 19 | with open(label_dir) as f: 20 | translation_all = json.load(f) 21 | else: 22 | translation_all = None 23 | 24 | if translation_all: # if with labels 25 | for i in range(len(pca_fit)): 26 | seq = pca_fit[i] 27 | trans = translation_all[i] 28 | assert len(seq) == len(trans) 29 | self.build_step(seq, trans) 30 | else: # if without labels 31 | for i in range(len(pca_fit)): 32 | seq = pca_fit[i] 33 | self.build_step(seq, None) 34 | # break 35 | # del pca_fit 36 | # del translation_all 37 | # self.graph.draw_graph("0", "DTMC") 38 | # g_warp.graph.transitions = None 39 | self.extend_to_k_step() # extend the graph to the steps 40 | self.graph.init_k_step_idx(self.stateAbst.n_step) 41 | # g_warp.visit_graph('', [0]*500, 'k-step') 42 | # g_warp.visit_graph(pca_fit[0], [0]*2000, 'transition') 43 | # os.makedirs(save2folder, exist_ok=True) 44 | 45 | def build_step(self, seq, labels=None): 46 | """ 47 | Add a sequence of state vectors to the graph, the vectors are usually transformed by PCA model 48 | :param seq: the sequence of state vectors 49 | :param labels: labels for the transitions, currently not used 50 | """ 51 | transition_seq_name = self.stateAbst.data_transform(seq) # make abstraction without PCA transformation 52 | if labels is None: 53 | labels = ['-']*len(seq) 54 | self.graph.add_ordered_transitions(transition_seq_name, labels) 55 | del transition_seq_name 56 | 57 | def extend_to_k_step(self): 58 | """ 59 | Extend the graph to k step states 60 | """ 61 | if self.stateAbst.n_step <= 0: 62 | return 63 | moves = enumerate_manhattan(self.stateAbst.dimension, self.stateAbst.n_step) 64 | step_out_dic = {} 65 | for state_name, _ in self.graph.states.items(): 66 | if state_name != -1: 67 | decoded_vec = self.stateAbst.coder.decode(state_name) 68 | for move in moves: 69 | step_out = list(np.array(decoded_vec)+np.array(move)) 70 | step_out = self.stateAbst.coder.encode(step_out) 71 | step = abs_sum(move) 72 | if step_out in step_out_dic: 73 | if step_out_dic[step_out] > step: 74 | step_out_dic[step_out] = step 75 | else: 76 | step_out_dic[step_out] = step 77 | step_out_seq = [] 78 | step_seq = [] 79 | for step_out, step in step_out_dic.items(): 80 | step_out_seq.append(step_out) 81 | step_seq.append(step) 82 | 83 | self.graph.add_other_states(step_out_seq, step_seq) 84 | 85 | def visit_graph(self, state_seq, cnt_states, mode, return_set=False): 86 | """ 87 | Update the coverage for a specific sequence 88 | :param state_seq: the state vector sequence 89 | :param cnt_states: current coverage 90 | :param mode: which coverage criteria 91 | :param return_set: whether to return the set of covered state/transition id 92 | :return: the cnt_states will be updated 93 | """ 94 | transition_seq_name = self.stateAbst.data_transform(state_seq, pca_transform=True) 95 | if mode == 'state': 96 | self.graph.to_cover_major_states(transition_seq_name, cnt_states, return_set=return_set) 97 | elif mode == 'k-step': 98 | self.graph.to_cover_k_step(transition_seq_name, cnt_states, return_set=return_set) 99 | elif mode == 'transition': 100 | self.graph.to_cover_transitions(transition_seq_name, cnt_states, return_set=return_set) 101 | 102 | 103 | def enumerate_manhattan(dim, k): 104 | """ 105 | :param dim: dimension of the space 106 | :param k: max step-out 107 | :return: the set of all possible moves with in k steps 108 | """ 109 | vec = [0] * dim 110 | covered_list = [] 111 | queue = [vec] 112 | while queue: 113 | cur_vec = 
queue.pop(0) 114 | if cur_vec not in covered_list: 115 | covered_list.append(cur_vec) 116 | for i in range(len(cur_vec)): 117 | tmp = cur_vec.copy() 118 | tmp[i] += 1 119 | if abs_sum(tmp) <= k: 120 | queue.append(tmp) 121 | tmp = cur_vec.copy() 122 | tmp[i] -= 1 123 | if abs_sum(tmp) <= k: 124 | queue.append(tmp) 125 | covered_list.remove(vec) 126 | return covered_list 127 | 128 | 129 | def abs_sum(vec): 130 | return sum([abs(i) for i in vec]) 131 | -------------------------------------------------------------------------------- /Abstraction/StateAbstraction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.decomposition import PCA 3 | import time 4 | import joblib 5 | import _pickle as pickle 6 | import os 7 | from Abstraction.Coder import Coder 8 | 9 | 10 | class StateAbstraction: 11 | def __init__(self, state_profile_folder, comp_num, bits, par_k, n_step): 12 | self.state_profile_folder = state_profile_folder 13 | self.comp_num = comp_num 14 | self.profile_file_list = get_all_file(state_profile_folder) 15 | self.cache_dir = os.path.join(state_profile_folder, "cache") 16 | self.pca_trans_dir = os.path.join(state_profile_folder, "pca_trans") 17 | self.pca_model_f = os.path.join(self.cache_dir, 'pca_model_cmp_%s.joblib' % self.comp_num) 18 | self.diag_matrix_f = os.path.join(self.cache_dir, 'diag_matrix.npy') 19 | self.min_array_f = os.path.join(self.cache_dir, 'min_array.npy') 20 | 21 | if not os.path.exists(self.cache_dir): 22 | os.makedirs(self.cache_dir, exist_ok=True) 23 | os.makedirs(self.pca_trans_dir, exist_ok=True) 24 | self.pca_fit() 25 | self.pca_trans() 26 | self.get_quantization_matrix() 27 | 28 | self.pca_model = joblib.load(self.pca_model_f) 29 | self.diag_matrix = np.load(self.diag_matrix_f) 30 | self.min_array = np.load(self.min_array_f) 31 | 32 | self.bits = bits 33 | self.par_k = par_k 34 | self.dimension = len(par_k) 35 | self.n_step = n_step 36 | self.min_array = self.min_array[range(self.dimension)] # tailor to the dimension 37 | self.diag_matrix = self.diag_matrix[:, range(self.dimension)][range(self.dimension), :] # tailor to the dimension 38 | self.diag_matrix = self.diag_matrix.dot(np.diag(par_k)) # prepare par_k/range 39 | self.coder = Coder(bits, self.dimension) # init a Coder for the encoding and decoding 40 | 41 | def pca_fit(self): 42 | """ 43 | Read data from the data_repo and calculate the first comp_num principal components. 44 | For choose to sample the data before fitting PCA model 45 | """ 46 | 47 | # read from data repo 48 | all_sample_data = [] 49 | for f in self.profile_file_list: 50 | sample_chunk = np.load(os.path.join(self.state_profile_folder, f)) 51 | all_sample_data.extend(sample_chunk) 52 | 53 | # fitting PCA model and save the model to the 'cache' folder under the data_repo 54 | start = time.time() 55 | pca = PCA(n_components=self.comp_num, copy=False) 56 | pca.fit(np.array([e for l in all_sample_data for e in l])) 57 | joblib.dump(pca, self.pca_model_f) 58 | print('pca fitting used %s ...' 
% (time.time() - start)) 59 | 60 | def pca_trans(self): 61 | """ 62 | Transform all the data with the PCA model and save the transformed data to pca_trans folder inside the repo folder 63 | """ 64 | pca = joblib.load(self.pca_model_f) 65 | for f in self.profile_file_list: 66 | sample_chunk = np.load(os.path.join(self.state_profile_folder, f)) 67 | sample_chunk_pca = [] 68 | for sample in sample_chunk: 69 | sample_pca = pca.transform(np.array(sample)) 70 | sample_chunk_pca.append(sample_pca) 71 | np.save(os.path.join(self.pca_trans_dir, f), sample_chunk_pca) 72 | print('pca_trans finished.') 73 | 74 | def get_quantization_matrix(self): 75 | """ 76 | Read the PCA-transformed data, and calculate the auxiliary matrix for quantization 77 | """ 78 | fit_data = self.get_pca_trans_data() 79 | fit_data = np.array([s for seq in fit_data for s in seq]) 80 | print('fit data shape:') 81 | print(fit_data.shape) 82 | 83 | diag_array = [] # holding the reciprocal of each dimension on the diagonal 84 | min_array = [] # holding the minimum value of each dimension 85 | for i in range(fit_data.shape[1]): 86 | proj_i = [e[i] for e in fit_data] 87 | diag_array.append(1 / (max(proj_i) - min(proj_i))) 88 | min_array.append(min(proj_i)) 89 | # print('%s--%s' % (min(proj_i), max(proj_i))) 90 | diag_matrix = np.diag(diag_array) 91 | 92 | np.save(self.diag_matrix_f, diag_matrix) 93 | np.save(self.min_array_f, min_array) 94 | 95 | def data_transform(self, seq, pca_transform=False): 96 | """ 97 | return the sequence of abstracted state name 98 | """ 99 | if pca_transform: 100 | seq = self.pca_model.transform(np.array(seq)) 101 | seq = seq[:, range(self.dimension)] # take the dimension 102 | my_min = np.repeat(self.min_array, len(seq)) 103 | my_min = my_min.reshape(self.dimension, len(seq)).transpose() 104 | seq = seq - my_min # each vector minus the lower bound 105 | pca_fit_partition = np.floor(seq.dot(self.diag_matrix)).astype(int) # (vec-min)/(range/par_k) and take the floor value 106 | pca_fit_partition = pca_fit_partition + self.n_step # to avoid negative encoding 107 | transition_seq_name = [self.coder.encode(a) for a in pca_fit_partition] # encode the abstracted vectors 108 | # print(transition_seq_name) 109 | # transition_seq_name = [self.fake_initial] + transition_seq_name # fake initial as starting state 110 | del my_min 111 | del seq 112 | del pca_fit_partition 113 | return transition_seq_name 114 | 115 | def pca_transform(self, seq): 116 | return self.pca_model.transform(np.array(seq[0])) 117 | 118 | def get_pca_trans_data(self): 119 | pca_fit = [] 120 | data_fs = get_all_file(self.pca_trans_dir) 121 | for f in data_fs: 122 | chunk = np.load(os.path.join(self.pca_trans_dir, f)) 123 | pca_fit.extend(chunk) 124 | # break 125 | return pca_fit 126 | 127 | 128 | def get_all_file(target_dir): 129 | """ 130 | A util function to return all files under a dir 131 | :param target_dir: the target folder 132 | :return: the set of files with name 133 | """ 134 | onlyfiles = [f for f in os.listdir(target_dir) if os.path.isfile(os.path.join(target_dir, f))] 135 | return onlyfiles 136 | 137 | 138 | def load_graph_pkl(pkl_dir): 139 | with open(pkl_dir, 'rb') as f: 140 | g = pickle.load(f) 141 | return g 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /fuzzer/image_fuzzer.py: -------------------------------------------------------------------------------- 1 | import argparse, pickle 2 | import shutil 3 | 4 | import tensorflow as tf 5 | import os 6 | from coverage 
import Coverage 7 | from keras.applications.vgg16 import preprocess_input 8 | import random 9 | import time 10 | import numpy as np 11 | from fuzzer.image_queue import ImageInputCorpus 12 | from fuzzer.fuzzone import build_fetch_function 13 | 14 | from fuzzer.lib.fuzzer import Fuzzer 15 | from fuzzer.mutators import Mutators 16 | from fuzzer.image_queue import Seed 17 | from mnist_demo.mnist_lstm import MnistLSTMClassifier 18 | 19 | 20 | def imagenet_preprocessing(input_img_data): 21 | temp = np.copy(input_img_data) 22 | temp = np.float32(temp) 23 | qq = preprocess_input(temp) # final input shape = (1,224,224,3) 24 | return qq 25 | 26 | 27 | def mnist_preprocessing(x_test): 28 | temp = np.copy(x_test) 29 | temp = temp.reshape(temp.shape[0], 28, 28) 30 | temp = temp.astype('float32') 31 | temp /= 255 32 | return temp 33 | 34 | 35 | def cifar_preprocessing(x_test): 36 | temp = np.copy(x_test) 37 | temp = temp.astype('float32') 38 | mean = [125.307, 122.95, 113.865] 39 | std = [62.9932, 62.0887, 66.7048] 40 | for i in range(3): 41 | temp[:, :, :, i] = (temp[:, :, :, i] - mean[i]) / std[i] 42 | return temp 43 | 44 | 45 | preprocess_dic = { 46 | 'cifar10': cifar_preprocessing, 47 | 'mnist': mnist_preprocessing, 48 | 'imagenet': imagenet_preprocessing 49 | } 50 | 51 | shape_dic = { 52 | 'cifar10': (32, 32, 3), 53 | 'mnist': (28, 28), 54 | 'imagenet': (224, 224, 3) 55 | } 56 | 57 | execlude_layer_dic = { 58 | 'vgg16': ['input', 'flatten', 'activation', 'batch', 'dropout'], 59 | 'resnet20': ['input', 'flatten', 'activation', 'batch', 'dropout'], 60 | 'lenet1': ['input', 'flatten', 'activation', 'batch', 'dropout'], 61 | 'lenet4': ['input', 'flatten', 'activation', 'batch', 'dropout'], 62 | 'lenet5': ['input', 'flatten', 'activation', 'batch', 'dropout'], 63 | 'mobilenet': ['input', 'flatten', 'padding', 'activation', 'batch', 'dropout', 64 | 'bn', 'reshape', 'relu', 'pool', 'concat', 'softmax', 'fc'], 65 | 'vgg19': ['input', 'flatten', 'padding', 'activation', 'batch', 'dropout', 'bn', 66 | 'reshape', 'relu', 'pool', 'concat', 'softmax', 'fc'], 67 | 'resnet50': ['input', 'flatten', 'padding', 'activation', 'batch', 'dropout', 'bn', 68 | 'reshape', 'relu', 'pool', 'concat', 'add', 'res4', 'res5'] 69 | } 70 | 71 | 72 | def metadata_function(meta_batches): 73 | return meta_batches 74 | 75 | 76 | def image_mutation_function(batch_num, deeptest=False): 77 | def func(seed): 78 | if deeptest: 79 | return Mutators.image_random_mutate(seed, batch_num) 80 | else: 81 | return Mutators.image_random_mutate(seed, batch_num) 82 | 83 | return func 84 | 85 | 86 | def objective_function(seed): 87 | """Checks if the metadata is inf or NaN.""" 88 | metadata = seed.metadata 89 | ground_truth = seed.ground_truth 90 | return metadata[0] != ground_truth 91 | 92 | 93 | def iterate_function(): 94 | def func(queue, root_seed, parent, mutated_coverage_list, mutated_data_batches, mutated_metadata_list, 95 | objective_function): 96 | ori_batches, batches, cl_batches = mutated_data_batches 97 | successed = False 98 | bug_found = False 99 | for idx in range(len(mutated_coverage_list)): 100 | # 1000 for placeholder 101 | input = Seed(cl_batches[idx], 1000, mutated_coverage_list[idx], root_seed, parent, 102 | mutated_metadata_list[:, idx], 103 | parent.ground_truth) 104 | is_adv = objective_function(input) 105 | if is_adv: 106 | suf = 'g_' + str(input.ground_truth) + 'c_' + str(input.metadata[0]) + '-' + root_seed 107 | queue.save_if_interesting(input, batches[idx], True, suffix=suf) 108 | else: 109 | new_img = 
np.append(ori_batches[idx:idx + 1], batches[idx:idx + 1], axis=0) 110 | successed = queue.save_if_interesting(input, new_img, False) or successed 111 | return bug_found, successed 112 | 113 | return func 114 | 115 | 116 | def dry_run(indir, fetch_function, coverage_function, queue): 117 | seed_lis = os.listdir(indir) 118 | if len(seed_lis) == 0: 119 | print('Empty dir') 120 | exit(0) 121 | for seed_name in seed_lis: 122 | tf.logging.info("Attempting dry run with '%s'...", seed_name) 123 | path = os.path.join(indir, seed_name) 124 | img = np.load(path) 125 | # input_batches = img 126 | coverage_batches, metadata_batches = fetch_function((0, img, 0)) 127 | coverage_list = coverage_function(coverage_batches) 128 | metadata_list = metadata_function(metadata_batches) 129 | input = Seed(0, 1000, coverage_list[0], seed_name, None, metadata_list[0][0], metadata_list[0][0]) 130 | new_img = np.append(img, img, axis=0) 131 | queue.save_if_interesting(input, new_img, False, True, seed_name) 132 | 133 | 134 | if __name__ == '__main__': 135 | 136 | start_time = time.time() 137 | # Log more 138 | tf.logging.set_verbosity(tf.logging.INFO) 139 | random.seed(time.time()) 140 | 141 | parser = argparse.ArgumentParser(description='coverage guided fuzzing') 142 | 143 | parser.add_argument('-i', help='input seed dir') 144 | parser.add_argument('-o', help='seed output') 145 | 146 | parser.add_argument('-model_type', help="target model fuzz", choices=['mnist', 'cifar10', 'imagenet']) 147 | parser.add_argument('-dl_model', help="path to the dl model", required=True) 148 | parser.add_argument('-criteria', help="set the criteria to guide", 149 | choices=['state', 'k-step', 'transition'], default='state') 150 | parser.add_argument('-k_step', help="how many outer step to check", type=int, default=0) 151 | parser.add_argument('-batch_num', help="set mutation batch number", type=int, default=20) 152 | parser.add_argument('-iterations', help="total regression tests tried", type=int, default=10000000) 153 | parser.add_argument('-cri_parameter', help="set the parameter of criteria", type=float) 154 | parser.add_argument('-quantize', help="fuzzer for quantize", default=0, type=int) 155 | parser.add_argument('-quantize_models', help="fuzzer for quantize") 156 | parser.add_argument('-random', help="set mutation batch number", type=int, default=0) 157 | parser.add_argument('-select', help="select next", 158 | choices=['random2', 'random', 'tensorfuzz', 'deeptest', 'deeptest2', 'prob'], default='prob') 159 | parser.add_argument('-pkl_path', help='pkl path') 160 | 161 | args = parser.parse_args() 162 | 163 | if os.path.exists(args.o): 164 | shutil.rmtree(args.o) 165 | os.makedirs(os.path.join(args.o, 'queue')) 166 | os.makedirs(os.path.join(args.o, 'crashes')) 167 | 168 | lstm_classifier = MnistLSTMClassifier() 169 | lstm_classifier.load_hidden_state_model(args.dl_model) 170 | model = lstm_classifier.model 171 | preprocess = preprocess_dic[args.model_type] 172 | 173 | coverage_handler = Coverage(args.pkl_path, args.criteria, args.k_step) 174 | 175 | plot_file = open(os.path.join(args.o, 'plot.log'), 'a+') 176 | 177 | fetch_function_1 = build_fetch_function(model, preprocess) 178 | 179 | dry_run_fetch = build_fetch_function(model, preprocess) 180 | 181 | coverage_function = coverage_handler.update_coverage 182 | 183 | mutation_function = image_mutation_function(args.batch_num) 184 | 185 | queue = ImageInputCorpus(args.o, args.random, args.select, coverage_handler.total_size, args.criteria) 186 | 187 | dry_run(args.i, 
dry_run_fetch, coverage_function, queue) 188 | 189 | image_iterate_function = iterate_function() 190 | 191 | fuzzer = Fuzzer(queue, coverage_function, metadata_function, objective_function, mutation_function, 192 | fetch_function_1, image_iterate_function, args.select) 193 | 194 | fuzzer.loop(args.iterations) 195 | # queue.log() 196 | 197 | print('finish', time.time() - start_time) 198 | -------------------------------------------------------------------------------- /fuzzer/lib/queue.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from random import randint 4 | import tensorflow as tf 5 | import datetime 6 | import random 7 | import os 8 | 9 | 10 | class Seed(object): 11 | """Class representing a single element of a corpus.""" 12 | 13 | def __init__(self, cl, space, coverage, root_seed, parent, metadata, ground_truth): 14 | """Inits the object. 15 | 16 | Args: 17 | data: a list of numpy arrays representing the mutated data. 18 | metadata: arbitrary python object to be used by the fuzzer for e.g. 19 | computing the objective function during the fuzzing loop. 20 | coverage: an arbitrary hashable python object that guides fuzzing process. 21 | parent: a reference to the CorpusElement this element is a mutation of. 22 | iteration: the fuzzing iteration (number of CorpusElements sampled to 23 | mutate) that this CorpusElement was created at. 24 | Returns: 25 | Initialized object. 26 | """ 27 | 28 | self.clss = cl 29 | self.metadata = metadata 30 | self.parent = parent 31 | self.root_seed = root_seed 32 | self.coverage = coverage 33 | self.queue_time = None 34 | self.id = None 35 | self.probability = 0.8 36 | self.fuzzed_time = 0 37 | 38 | self.ground_truth = ground_truth 39 | self.space = space 40 | 41 | 42 | class FuzzQueue(object): 43 | """Class that holds inputs and associated coverage.""" 44 | 45 | def __init__(self, outdir, is_random, sample_type, cov_num, criteria): 46 | """Init the class. 47 | 48 | Args: 49 | seed_corpus: a list of numpy arrays, one for each input tensor in the 50 | fuzzing process. 51 | sample_function: a function that looks at the whole current corpus and 52 | samples the next element to mutate in the fuzzing loop. 53 | Returns: 54 | Initialized object. 
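      Note (actual constructor arguments):
        outdir: output directory that holds the queue/ and crashes/ sub-folders
          and the plot log.
        is_random: if set, has_new_bits() accepts every mutant regardless of
          coverage.
        sample_type: seed-selection strategy, e.g. 'random', 'tensorfuzz',
          'deeptest', 'deeptest2' or 'prob'.
        cov_num: length of the coverage bitmap (virgin_bits/adv_bits).
        criteria: name of the coverage criteria, used for logging.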
55 | """ 56 | 57 | # care about the close 58 | self.plot_file = open(os.path.join(outdir, 'plot.log'), 'a+') 59 | self.out_dir = outdir 60 | self.mutations_processed = 0 61 | self.queue = [] 62 | self.sample_type = sample_type 63 | self.start_time = time.time() 64 | 65 | self.random = is_random 66 | self.criteria = criteria 67 | 68 | self.log_time = time.time() 69 | self.virgin_bits = np.full(cov_num, 0xFF, dtype=np.uint8) 70 | self.adv_bits = np.full(cov_num, 0xFF, dtype=np.uint8) 71 | self.uniq_crashes = 0 72 | self.total_cov = cov_num 73 | self.last_crash_time = self.start_time 74 | self.last_reg_time = self.start_time 75 | 76 | self.total_queue = 0 77 | 78 | self.dry_run_cov = None 79 | self.current_id = 0 80 | self.seed_attacked = set() 81 | self.seed_attacked_first_time = dict() 82 | 83 | self.REG_GAMMA = 5 84 | self.REG_MIN = 0.3 85 | self.REG_INIT_PROB = 0.8 86 | 87 | def has_new_bits(self, seed): 88 | 89 | temp = np.invert(seed.coverage, dtype=np.uint8) 90 | cur = np.bitwise_and(self.virgin_bits, temp) 91 | has_new = not np.array_equal(cur, self.virgin_bits) 92 | if has_new: 93 | self.virgin_bits = cur 94 | return has_new or self.random 95 | 96 | def plot_log(self, id): 97 | 98 | queue_len = len(self.queue) 99 | coverage = self.compute_cov() 100 | current_time = time.time() 101 | self.plot_file.write( 102 | "%d,%d,%d,%s,%s,%d,%d,%s,%s\n" % 103 | (time.time(), 104 | id, 105 | queue_len, 106 | self.dry_run_cov, 107 | coverage, 108 | self.uniq_crashes, 109 | len(self.seed_attacked), 110 | self.mutations_processed, 111 | round(float(self.mutations_processed) / (current_time - self.start_time), 2) 112 | )) 113 | self.plot_file.flush() 114 | 115 | def write_logs(self): 116 | log_file = open(os.path.join(self.out_dir, 'fuzz.log'), 'w+') 117 | for k in self.seed_attacked_first_time: 118 | log_file.write("%s:%s\n" % (k, self.seed_attacked_first_time[k])) 119 | log_file.close() 120 | self.plot_file.close() 121 | 122 | def log(self): 123 | queue_len = len(self.queue) 124 | coverage = self.compute_cov() 125 | current_time = time.time() 126 | tf.logging.info( 127 | "criteria %s | corpus_size %s | crashes_size %s | mutations_per_second: %s | total_exces %s | last new reg: %s | last new adv %s | coverage: %s -> %s%%", 128 | self.criteria, 129 | queue_len, 130 | self.uniq_crashes, 131 | round(float(self.mutations_processed) / (current_time - self.start_time), 2), 132 | self.mutations_processed, 133 | datetime.timedelta(seconds=(time.time() - self.last_reg_time)), 134 | datetime.timedelta(seconds=(time.time() - self.last_crash_time)), 135 | self.dry_run_cov, 136 | coverage 137 | ) 138 | 139 | def compute_cov(self): 140 | 141 | coverage = round(float(self.total_cov - np.count_nonzero(self.virgin_bits == 0xFF)) * 100 / self.total_cov, 2) 142 | return str(coverage) 143 | 144 | def tensorfuzz(self): 145 | """Grabs new input from corpus according to sample_function.""" 146 | # choice = self.sample_function(self) 147 | corpus = self.queue 148 | reservoir = corpus[-5:] + [random.choice(corpus)] 149 | choice = random.choice(reservoir) 150 | return choice 151 | # return random.choice(self.queue) 152 | 153 | def select_next(self): 154 | if self.sample_type == 'random' or self.sample_type == 'random2' or self.sample_type == 'ran_save': # ran_save is to random and save all mutants 155 | return self.random_select() 156 | elif self.sample_type == 'tensorfuzz': 157 | return self.tensorfuzz() 158 | elif self.sample_type == 'deeptest': 159 | return self.deeptest_next() 160 | elif self.sample_type == 'deeptest2': 
161 | return self.deeptest_next2() 162 | elif self.sample_type == 'prob': 163 | return self.prob_next() 164 | 165 | def random_select(self): 166 | """Grabs new input from corpus according to sample_function.""" 167 | # choice = self.sample_function(self) 168 | 169 | return random.choice(self.queue) 170 | 171 | def deeptest_next(self): 172 | choice = self.queue[-1] 173 | return choice 174 | 175 | def fuzzer_handler(self, iteration, cur_seed, bug_found, coverage_inc): 176 | if self.sample_type == 'deeptest' and not coverage_inc: 177 | self.queue.pop() 178 | elif self.sample_type == 'prob' and not bug_found and not coverage_inc: 179 | if cur_seed.probability > self.REG_MIN and cur_seed.fuzzed_time < self.REG_GAMMA * (1 - self.REG_MIN): 180 | cur_seed.probability = self.REG_INIT_PROB - float(cur_seed.fuzzed_time) / self.REG_GAMMA 181 | 182 | if bug_found: 183 | self.seed_attacked.add(cur_seed.root_seed) 184 | if not (cur_seed.parent in self.seed_attacked_first_time): 185 | self.seed_attacked_first_time[cur_seed.root_seed] = iteration 186 | 187 | def deeptest_next2(self): 188 | if self.current_id == len(self.queue): 189 | self.current_id = 0 190 | choice = self.queue[self.current_id] 191 | self.current_id += 1 192 | return choice 193 | 194 | def prob_next(self): 195 | """Grabs new input from corpus according to sample_function.""" 196 | # choice = self.sample_function(self) 197 | while True: 198 | if self.current_id == len(self.queue): 199 | self.current_id = 0 200 | 201 | cur_seed = self.queue[self.current_id] 202 | if cur_seed.space > 0 and randint(0, 100) < cur_seed.probability * 100: 203 | # if cur_seed.probability > REG_MIN and cur_seed.fuzzed_time < REG_GAMMA * (1-REG_MIN): 204 | # cur_seed.probability = REG_INIT_PROB - float(cur_seed.fuzzed_time)/REG_GAMMA 205 | 206 | cur_seed.fuzzed_time += 1 207 | self.current_id += 1 208 | return cur_seed 209 | else: 210 | self.current_id += 1 211 | -------------------------------------------------------------------------------- /Abstraction/DTMCGraph.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import math 3 | from graphviz import Digraph 4 | import os 5 | 6 | 7 | class State(object): 8 | def __init__(self, name, id): 9 | self.name = name # name is the encoded vector 10 | self.freq = 0 11 | self.id = id 12 | self.section = 0 # to which section the state belongs 13 | 14 | def add_freq(self): 15 | self.freq += 1 16 | 17 | 18 | class Transition(object): 19 | def __init__(self, src, dest, label, id): 20 | self.src = src 21 | self.dest = dest 22 | self.label = label # for input/output label, currently not used 23 | self.freq = 0 24 | self.prob = 0.0 25 | self.id = id 26 | 27 | def add_freq(self): 28 | self.freq += 1 29 | 30 | 31 | class DTMCGraph(object): 32 | def __init__(self, fake_ini): 33 | self.states = OrderedDict() 34 | self.other_states = OrderedDict() 35 | self.transitions = {} 36 | self.fake_ini = fake_ini 37 | self.states[fake_ini] = State(fake_ini, 0) 38 | self.next_transition_id = 0 39 | self.next_state_id = 1 40 | self.k_step_idx = {} # keep a state_name:idx mapping for k-step coverage 41 | 42 | def _add_state(self, state_name): 43 | """ 44 | add a state to the graph 45 | :param state_name: the encoded int64 46 | """ 47 | if state_name not in self.states: 48 | state = State(state_name, self.next_state_id) 49 | self.next_state_id += 1 50 | self.states[state_name] = state 51 | print('STATE ADDED: %s with id %s' % (state_name, state.id)) 52 | else: 53 | 
print('You are trying to add a duplicate state with name %s' % state_name) 54 | 55 | def _add_other_state(self, state_name, section): 56 | """ 57 | add a k-step state to the graph 58 | :param state_name: the encoded int64 59 | :param section: 1 for 1 step, 2 for step and so on 60 | """ 61 | if state_name not in self.other_states: 62 | state = State(state_name, self.next_state_id) 63 | state.section = section 64 | self.next_state_id += 1 65 | self.other_states[state_name] = state 66 | print('OTHER STATE ADDED: %s with id %s' % (state_name, state.id)) 67 | else: 68 | print('You are tring to add a duplicate state with name %s' % state_name) 69 | 70 | def _add_transition(self, src, dst): 71 | """ 72 | add a transition 73 | :param src: name of the source state 74 | :param dst: name of the destination state 75 | """ 76 | if src not in self.states: 77 | print('ERROR: src state can not be found in the graph.') 78 | return -1 79 | if dst not in self.states: 80 | self._add_state(dst) 81 | 82 | self.states[dst].add_freq() 83 | src = self.states[src].id 84 | dst = self.states[dst].id 85 | 86 | if src in self.transitions: 87 | if dst in self.transitions[src]: 88 | self.transitions[src][dst].add_freq() 89 | else: 90 | trans = Transition(src, dst, '', self.next_transition_id) 91 | print('TRANSITION ADDED: with id %s' % self.next_transition_id) 92 | self.next_transition_id += 1 93 | trans.add_freq() 94 | self.transitions[src][dst] = trans 95 | else: 96 | self.transitions[src] = {} 97 | trans = Transition(src, dst, '', self.next_transition_id) 98 | self.next_transition_id += 1 99 | trans.add_freq() 100 | self.transitions[src][dst] = trans 101 | 102 | def add_ordered_transitions(self, trans_seq, label_seq): 103 | """ 104 | add a set of transitions with a sequence of states 105 | :param trans_seq: sequence of states specifying the transitions 106 | :param output_seq: transition label, but currently not used 107 | """ 108 | trans_seq = [self.fake_ini] + trans_seq 109 | for i in range(len(trans_seq)-1): 110 | src = trans_seq[i] 111 | dest = trans_seq[i+1] 112 | self._add_transition(src, dest) 113 | 114 | def add_other_states(self, state_seq, section_seq): 115 | """ 116 | add a set of other states 117 | :param state_seq: a list of states 118 | :param section_seq: a list of corresponding sections 119 | """ 120 | for i in range(len(state_seq)): 121 | if state_seq[i] not in self.states: 122 | self._add_other_state(state_seq[i], section_seq[i]) 123 | 124 | def cal_trans_prob(self): 125 | for _, state in self.states.items(): 126 | if state.id in self.transitions: 127 | out_trans = self.transitions[state.id] 128 | total = sum([tr.freq for _, tr in out_trans.items()]) 129 | for _, tr in out_trans.items(): 130 | tr.prob = tr.freq/total 131 | 132 | def draw_graph(self, folder, type): 133 | self.cal_trans_prob() 134 | dot = Digraph(comment='RNN state transition graph') 135 | for state in self.states.values(): 136 | dot.node(str(state.id), '%s' % state.id) 137 | for src, dlist in self.transitions.items(): 138 | for dest, transition in dlist.items(): 139 | lab = '%.2f' % transition.prob 140 | dot.edge(str(src), str(dest), label=lab) 141 | dot.render(os.path.join(folder, '%s.gv' % type), view=False) 142 | # print(dot.source) 143 | 144 | def to_cover_major_states(self, transition_seq_name, cnt_states, return_set=False): 145 | """ 146 | update the cnt_states with coverage triggered by the sequence of transitions 147 | :param transition_seq_name: a name sequence of states 148 | :param cnt_states: a coverage vector with same 
length as self.states; it is 149 | indexed by the state.id 150 | :param return_set: whether to return the set of ids of covered states/transitions 151 | :return: cnt_states is updated in place 152 | """ 153 | for i in range(len(transition_seq_name)-1): 154 | dst = transition_seq_name[i+1] 155 | if dst in self.states: 156 | idx = self.states[dst].id 157 | if not return_set: 158 | num = cnt_states[idx] 159 | if num < 255: 160 | num += 1 161 | cnt_states[idx] = num 162 | else: 163 | cnt_states.append(idx) 164 | 165 | def to_cover_k_step(self, transition_seq_name, cnt_states, return_set=False): 166 | for i in range(len(transition_seq_name)-1): 167 | dst = transition_seq_name[i+1] 168 | if dst in self.k_step_idx: 169 | idx = self.k_step_idx[dst] 170 | if not return_set: 171 | num = cnt_states[idx] 172 | if num < 255: 173 | num += 1 174 | cnt_states[idx] = num 175 | else: 176 | cnt_states.append(idx) 177 | 178 | def init_k_step_idx(self, k): 179 | """ 180 | initialize a mapping between k-step state name and vector index 181 | :param k: maximum step to calculate the coverage, i.e., only consider states within k steps 182 | """ 183 | if k <= 0: 184 | return 185 | self.k_step_idx = {} 186 | i = 0 187 | for state_name, state in self.other_states.items(): 188 | if state.section <= k: 189 | self.k_step_idx[state_name] = i 190 | i += 1 191 | 192 | def to_cover_transitions(self, transition_seq_name, cnt_states, return_set=False): 193 | for i in range(len(transition_seq_name)-1): 194 | src = transition_seq_name[i] 195 | dst = transition_seq_name[i+1] 196 | if src not in self.states or dst not in self.states: 197 | continue 198 | src = self.states[src].id 199 | dst = self.states[dst].id 200 | # tran = trans[i] 201 | if src in self.transitions: 202 | if dst in self.transitions[src]: 203 | idx = self.transitions[src][dst].id 204 | if not return_set: 205 | num = cnt_states[idx] 206 | if num < 255: 207 | num += 1 208 | cnt_states[idx] = num 209 | else: 210 | cnt_states.append(idx) 211 | 212 | def get_index_weight_dic(self, type="state", reverse=False): 213 | cri_dic = self.states 214 | if type == "transition": 215 | cri_dic = dict() 216 | for src, entry in self.transitions.items(): 217 | for dst, tran in entry.items(): 218 | cri_dic[tran.id] = tran 219 | 220 | total = 0 221 | for name, entry in cri_dic.items(): 222 | total += entry.freq 223 | # print(self.next_transition_id) 224 | result = dict() 225 | for name, entry in cri_dic.items(): 226 | result[entry.id] = entry.freq/total 227 | 228 | if not reverse: 229 | return result 230 | else: 231 | rev_dic = dict() 232 | for k, w in result.items(): 233 | rev_dic[k] = (1-w)/(self.next_transition_id-1) 234 | return rev_dic 235 | 236 | def get_major_states_num(self): 237 | return len(self.states) 238 | 239 | def get_transition_num(self): 240 | return self.next_transition_id 241 | 242 | def get_k_step_states_num(self): 243 | return len(self.k_step_idx) -------------------------------------------------------------------------------- /fuzzer/mutators.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import cv2 3 | import numpy as np 4 | import random 5 | import time 6 | import copy 7 | 8 | 9 | class Mutators(): 10 | def image_translation(img, params): 11 | 12 | rows, cols, ch = img.shape 13 | # rows, cols = img.shape 14 | 15 | # M = np.float32([[1, 0, params[0]], [0, 1, params[1]]]) 16 | M = np.float32([[1, 0, params], [0, 1, params]]) 17 | dst = cv2.warpAffine(img, M, (cols, rows)) 18 |
return dst 19 | 20 | def image_scale(img, params): 21 | 22 | # res = cv2.resize(img, None, fx=params[0], fy=params[1], interpolation=cv2.INTER_CUBIC) 23 | rows, cols, ch = img.shape 24 | res = cv2.resize(img, None, fx=params, fy=params, interpolation=cv2.INTER_CUBIC) 25 | res = res.reshape((res.shape[0], res.shape[1], ch)) 26 | y, x, z = res.shape 27 | if params > 1: # need to crop 28 | startx = x // 2 - cols // 2 29 | starty = y // 2 - rows // 2 30 | return res[starty:starty + rows, startx:startx + cols] 31 | elif params < 1: # need to pad 32 | sty = int((rows - y) / 2) 33 | stx = int((cols - x) / 2) 34 | return np.pad(res, [(sty, rows - y - sty), (stx, cols - x - stx), (0, 0)], mode='constant', 35 | constant_values=0) 36 | return res 37 | 38 | def image_shear(img, params): 39 | rows, cols, ch = img.shape 40 | # rows, cols = img.shape 41 | factor = params * (-1.0) 42 | M = np.float32([[1, factor, 0], [0, 1, 0]]) 43 | dst = cv2.warpAffine(img, M, (cols, rows)) 44 | return dst 45 | 46 | def image_rotation(img, params): 47 | rows, cols, ch = img.shape 48 | # rows, cols = img.shape 49 | M = cv2.getRotationMatrix2D((cols / 2, rows / 2), params, 1) 50 | dst = cv2.warpAffine(img, M, (cols, rows), flags=cv2.INTER_AREA) 51 | return dst 52 | 53 | def image_contrast(img, params): 54 | alpha = params 55 | new_img = cv2.multiply(img, np.array([alpha])) # mul_img = img*alpha 56 | # new_img = cv2.add(mul_img, beta) # new_img = img*alpha + beta 57 | 58 | return new_img 59 | 60 | def image_brightness(img, params): 61 | beta = params 62 | new_img = cv2.add(img, beta) # new_img = img + beta 63 | return new_img 64 | 65 | def image_blur(img, params): 66 | 67 | # print("blur") 68 | blur = [] 69 | if params == 1: 70 | blur = cv2.blur(img, (3, 3)) 71 | if params == 2: 72 | blur = cv2.blur(img, (4, 4)) 73 | if params == 3: 74 | blur = cv2.blur(img, (5, 5)) 75 | if params == 4: 76 | blur = cv2.GaussianBlur(img, (3, 3), 0) 77 | if params == 5: 78 | blur = cv2.GaussianBlur(img, (5, 5), 0) 79 | if params == 6: 80 | blur = cv2.GaussianBlur(img, (7, 7), 0) 81 | if params == 7: 82 | blur = cv2.medianBlur(img, 3) 83 | if params == 8: 84 | blur = cv2.medianBlur(img, 5) 85 | # if params == 9: 86 | # blur = cv2.blur(img, (6, 6)) 87 | if params == 9: 88 | blur = cv2.bilateralFilter(img, 6, 50, 50) 89 | # blur = cv2.bilateralFilter(img, 9, 75, 75) 90 | return blur 91 | 92 | def image_pixel_change(img, params): 93 | # randomly change `params` pixels to random values in [0, 255] 94 | img_shape = img.shape 95 | img1d = np.ravel(img) 96 | arr = np.random.randint(0, len(img1d), params) 97 | for i in arr: 98 | img1d[i] = np.random.randint(0, 256) 99 | new_img = img1d.reshape(img_shape) 100 | return new_img 101 | 102 | def image_noise(img, params): 103 | if params == 1: # Gaussian-distributed additive noise. 104 | row, col, ch = img.shape 105 | mean = 0 106 | var = 0.1 107 | sigma = var ** 0.5 108 | gauss = np.random.normal(mean, sigma, (row, col, ch)) 109 | gauss = gauss.reshape(row, col, ch) 110 | noisy = img + gauss 111 | return noisy.astype(np.uint8) 112 | elif params == 2: # Salt-and-pepper noise: set random pixels to 1 (salt) or 0 (pepper). 113 | s_vs_p = 0.5 114 | amount = 0.004 115 | out = np.copy(img) 116 | # Salt mode 117 | num_salt = np.ceil(amount * img.size * s_vs_p) 118 | coords = [np.random.randint(0, i, int(num_salt)) 119 | for i in img.shape] 120 | out[tuple(coords)] = 1 121 | 122 | # Pepper mode 123 | num_pepper = np.ceil(amount * img.size * (1.
- s_vs_p)) 124 | coords = [np.random.randint(0, i, int(num_pepper)) 125 | for i in img.shape] 126 | out[tuple(coords)] = 0 127 | return out 128 | elif params == 3: # Multiplicative (speckle) noise: out = image + n*image, where n is Gaussian noise. 129 | row, col, ch = img.shape 130 | gauss = np.random.randn(row, col, ch) 131 | gauss = gauss.reshape(row, col, ch) 132 | noisy = img + img * gauss 133 | return noisy.astype(np.uint8) 134 | 135 | ''' 136 | TODO: Add more mutators, current version is from DeepTest, https://arxiv.org/pdf/1708.08559.pdf 137 | 138 | Also check, https://arxiv.org/pdf/1712.01785.pdf, and DeepXplore 139 | 140 | ''' 141 | 142 | # TODO: Random L0 143 | 144 | # TODO: Random L infinity 145 | 146 | # more transformations refer to: http://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_geometric_transformations/py_geometric_transformations.html#geometric-transformations 147 | # http://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_table_of_contents_imgproc/py_table_of_contents_imgproc.html 148 | 149 | transformations = [image_translation, image_scale, image_shear, image_rotation, 150 | image_contrast, image_brightness, image_blur, image_pixel_change, image_noise] 151 | 152 | # these parameters need to be carefully considered in the experiment 153 | # to account for the feedback 154 | params = [] 155 | params.append(list(range(-3, 3))) # image_translation 156 | params.append(list(map(lambda x: x * 0.1, list(range(8, 11))))) # image_scale 157 | params.append(list(map(lambda x: x * 0.1, list(range(-5, 5))))) # image_shear 158 | params.append(list(range(-30, 30))) # image_rotation 159 | params.append(list(map(lambda x: x * 0.1, list(range(6, 12))))) # image_contrast 160 | params.append(list(range(-20, 20))) # image_brightness 161 | params.append(list(range(1, 10))) # image_blur 162 | params.append(list(range(1, 10))) # image_pixel_change 163 | params.append(list(range(1, 4))) # image_noise 164 | 165 | classA = [7, 8] 166 | classB = [0, 1, 2, 3, 4, 5, 6] 167 | 168 | # classB = [5, 6] 169 | # classB = [] 170 | @staticmethod 171 | def mutate_one(ori_img, img, cl, try_num=50): 172 | x, y, z = img.shape 173 | 174 | a = 0.02 175 | b = 0.30 176 | l0 = int(a * x * y * z) 177 | l_infinity = int(b * 255) 178 | ori_shape = ori_img.shape 179 | for ii in range(try_num): 180 | random.seed(time.time()) 181 | if cl == 0: # 0 can choose class A and B 182 | tid = random.sample(Mutators.classA + Mutators.classB, 1)[0] 183 | transformation = Mutators.transformations[tid] 184 | 185 | params = Mutators.params[tid] 186 | param = random.sample(params, 1)[0] 187 | img_new = transformation(copy.deepcopy(img), param) 188 | img_new = img_new.reshape(ori_shape) 189 | 190 | if tid in Mutators.classA: 191 | sub = ori_img - img_new 192 | if np.sum(sub != 0) < l0 or np.max(abs(sub)) < l_infinity: 193 | return ori_img, img_new, 0, 1 194 | else: # class B 195 | # print(transformation) 196 | ori_img = transformation(copy.deepcopy(ori_img), param) # the original image needs to be updated as well 197 | # print('Original changed with %s',transformation) 198 | ori_img = ori_img.reshape(ori_shape) 199 | return ori_img, img_new, 1, 1 200 | if cl == 1: 201 | tid = random.sample(Mutators.classA, 1)[0] 202 | transformation = Mutators.transformations[tid] 203 | params = Mutators.params[tid] 204 | param = random.sample(params, 1)[0] 205 | img_new = transformation(copy.deepcopy(img), param) 206 | sub = ori_img - img_new 207 | if np.sum(sub != 0) < l0 or
np.max(abs(sub)) < l_infinity: 208 | return ori_img, img_new, 1, 1 209 | return ori_img, img, cl, 0 210 | 211 | @staticmethod 212 | def image_random_mutate(seed, batch_num): 213 | ''' 214 | This is the interface to perform random mutation on an input image: it randomly selects 215 | a mutator and applies it with a randomly chosen predefined parameter. 216 | 217 | :param seed: seed entry; seed.fname stores the (original, current) image pair and seed.clss its mutation class 218 | :param batch_num: number of mutations to attempt 219 | :return: (original image batch, mutated image batch, class batch), or None if no mutation succeeded 220 | ''' 221 | 222 | # randomly sample 223 | # tid = random.sample([0, 1, 2, 3, 4, 5, 6], 1)[0] 224 | # l0 = 300 225 | # l_infinity = 150 226 | 227 | test = np.load(seed.fname) 228 | test = np.expand_dims(test, axis=-1) 229 | ori_img = test[0] 230 | img = test[1] 231 | cl = seed.clss 232 | ori_batches = [] 233 | batches = [] 234 | cl_batches = [] 235 | for i in range(batch_num): 236 | ori_out, img_out, cl_out, changed = Mutators.mutate_one(ori_img, img, cl) 237 | if changed: 238 | ori_batches.append(ori_out) 239 | batches.append(img_out) 240 | cl_batches.append(cl_out) 241 | # ori_batches = np.squeeze(ori_batches) 242 | # batches = np.squeeze(batches) 243 | if len(ori_batches) > 0: 244 | ori_batches = np.squeeze(np.asarray(ori_batches), axis=-1) 245 | batches = np.squeeze(np.asarray(batches), axis=-1) 246 | return (ori_batches, batches, cl_batches) 247 | 248 | @staticmethod 249 | def mutate_two(seed, batch_num): 250 | ''' 251 | This is the interface to perform random mutation on an input image: it randomly selects 252 | a mutator and applies it with a randomly chosen predefined parameter. 253 | 254 | :param seed: seed entry; seed.fname stores the (original, current) image pair and seed.clss its mutation class 255 | :param batch_num: number of mutations to attempt 256 | :return: (original image batch, mutated image batch, class batch) 257 | ''' 258 | 259 | # randomly sample 260 | # tid = random.sample([0, 1, 2, 3, 4, 5, 6], 1)[0] 261 | # l0 = 300 262 | # l_infinity = 150 263 | 264 | test = np.load(seed.fname) 265 | ori_img = test[0] 266 | img = test[1] 267 | cl = seed.clss 268 | ori_batches = [] 269 | batches = [] 270 | cl_batches = [] 271 | for i in range(batch_num): 272 | ori_out, img_out, cl_out, changed = Mutators.mutate_one(ori_img, img, cl) 273 | # ori_out, img_out, cl_out, changed = Mutators.mutate_one(ori_out, img_out, cl_out) 274 | if changed: 275 | ori_batches.append(ori_out) 276 | batches.append(img_out) 277 | cl_batches.append(cl_out) 278 | 279 | return (np.asarray(ori_batches), np.asarray(batches), cl_batches) 280 | --------------------------------------------------------------------------------
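Usage sketch (not part of the repository): the hypothetical snippet below shows how DTMCGraph and Mutators could be exercised in isolation, assuming the repository root is on the Python import path. The fake initial state value -1, the toy state names, and the random 28x28 input are illustrative assumptions only; in the real pipeline the state names are int64 codes produced by Abstraction/Coder.py and the seeds come from the fuzzing queue.

from Abstraction.DTMCGraph import DTMCGraph
from fuzzer.mutators import Mutators
import numpy as np

# Build a tiny DTMC from two abstract state traces (-1 acts as the artificial initial state).
graph = DTMCGraph(fake_ini=-1)
graph.add_ordered_transitions([1, 2, 3], None)
graph.add_ordered_transitions([1, 2, 4], None)

# Major-state coverage: one counter slot per state id (slot 0 is the fake initial state).
cnt_states = [0] * graph.get_major_states_num()
graph.to_cover_major_states([-1, 1, 2, 4], cnt_states)
print(cnt_states)  # non-zero entries mark states touched by the trace

# Transition coverage works the same way, indexed by transition id.
cnt_trans = [0] * graph.get_transition_num()
graph.to_cover_transitions([-1, 1, 2, 4], cnt_trans)
print(cnt_trans)

# Apply one random mutation to a synthetic 28x28 grayscale image; class 0 allows
# both affine (class B) and pixel-level (class A) mutators.
img = np.random.randint(0, 256, size=(28, 28, 1), dtype=np.uint8)
ori_out, img_out, cl_out, changed = Mutators.mutate_one(img.copy(), img.copy(), cl=0)
print(cl_out, changed)

Note that the to_cover_* routines saturate each counter at 255, so the coverage vector behaves like an 8-bit, AFL-style hit-count map.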