├── test
│   └── rnn_model
│       └── model.h5
├── requirement.txt
├── AbstractRNNClassifier.py
├── LICENSE.txt
├── evaluation_scripts
│   └── fuzzing
│       ├── check_unique_crash.py
│       └── coverage_analyzer.py
├── .gitignore
├── Abstraction
│   ├── Coder.py
│   ├── GraphWrapper.py
│   ├── StateAbstraction.py
│   └── DTMCGraph.py
├── fuzzer
│   ├── fuzzone.py
│   ├── construct_initial_seeds.py
│   ├── image_queue.py
│   ├── lib
│   │   ├── fuzzer.py
│   │   └── queue.py
│   ├── image_fuzzer.py
│   └── mutators.py
├── abstraction_runner.py
├── coverage.py
├── README.md
└── mnist_demo
    └── mnist_lstm.py

/test/rnn_model/model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaoningdu/deepstellar/HEAD/test/rnn_model/model.h5
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | keras==2.3
2 | tensorflow==1.14
3 | sklearn
4 | pandas
5 | matplotlib==3.1
6 | joblib
7 | pillow
8 | opencv-python
9 | xxhash
10 | graphviz
--------------------------------------------------------------------------------
/AbstractRNNClassifier.py:
--------------------------------------------------------------------------------
1 | 
2 | class AbstractRNNClassifier:
3 | 
4 |     def load_hidden_state_model(self, model_path):
5 |         pass
6 | 
7 |     def input_preprocess(self, data):
8 |         return data
9 | 
10 |     def profile_train_data(self, profile_save_path):
11 |         pass
12 | 
13 |     def get_state_profile(self, inputs):
14 |         pass
15 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2020 Xiaoning Du
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 |     http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
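AbstractRNNClassifier.py above is the plug-in point for the rest of the pipeline: abstraction_runner.py, the coverage analyzer and the fuzzer drive a model mainly through `load_hidden_state_model`, `input_preprocess`, `profile_train_data` and `get_state_profile`. The snippet below is a minimal sketch of how a concrete subclass is meant to fill these in; it condenses what `mnist_demo/mnist_lstm.py` does, and the class name `MyRNNClassifier` is only illustrative.

```python
from keras.layers import Input, Lambda, LSTM, Dense
from keras.models import Model

from AbstractRNNClassifier import AbstractRNNClassifier


class MyRNNClassifier(AbstractRNNClassifier):  # illustrative name
    def __init__(self, time_steps=28, n_inputs=28, n_units=128, n_classes=10):
        self.time_steps = time_steps
        self.n_inputs = n_inputs
        self.n_units = n_units
        self.n_classes = n_classes

    def load_hidden_state_model(self, model_path):
        # Rebuild the trained network with return_sequences=True so that
        # predict() yields both the class probabilities and the hidden-state trace.
        inp = Input(shape=(self.time_steps, self.n_inputs))
        states = LSTM(self.n_units, return_sequences=True)(inp)
        last = Lambda(lambda x: x[:, -1, :])(states)
        probs = Dense(self.n_classes, activation='softmax')(last)
        self.model = Model(inputs=inp, outputs=[probs, states])
        self.model.load_weights(model_path)

    def get_state_profile(self, inputs):
        # The abstraction and coverage code only consume the state sequences.
        inputs = self.input_preprocess(inputs)
        return self.model.predict(inputs)[1]
```

`input_preprocess` and `profile_train_data` are overridden in the same spirit; `mnist_demo/mnist_lstm.py` is the complete implementation used throughout the README.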
-------------------------------------------------------------------------------- /evaluation_scripts/fuzzing/check_unique_crash.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import argparse 4 | import xxhash 5 | 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser(description='control experiment') 9 | parser.add_argument('-i', help='crash dir path') 10 | args = parser.parse_args() 11 | 12 | dirs = os.listdir(args.i) 13 | hash_set = set() 14 | for i in dirs: 15 | crash_seed = os.path.join(args.i, i) 16 | seed = np.load(crash_seed) 17 | 18 | h = xxhash.xxh64() 19 | h.update(seed) 20 | q = h.intdigest() 21 | if q not in hash_set: 22 | hash_set.add(q) 23 | 24 | print(len(hash_set)) 25 | print('finish') 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Rope 43 | .ropeproject 44 | 45 | # Django stuff: 46 | *.log 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ -------------------------------------------------------------------------------- /Abstraction/Coder.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Coder(object): 4 | 5 | def __init__(self, bits, dim): 6 | """ 7 | :param bits: each dimension is encoded with how many bits 8 | :param dim: how many dimensions of the vectors 9 | """ 10 | self.bits = bits 11 | self.dim = dim 12 | assert self.bits * self.dim <= 64 13 | 14 | def encode(self, vec): 15 | assert len(vec) == self.dim 16 | d = 0 17 | for i in range(self.dim): 18 | di = vec[i] << (self.bits * i) 19 | d = d | di 20 | return d 21 | 22 | def decode(self, d): 23 | mask = 2 ** self.bits - 1 24 | vec = [] 25 | for i in range(self.dim): 26 | di = d & mask 27 | d = d >> self.bits 28 | vec.append(di) 29 | return vec 30 | 31 | 32 | if __name__ == '__main__': 33 | coder = Coder(8, 3) 34 | en = coder.encode([1, 2, 3]) 35 | print(format(en, '02x')) 36 | de = coder.decode(en) 37 | print(de) 38 | -------------------------------------------------------------------------------- /fuzzer/fuzzone.py: -------------------------------------------------------------------------------- 1 | from keras import backend as K 2 | import numpy as np 3 | 4 | 5 | def predict(self, input_data): 6 | inp = self.model.input 7 | functor = K.function([inp] + [K.learning_phase()], self.outputs) 8 | outputs = functor([input_data, 0]) 9 | return outputs 10 | 11 | 12 | def fetch_function(handler, input_batches, preprocess): 13 | _, img_batches, _ = input_batches 14 | if len(img_batches) == 0: 15 | return None, None 16 | preprocessed = preprocess(img_batches) 17 | outputs = handler.predict(preprocessed) 18 | return outputs[1], np.expand_dims(np.argmax(outputs[0], axis=1), axis=0) 19 | 20 | 21 | def 
build_fetch_function(handler, preprocess): 22 | def func(input_batches): 23 | return fetch_function( 24 | handler, 25 | input_batches, 26 | preprocess 27 | ) 28 | 29 | return func 30 | 31 | 32 | def adptive_coverage_function(handler, cov_num): 33 | def func(layerouts): 34 | """The fetch function.""" 35 | ptr = np.zeros(cov_num, dtype=np.uint8) 36 | return handler.update_coverage(layerouts, ptr) 37 | 38 | return func 39 | -------------------------------------------------------------------------------- /fuzzer/construct_initial_seeds.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | from keras.datasets import mnist 5 | 6 | from keras.models import load_model 7 | import numpy as np 8 | 9 | 10 | def mnist_preprocessing(x_test): 11 | temp = np.copy(x_test) 12 | temp = temp.reshape(temp.shape[0], 28, 28) 13 | temp = temp.astype('float32') 14 | temp /= 255 15 | return temp 16 | 17 | 18 | def createBatch(x_batch, batch_size, output_path, prefix): 19 | if not os.path.exists(output_path): 20 | os.makedirs(output_path) 21 | batch_num = len(x_batch) / batch_size 22 | batches = np.split(x_batch, batch_num, axis=0) 23 | for i, batch in enumerate(batches): 24 | test = batch 25 | saved_name = prefix + str(i) + '.npy' 26 | np.save(os.path.join(output_path, saved_name), test) 27 | 28 | 29 | if __name__ == '__main__': 30 | 31 | parser = argparse.ArgumentParser(description='control experiment') 32 | 33 | parser.add_argument('-dl_model', help='path to model') 34 | parser.add_argument('-output_path', help='Out path') 35 | parser.add_argument('-batch_size', type=int, help='Number of images in one batch', default=1) 36 | parser.add_argument('-batch_num', type=int, help='Number of batches', default=100) 37 | args = parser.parse_args() 38 | if not os.path.exists(args.output_path): 39 | os.makedirs(args.output_path) 40 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 41 | batch = mnist_preprocessing(x_test) 42 | model = load_model(args.dl_model) 43 | x_test = x_test.reshape(x_test.shape[0], 28, 28) 44 | 45 | num_in_each_class = (args.batch_size * args.batch_num) / 10 46 | 47 | result = np.argmax(model.predict(batch), axis=1) # [0],axis=1 48 | 49 | new_label = np.reshape(y_test, result.shape) 50 | 51 | idx_good = np.where(new_label == result)[0] 52 | 53 | for cl in range(10): 54 | cl_indexes = [i for i in idx_good if new_label[i] == cl] 55 | selected = random.sample(cl_indexes, int(num_in_each_class)) 56 | createBatch(x_test[selected], args.batch_size, args.output_path, str(cl) + '_') 57 | print('finish') 58 | -------------------------------------------------------------------------------- /abstraction_runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pickle 4 | from mnist_demo.mnist_lstm import MnistLSTMClassifier 5 | from Abstraction.StateAbstraction import StateAbstraction 6 | from Abstraction.GraphWrapper import GraphWrapper 7 | 8 | 9 | if __name__ == '__main__': 10 | parse = argparse.ArgumentParser("Generate abstract model") 11 | parse.add_argument('-dl_model', help='path of dl model', required=True) 12 | # parse.add_argument('-profile_data', help="path of data to do the profiling") 13 | parse.add_argument('-profile_save_path', help="dir to save profiling raw data", required=True) 14 | parse.add_argument('-comp_num', help="number of component when fitting pca", type=int, required=True) # can select a larger number 15 | 
parse.add_argument('-k', help='number of dimension to keep', type=int, required=True) 16 | parse.add_argument('-m', help="number of intervals on each dimension", type=int, required=True) 17 | parse.add_argument('-bits', help="number of bits for encoding", type=int, required=True) 18 | parse.add_argument('-name_prefix', help="name prefix when save the abstract model", required=True) 19 | parse.add_argument('-abst_save_path', help="path to save abstract model", required=True) 20 | parse.add_argument('-n_step', help="extend the graph to n_step", type=int, default=0) 21 | 22 | args = parse.parse_args() 23 | 24 | lstm_classifier = MnistLSTMClassifier() 25 | lstm_classifier.load_hidden_state_model(args.dl_model) 26 | 27 | if not os.path.exists(args.profile_save_path): 28 | lstm_classifier.profile_train_data(args.profile_save_path) 29 | print("profiling done...") 30 | else: 31 | print("profiling is already done...") 32 | 33 | par_k = [args.m]*args.k 34 | stateAbst = StateAbstraction(args.profile_save_path, args.comp_num, args.bits, [args.m]*args.k, args.n_step) 35 | wrapper = GraphWrapper(stateAbst) 36 | wrapper.build_model() 37 | 38 | save_file = 'wrapper_%s_%s_%s.pkl' % (args.name_prefix, len(par_k), par_k[0]) 39 | save_file = os.path.join(args.abst_save_path, save_file) 40 | os.makedirs(args.abst_save_path, exist_ok=True) 41 | with open(save_file, 'wb') as f: 42 | pickle.dump(wrapper, f) 43 | 44 | print('finish') 45 | 46 | -------------------------------------------------------------------------------- /evaluation_scripts/fuzzing/coverage_analyzer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from coverage import Coverage 4 | import numpy as np 5 | from mnist_demo.mnist_lstm import MnistLSTMClassifier 6 | 7 | 8 | def read_inputs_from_folder(folder, type="queue"): 9 | files = os.listdir(folder) 10 | tests = [] 11 | for file in files: 12 | data = np.load(os.path.join(folder, file)) 13 | if type == "crash": 14 | x_test = np.expand_dims(data, 0) 15 | elif type == "queue": 16 | x_test = data[1:2] 17 | else: 18 | x_test = data 19 | tests.extend(x_test) 20 | 21 | return np.asarray(tests) 22 | 23 | 24 | def fuzzing_analyzer(classifier, folder, dtmc_wrapper_f, type): 25 | if type == "queue": 26 | inputs = read_inputs_from_folder(folder, type="queue") 27 | else: # type == "seeds" 28 | inputs = read_inputs_from_folder(folder, type="seed") 29 | 30 | states = classifier.get_state_profile(inputs) 31 | coverage_handlers = [] 32 | 33 | for criteria, k_step in [("state", 0), ("transition", 0)]: # , ("k-step", 3), ("k-step", 6) 34 | cov = Coverage(dtmc_wrapper_f, criteria, k_step) 35 | coverage_handlers.append(cov) 36 | 37 | for coverage_handler in coverage_handlers: 38 | cov = coverage_handler.get_coverage_criteria(states) 39 | total = coverage_handler.get_total() 40 | print(len(cov) / total) 41 | if coverage_handler.mode != "k-step": # to printout the weighted coverage metrics 42 | weight_dic = coverage_handler.get_weight_dic() 43 | print(sum([weight_dic[e] for e in cov])) 44 | rev_weight_dic = coverage_handler.get_weight_dic(reverse=True) 45 | print(sum([rev_weight_dic[e] for e in cov])) 46 | 47 | 48 | if __name__ == '__main__': 49 | parser = argparse.ArgumentParser(description='analyzing the fuzzing results') 50 | parser.add_argument('-dl_model', help='path to the dl model', required=True) 51 | parser.add_argument('-wrapper', help='path to the abstract graph wrapper', required=True) 52 | parser.add_argument('-inputs_folder', 
help='path to the inputs folder', required=True) 53 | parser.add_argument('-type', choices=['seeds', 'queue'], default='queue') 54 | args = parser.parse_args() 55 | 56 | classifier = MnistLSTMClassifier() 57 | classifier.load_hidden_state_model(args.dl_model) 58 | fuzzing_analyzer(classifier, args.inputs_folder, args.wrapper, args.type) 59 | 60 | -------------------------------------------------------------------------------- /coverage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | from Abstraction.GraphWrapper import GraphWrapper 4 | 5 | 6 | class Coverage(object): 7 | 8 | def __init__(self, pkl_dir, mode, k_step): 9 | self.par_wrap = load_graph_pkl(pkl_dir) 10 | if mode == 'state': 11 | self.total_size = self.par_wrap.graph.get_major_states_num() 12 | print('There are %s major states in total.' % self.total_size) 13 | elif mode == 'k-step': 14 | if k_step > self.par_wrap.stateAbst.n_step: 15 | print('this step is larger than the steps kept, please rebuild the model.') 16 | exit(0) 17 | self.par_wrap.graph.init_k_step_idx(k_step) 18 | self.total_size = self.par_wrap.graph.get_k_step_states_num() 19 | print('There are %s k-step states in total with k = %s.' % (self.total_size, k_step)) 20 | elif mode == 'transition': 21 | self.total_size = self.par_wrap.graph.get_transition_num() 22 | print('There are %s transitions in total.' % self.total_size) 23 | else: 24 | self.total_size = 0 25 | self.mode = mode 26 | 27 | def update_coverage(self, outputs): 28 | seed_num = len(outputs) 29 | ptrs = np.tile(np.zeros(self.total_size, dtype=np.uint8), (seed_num, 1)) 30 | 31 | for i in range(len(ptrs)): 32 | self.par_wrap.visit_graph(outputs[i], ptrs[i], self.mode) 33 | 34 | return ptrs 35 | 36 | def get_coverage(self, outputs): 37 | result = [] 38 | 39 | for i in range(len(outputs)): 40 | tmp = [] 41 | self.par_wrap.visit_graph(outputs[i], tmp, self.mode, return_set=True) 42 | result.append(tmp) 43 | 44 | return result 45 | 46 | def get_coverage_criteria(self, outputs): 47 | result = set() 48 | 49 | for i in range(len(outputs)): 50 | tmp = [] 51 | self.par_wrap.visit_graph(outputs[i], tmp, self.mode, return_set=True) 52 | result = result.union(set(tmp)) 53 | 54 | return result 55 | 56 | def get_total(self): 57 | return self.total_size 58 | 59 | def get_weight_dic(self, reverse=False): 60 | if reverse: 61 | return self.par_wrap.graph.get_index_weight_dic(type=self.mode, reverse=True) 62 | return self.par_wrap.graph.get_index_weight_dic(type=self.mode) 63 | 64 | 65 | def load_graph_pkl(pkl_dir): 66 | with open(pkl_dir, 'rb') as f: 67 | g = pickle.load(f) 68 | return g 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepStellar: Model-Based Quantitative Analysis of Stateful Deep Learning Systems 2 | 3 | ## Prepare Environment 4 | python=3.6 5 | 6 | pip install -r requirement.txt 7 | 8 | 9 | ## To prepare an RNN model 10 | 11 | ```shell script 12 | python -m mnist_demo.mnist_lstm 13 | ``` 14 | 15 | 16 | ## Generate the DTMC abstract model 17 | 18 | ```shell script 19 | python abstraction_runner.py -dl_model test/rnn_model/model.h5 -profile_save_path test/output/profile_save -comp_num 128 -k 3 -m 10 -bits 8 -name_prefix lstm_mnist -abst_save_path test/output/abst_model 20 | 21 | ``` 22 | 23 | 24 | ## Coverage Guided Testing 25 | 26 | ### Construct initial seeds 27 | ```shell script 28 | 
python -m fuzzer.construct_initial_seeds -dl_model test/rnn_model/model.h5 -output_path ../fuzz_data/initialseeds
29 | ```
30 | 
31 | ### Launch the testing process
32 | ```shell script
33 | python -m fuzzer.image_fuzzer -i ../fuzz_data/initialseeds -o ../fuzz_data/fuzzing-out-1/lstm-trans-3-10 -model_type mnist -dl_model test/rnn_model/model.h5 -criteria state -pkl_path test/output/abst_model/wrapper_lstm_mnist_3_10.pkl
34 | ```
35 | 
36 | 
37 | ## Evaluation of the testing
38 | 
39 | ### Check the coverage metrics of the fuzzing output queue:
40 | 
41 | ```shell script
42 | python -m evaluation_scripts.fuzzing.coverage_analyzer -dl_model test/rnn_model/model.h5 -wrapper test/output/abst_model/wrapper_lstm_mnist_3_10.pkl -inputs_folder ../fuzz_data/fuzzing-out-1/lstm-trans-3-10/queue -type queue
43 | ```
44 | 
45 | ### Check the coverage metrics of the initial seeds:
46 | 
47 | ```shell script
48 | python -m evaluation_scripts.fuzzing.coverage_analyzer -dl_model test/rnn_model/model.h5 -wrapper test/output/abst_model/wrapper_lstm_mnist_3_10.pkl -inputs_folder ../fuzz_data/initialseeds -type seeds
49 | ```
50 | 
51 | ### Check the number of unique crashes
52 | 
53 | ```shell script
54 | python -m evaluation_scripts.fuzzing.check_unique_crash -i ../fuzz_data/fuzzing-out-1/lstm-trans-3-10/crashes
55 | ```
56 | 
57 | ### If you would like to use DeepStellar in your research, please cite our FSE'19 paper:
58 | 
59 | ```bibtex
60 | @inproceedings{10.1145/3338906.3338954,
61 | author = {Du, Xiaoning and Xie, Xiaofei and Li, Yi and Ma, Lei and Liu, Yang and Zhao, Jianjun},
62 | title = {DeepStellar: Model-Based Quantitative Analysis of Stateful Deep Learning Systems},
63 | year = {2019},
64 | booktitle = {Proceedings of the 2019 27th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering},
65 | pages = {477–487},
66 | series = {ESEC/FSE 2019}
67 | }
68 | ```
69 | 
70 | 
71 | 
72 | 
--------------------------------------------------------------------------------
/fuzzer/image_queue.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from fuzzer.lib.queue import FuzzQueue
4 | from fuzzer.lib.queue import Seed
5 | 
6 | 
7 | class ImageInputCorpus(FuzzQueue):
8 |     """Class that holds inputs and associated coverage."""
9 | 
10 |     def __init__(self, outdir, israndom, sample_function, cov_num, criteria):
11 |         """Init the class.
12 | 
13 |         Args:
14 |           seed_corpus: a list of numpy arrays, one for each input tensor in the
15 |             fuzzing process.
16 |           sample_function: a function that looks at the whole current corpus and
17 |             samples the next element to mutate in the fuzzing loop.
18 |         Returns:
19 |           Initialized object.
20 | """ 21 | FuzzQueue.__init__(self, outdir, israndom, sample_function, cov_num, criteria) 22 | 23 | self.loopup = {} 24 | self.loopup[0] = 0 25 | self.loopup[1] = 1 26 | self.loopup.update(self.loopup.fromkeys(range(2, 51), 2)) 27 | self.loopup.update(self.loopup.fromkeys(range(51, 151), 4)) 28 | self.loopup.update(self.loopup.fromkeys(range(151, 256), 128)) 29 | 30 | def save_if_interesting(self, seed, data, crash, dry_run=False, suffix=None): 31 | """Adds item to corpus if it exercises new coverage.""" 32 | 33 | def class_loop_up(x): 34 | return self.loopup[x] 35 | 36 | self.mutations_processed += 1 37 | current_time = time.time() 38 | if dry_run: 39 | coverage = self.compute_cov() 40 | self.dry_run_cov = coverage 41 | if current_time - self.log_time > 2: 42 | self.log_time = current_time 43 | self.log() 44 | describe_op = "src:%06d" % (seed.parent.id) if suffix is None else "src:%s" % (suffix) 45 | 46 | if crash: 47 | fn = "%s/crashes/id:%06d,%s.npy" % (self.out_dir, self.uniq_crashes, describe_op) 48 | self.uniq_crashes += 1 49 | self.last_crash_time = current_time 50 | else: 51 | fn = "%s/queue/id:%06d,%s.npy" % (self.out_dir, self.total_queue, describe_op) 52 | if self.has_new_bits(seed) or dry_run: 53 | self.last_reg_time = current_time 54 | if self.sample_type != 'random2' or dry_run: 55 | seed.queue_time = current_time 56 | seed.id = self.total_queue 57 | seed.fname = fn 58 | seed.probability = self.REG_INIT_PROB 59 | self.queue.append(seed) 60 | del seed.coverage 61 | else: 62 | del seed 63 | self.total_queue += 1 64 | else: 65 | del seed 66 | return False 67 | np.save(fn, data) 68 | return True 69 | -------------------------------------------------------------------------------- /fuzzer/lib/fuzzer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import gc 3 | 4 | 5 | class Fuzzer(object): 6 | """Class representing the fuzzer itself.""" 7 | 8 | def __init__( 9 | self, 10 | corpus, 11 | coverage_function, 12 | metadata_function, 13 | objective_function, 14 | mutation_function, 15 | fetch_function, 16 | iterate_function, 17 | plot=True 18 | ): 19 | """Init the class. 20 | 21 | Args: 22 | corpus: An InputCorpus object. 23 | coverage_function: a function that does CorpusElement -> Coverage. 24 | metadata_function: a function that does CorpusElement -> Metadata. 25 | objective_function: a function that checks if a CorpusElement satisifies 26 | the fuzzing objective (e.g. find a NaN, find a misclassification, etc). 27 | mutation_function: a function that does CorpusElement -> Metadata. 28 | fetch_function: grabs numpy arrays from the TF runtime using the relevant 29 | tensors. 30 | Returns: 31 | Initialized object. 
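      iterate_function: a function that receives the queue, the parent seed and
        the mutated data/coverage/metadata batches, stores the interesting
        mutants, and reports whether a bug or a coverage increase was found.
      plot: if True, a line is appended to the plot log on every iteration.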
32 | """ 33 | self.plot = plot 34 | self.queue = corpus 35 | self.coverage_function = coverage_function 36 | self.metadata_function = metadata_function 37 | self.objective_function = objective_function 38 | self.mutation_function = mutation_function 39 | self.fetch_function = fetch_function 40 | self.iterate_function = iterate_function 41 | 42 | def loop(self, iterations): 43 | """Fuzzes a machine learning model in a loop, making *iterations* steps.""" 44 | iteration = 0 45 | while True: 46 | 47 | if len(self.queue.queue) < 1 or iteration >= iterations: 48 | break 49 | if iteration % 100 == 0: 50 | tf.logging.info("fuzzing iteration: %s", iteration) 51 | gc.collect() 52 | 53 | parent = self.queue.select_next() 54 | # Get a mutated batch for each input tensor 55 | mutated_data_batches = self.mutation_function(parent) 56 | # Grab the coverage and metadata for mutated batch from the TF runtime. 57 | coverage_batches, metadata_batches = self.fetch_function( 58 | mutated_data_batches 59 | ) 60 | if self.plot: 61 | self.queue.plot_log(iteration) 62 | 63 | if coverage_batches is not None and len(coverage_batches) > 0: 64 | # Get the coverage - one from each batch element 65 | mutated_coverage_list = self.coverage_function(coverage_batches) 66 | 67 | # Get the metadata objects - one from each batch element 68 | mutated_metadata_list = self.metadata_function(metadata_batches) 69 | 70 | # Check for new coverage and create new corpus elements if necessary. 71 | # pylint: disable=consider-using-enumerate 72 | 73 | bug_found, cov_inc = self.iterate_function(self.queue, parent.root_seed, parent, mutated_coverage_list, 74 | mutated_data_batches, mutated_metadata_list, 75 | self.objective_function) 76 | del mutated_coverage_list 77 | del mutated_metadata_list 78 | else: 79 | bug_found = False 80 | cov_inc = False 81 | 82 | self.queue.fuzzer_handler(iteration, parent, bug_found, cov_inc) 83 | iteration += 1 84 | 85 | del mutated_data_batches 86 | del coverage_batches 87 | del metadata_batches 88 | 89 | self.queue.write_logs() 90 | return None 91 | -------------------------------------------------------------------------------- /mnist_demo/mnist_lstm.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import os 3 | from AbstractRNNClassifier import AbstractRNNClassifier 4 | from keras.datasets import mnist 5 | from keras.models import Sequential 6 | from keras.layers import Input, Lambda, LSTM, Dense 7 | from keras.models import load_model 8 | from keras.models import Model 9 | import numpy as np 10 | 11 | 12 | class MnistLSTMClassifier(AbstractRNNClassifier): 13 | def __init__(self): 14 | # Classifier 15 | self.time_steps = 28 # timesteps to unroll 16 | self.n_units = 128 # hidden LSTM units 17 | self.n_inputs = 28 # rows of 28 pixels (an mnist img is 28x28) 18 | self.n_classes = 10 # mnist classes/labels (0-9) 19 | self.batch_size = 128 # Size of each batch 20 | self.n_epochs = 20 21 | 22 | def create_model(self): 23 | self.model = Sequential() 24 | self.model.add(LSTM(self.n_units, input_shape=(self.time_steps, self.n_inputs))) 25 | self.model.add(Dense(self.n_classes, activation='softmax')) 26 | 27 | self.model.compile(loss='categorical_crossentropy', 28 | optimizer='rmsprop', 29 | metrics=['accuracy']) 30 | # self.model.summary() 31 | 32 | def load_hidden_state_model(self, model_path): 33 | """ 34 | return the rnn model with return_sequence enabled. 
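        The loaded model has two outputs: the softmax class probabilities and the
        per-timestep hidden states of shape (N, 28, 128). Illustrative use:

            clf = MnistLSTMClassifier()
            clf.load_hidden_state_model('test/rnn_model/model.h5')
            probs, states = clf.model.predict(batch)  # batch: preprocessed, shape (N, 28, 28)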
35 | """ 36 | input = Input(shape=(self.time_steps, self.n_inputs)) 37 | lstm = LSTM(self.n_units, input_shape=(self.time_steps, self.n_inputs), return_sequences=True)(input) 38 | last_timestep = Lambda(lambda x: x[:, -1, :])(lstm) 39 | dense = Dense(10, activation='softmax')(last_timestep) 40 | model = Model(inputs=input, outputs=[dense, lstm]) 41 | model.load_weights(model_path) 42 | self.model = model 43 | 44 | def train(self, save_path): 45 | self.create_model() 46 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 47 | 48 | x_train = self.input_preprocess(x_train) 49 | x_test = self.input_preprocess(x_test) 50 | 51 | y_test = keras.utils.to_categorical(y_test, num_classes=10) 52 | y_train = keras.utils.to_categorical(y_train, num_classes=10) 53 | 54 | self.model.fit(x_train, y_train, validation_data=(x_test, y_test), 55 | batch_size=self.batch_size, epochs=self.n_epochs, shuffle=False) 56 | 57 | os.makedirs(save_path, exist_ok=True) 58 | self.model.save(os.path.join(save_path, "model.h5")) 59 | 60 | def evaluate(self, model=None): 61 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 62 | 63 | x_test = self.input_preprocess(x_test) 64 | y_test = keras.utils.to_categorical(y_test, num_classes=10) 65 | 66 | model = load_model(model) if model else self.model 67 | test_loss = model.evaluate(x_test, y_test) 68 | print(test_loss) 69 | 70 | def input_preprocess(self, data): 71 | data = data.reshape(data.shape[0], self.n_inputs, self.n_inputs) 72 | data = data.astype('float32') 73 | data /= 255 74 | return data 75 | 76 | def profile_train_data(self, profile_save_path): 77 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 78 | x_train = self.input_preprocess(x_train) 79 | output = self.model.predict(x_train) 80 | cls = np.argmax(output[0], axis=1) 81 | correct_idx = np.where(cls == y_train)[0] 82 | os.makedirs(profile_save_path, exist_ok=True) 83 | states_correct = output[1][correct_idx] 84 | np.save(os.path.join(profile_save_path, "states_profile.npy"), states_correct) 85 | 86 | def get_state_profile(self, inputs): 87 | inputs = self.input_preprocess(inputs) 88 | output = self.model.predict(inputs) 89 | return output[1] 90 | 91 | 92 | if __name__ == "__main__": 93 | save_path = "test/rnn_model" 94 | 95 | lstm_classifier = MnistLSTMClassifier() 96 | # train an rnn model 97 | lstm_classifier.create_model() 98 | lstm_classifier.train(save_path) 99 | lstm_classifier.evaluate() 100 | 101 | # Load a trained model with return_sequence enabled. 102 | # profile_path = "test/output/profile_save" 103 | # lstm_classifier.load_hidden_state_model(os.path.join(save_path, "model.h5")) 104 | # lstm_classifier.profile_train_data(profile_path) 105 | -------------------------------------------------------------------------------- /Abstraction/GraphWrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from Abstraction.DTMCGraph import DTMCGraph 3 | import json 4 | 5 | 6 | class GraphWrapper: 7 | def __init__(self, stateAbst, fake_initial=-1): 8 | self.graph = DTMCGraph(fake_initial) 9 | self.stateAbst = stateAbst 10 | 11 | def build_model(self, label_dir=None): 12 | """ 13 | Build model for a specific configuration 14 | :label_dir: file of the label profiling, currently not used. 
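        Typical use (mirrors abstraction_runner.py):

            wrapper = GraphWrapper(stateAbst)
            wrapper.build_model()
            # pickle the wrapper; coverage.Coverage later reloads it by file path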
15 | """ 16 | pca_fit = self.stateAbst.get_pca_trans_data() 17 | 18 | if label_dir: 19 | with open(label_dir) as f: 20 | translation_all = json.load(f) 21 | else: 22 | translation_all = None 23 | 24 | if translation_all: # if with labels 25 | for i in range(len(pca_fit)): 26 | seq = pca_fit[i] 27 | trans = translation_all[i] 28 | assert len(seq) == len(trans) 29 | self.build_step(seq, trans) 30 | else: # if without labels 31 | for i in range(len(pca_fit)): 32 | seq = pca_fit[i] 33 | self.build_step(seq, None) 34 | # break 35 | # del pca_fit 36 | # del translation_all 37 | # self.graph.draw_graph("0", "DTMC") 38 | # g_warp.graph.transitions = None 39 | self.extend_to_k_step() # extend the graph to the steps 40 | self.graph.init_k_step_idx(self.stateAbst.n_step) 41 | # g_warp.visit_graph('', [0]*500, 'k-step') 42 | # g_warp.visit_graph(pca_fit[0], [0]*2000, 'transition') 43 | # os.makedirs(save2folder, exist_ok=True) 44 | 45 | def build_step(self, seq, labels=None): 46 | """ 47 | Add a sequence of state vectors to the graph, the vectors are usually transformed by PCA model 48 | :param seq: the sequence of state vectors 49 | :param labels: labels for the transitions, currently not used 50 | """ 51 | transition_seq_name = self.stateAbst.data_transform(seq) # make abstraction without PCA transformation 52 | if labels is None: 53 | labels = ['-']*len(seq) 54 | self.graph.add_ordered_transitions(transition_seq_name, labels) 55 | del transition_seq_name 56 | 57 | def extend_to_k_step(self): 58 | """ 59 | Extend the graph to k step states 60 | """ 61 | if self.stateAbst.n_step <= 0: 62 | return 63 | moves = enumerate_manhattan(self.stateAbst.dimension, self.stateAbst.n_step) 64 | step_out_dic = {} 65 | for state_name, _ in self.graph.states.items(): 66 | if state_name != -1: 67 | decoded_vec = self.stateAbst.coder.decode(state_name) 68 | for move in moves: 69 | step_out = list(np.array(decoded_vec)+np.array(move)) 70 | step_out = self.stateAbst.coder.encode(step_out) 71 | step = abs_sum(move) 72 | if step_out in step_out_dic: 73 | if step_out_dic[step_out] > step: 74 | step_out_dic[step_out] = step 75 | else: 76 | step_out_dic[step_out] = step 77 | step_out_seq = [] 78 | step_seq = [] 79 | for step_out, step in step_out_dic.items(): 80 | step_out_seq.append(step_out) 81 | step_seq.append(step) 82 | 83 | self.graph.add_other_states(step_out_seq, step_seq) 84 | 85 | def visit_graph(self, state_seq, cnt_states, mode, return_set=False): 86 | """ 87 | Update the coverage for a specific sequence 88 | :param state_seq: the state vector sequence 89 | :param cnt_states: current coverage 90 | :param mode: which coverage criteria 91 | :param return_set: whether to return the set of covered state/transition id 92 | :return: the cnt_states will be updated 93 | """ 94 | transition_seq_name = self.stateAbst.data_transform(state_seq, pca_transform=True) 95 | if mode == 'state': 96 | self.graph.to_cover_major_states(transition_seq_name, cnt_states, return_set=return_set) 97 | elif mode == 'k-step': 98 | self.graph.to_cover_k_step(transition_seq_name, cnt_states, return_set=return_set) 99 | elif mode == 'transition': 100 | self.graph.to_cover_transitions(transition_seq_name, cnt_states, return_set=return_set) 101 | 102 | 103 | def enumerate_manhattan(dim, k): 104 | """ 105 | :param dim: dimension of the space 106 | :param k: max step-out 107 | :return: the set of all possible moves with in k steps 108 | """ 109 | vec = [0] * dim 110 | covered_list = [] 111 | queue = [vec] 112 | while queue: 113 | cur_vec = 
queue.pop(0) 114 | if cur_vec not in covered_list: 115 | covered_list.append(cur_vec) 116 | for i in range(len(cur_vec)): 117 | tmp = cur_vec.copy() 118 | tmp[i] += 1 119 | if abs_sum(tmp) <= k: 120 | queue.append(tmp) 121 | tmp = cur_vec.copy() 122 | tmp[i] -= 1 123 | if abs_sum(tmp) <= k: 124 | queue.append(tmp) 125 | covered_list.remove(vec) 126 | return covered_list 127 | 128 | 129 | def abs_sum(vec): 130 | return sum([abs(i) for i in vec]) 131 | -------------------------------------------------------------------------------- /Abstraction/StateAbstraction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.decomposition import PCA 3 | import time 4 | import joblib 5 | import _pickle as pickle 6 | import os 7 | from Abstraction.Coder import Coder 8 | 9 | 10 | class StateAbstraction: 11 | def __init__(self, state_profile_folder, comp_num, bits, par_k, n_step): 12 | self.state_profile_folder = state_profile_folder 13 | self.comp_num = comp_num 14 | self.profile_file_list = get_all_file(state_profile_folder) 15 | self.cache_dir = os.path.join(state_profile_folder, "cache") 16 | self.pca_trans_dir = os.path.join(state_profile_folder, "pca_trans") 17 | self.pca_model_f = os.path.join(self.cache_dir, 'pca_model_cmp_%s.joblib' % self.comp_num) 18 | self.diag_matrix_f = os.path.join(self.cache_dir, 'diag_matrix.npy') 19 | self.min_array_f = os.path.join(self.cache_dir, 'min_array.npy') 20 | 21 | if not os.path.exists(self.cache_dir): 22 | os.makedirs(self.cache_dir, exist_ok=True) 23 | os.makedirs(self.pca_trans_dir, exist_ok=True) 24 | self.pca_fit() 25 | self.pca_trans() 26 | self.get_quantization_matrix() 27 | 28 | self.pca_model = joblib.load(self.pca_model_f) 29 | self.diag_matrix = np.load(self.diag_matrix_f) 30 | self.min_array = np.load(self.min_array_f) 31 | 32 | self.bits = bits 33 | self.par_k = par_k 34 | self.dimension = len(par_k) 35 | self.n_step = n_step 36 | self.min_array = self.min_array[range(self.dimension)] # tailor to the dimension 37 | self.diag_matrix = self.diag_matrix[:, range(self.dimension)][range(self.dimension), :] # tailor to the dimension 38 | self.diag_matrix = self.diag_matrix.dot(np.diag(par_k)) # prepare par_k/range 39 | self.coder = Coder(bits, self.dimension) # init a Coder for the encoding and decoding 40 | 41 | def pca_fit(self): 42 | """ 43 | Read data from the data_repo and calculate the first comp_num principal components. 44 | For choose to sample the data before fitting PCA model 45 | """ 46 | 47 | # read from data repo 48 | all_sample_data = [] 49 | for f in self.profile_file_list: 50 | sample_chunk = np.load(os.path.join(self.state_profile_folder, f)) 51 | all_sample_data.extend(sample_chunk) 52 | 53 | # fitting PCA model and save the model to the 'cache' folder under the data_repo 54 | start = time.time() 55 | pca = PCA(n_components=self.comp_num, copy=False) 56 | pca.fit(np.array([e for l in all_sample_data for e in l])) 57 | joblib.dump(pca, self.pca_model_f) 58 | print('pca fitting used %s ...' 
% (time.time() - start)) 59 | 60 | def pca_trans(self): 61 | """ 62 | Transform all the data with the PCA model and save the transformed data to pca_trans folder inside the repo folder 63 | """ 64 | pca = joblib.load(self.pca_model_f) 65 | for f in self.profile_file_list: 66 | sample_chunk = np.load(os.path.join(self.state_profile_folder, f)) 67 | sample_chunk_pca = [] 68 | for sample in sample_chunk: 69 | sample_pca = pca.transform(np.array(sample)) 70 | sample_chunk_pca.append(sample_pca) 71 | np.save(os.path.join(self.pca_trans_dir, f), sample_chunk_pca) 72 | print('pca_trans finished.') 73 | 74 | def get_quantization_matrix(self): 75 | """ 76 | Read the PCA-transformed data, and calculate the auxiliary matrix for quantization 77 | """ 78 | fit_data = self.get_pca_trans_data() 79 | fit_data = np.array([s for seq in fit_data for s in seq]) 80 | print('fit data shape:') 81 | print(fit_data.shape) 82 | 83 | diag_array = [] # holding the reciprocal of each dimension on the diagonal 84 | min_array = [] # holding the minimum value of each dimension 85 | for i in range(fit_data.shape[1]): 86 | proj_i = [e[i] for e in fit_data] 87 | diag_array.append(1 / (max(proj_i) - min(proj_i))) 88 | min_array.append(min(proj_i)) 89 | # print('%s--%s' % (min(proj_i), max(proj_i))) 90 | diag_matrix = np.diag(diag_array) 91 | 92 | np.save(self.diag_matrix_f, diag_matrix) 93 | np.save(self.min_array_f, min_array) 94 | 95 | def data_transform(self, seq, pca_transform=False): 96 | """ 97 | return the sequence of abstracted state name 98 | """ 99 | if pca_transform: 100 | seq = self.pca_model.transform(np.array(seq)) 101 | seq = seq[:, range(self.dimension)] # take the dimension 102 | my_min = np.repeat(self.min_array, len(seq)) 103 | my_min = my_min.reshape(self.dimension, len(seq)).transpose() 104 | seq = seq - my_min # each vector minus the lower bound 105 | pca_fit_partition = np.floor(seq.dot(self.diag_matrix)).astype(int) # (vec-min)/(range/par_k) and take the floor value 106 | pca_fit_partition = pca_fit_partition + self.n_step # to avoid negative encoding 107 | transition_seq_name = [self.coder.encode(a) for a in pca_fit_partition] # encode the abstracted vectors 108 | # print(transition_seq_name) 109 | # transition_seq_name = [self.fake_initial] + transition_seq_name # fake initial as starting state 110 | del my_min 111 | del seq 112 | del pca_fit_partition 113 | return transition_seq_name 114 | 115 | def pca_transform(self, seq): 116 | return self.pca_model.transform(np.array(seq[0])) 117 | 118 | def get_pca_trans_data(self): 119 | pca_fit = [] 120 | data_fs = get_all_file(self.pca_trans_dir) 121 | for f in data_fs: 122 | chunk = np.load(os.path.join(self.pca_trans_dir, f)) 123 | pca_fit.extend(chunk) 124 | # break 125 | return pca_fit 126 | 127 | 128 | def get_all_file(target_dir): 129 | """ 130 | A util function to return all files under a dir 131 | :param target_dir: the target folder 132 | :return: the set of files with name 133 | """ 134 | onlyfiles = [f for f in os.listdir(target_dir) if os.path.isfile(os.path.join(target_dir, f))] 135 | return onlyfiles 136 | 137 | 138 | def load_graph_pkl(pkl_dir): 139 | with open(pkl_dir, 'rb') as f: 140 | g = pickle.load(f) 141 | return g 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /fuzzer/image_fuzzer.py: -------------------------------------------------------------------------------- 1 | import argparse, pickle 2 | import shutil 3 | 4 | import tensorflow as tf 5 | import os 6 | from coverage 
import Coverage 7 | from keras.applications.vgg16 import preprocess_input 8 | import random 9 | import time 10 | import numpy as np 11 | from fuzzer.image_queue import ImageInputCorpus 12 | from fuzzer.fuzzone import build_fetch_function 13 | 14 | from fuzzer.lib.fuzzer import Fuzzer 15 | from fuzzer.mutators import Mutators 16 | from fuzzer.image_queue import Seed 17 | from mnist_demo.mnist_lstm import MnistLSTMClassifier 18 | 19 | 20 | def imagenet_preprocessing(input_img_data): 21 | temp = np.copy(input_img_data) 22 | temp = np.float32(temp) 23 | qq = preprocess_input(temp) # final input shape = (1,224,224,3) 24 | return qq 25 | 26 | 27 | def mnist_preprocessing(x_test): 28 | temp = np.copy(x_test) 29 | temp = temp.reshape(temp.shape[0], 28, 28) 30 | temp = temp.astype('float32') 31 | temp /= 255 32 | return temp 33 | 34 | 35 | def cifar_preprocessing(x_test): 36 | temp = np.copy(x_test) 37 | temp = temp.astype('float32') 38 | mean = [125.307, 122.95, 113.865] 39 | std = [62.9932, 62.0887, 66.7048] 40 | for i in range(3): 41 | temp[:, :, :, i] = (temp[:, :, :, i] - mean[i]) / std[i] 42 | return temp 43 | 44 | 45 | preprocess_dic = { 46 | 'cifar10': cifar_preprocessing, 47 | 'mnist': mnist_preprocessing, 48 | 'imagenet': imagenet_preprocessing 49 | } 50 | 51 | shape_dic = { 52 | 'cifar10': (32, 32, 3), 53 | 'mnist': (28, 28), 54 | 'imagenet': (224, 224, 3) 55 | } 56 | 57 | execlude_layer_dic = { 58 | 'vgg16': ['input', 'flatten', 'activation', 'batch', 'dropout'], 59 | 'resnet20': ['input', 'flatten', 'activation', 'batch', 'dropout'], 60 | 'lenet1': ['input', 'flatten', 'activation', 'batch', 'dropout'], 61 | 'lenet4': ['input', 'flatten', 'activation', 'batch', 'dropout'], 62 | 'lenet5': ['input', 'flatten', 'activation', 'batch', 'dropout'], 63 | 'mobilenet': ['input', 'flatten', 'padding', 'activation', 'batch', 'dropout', 64 | 'bn', 'reshape', 'relu', 'pool', 'concat', 'softmax', 'fc'], 65 | 'vgg19': ['input', 'flatten', 'padding', 'activation', 'batch', 'dropout', 'bn', 66 | 'reshape', 'relu', 'pool', 'concat', 'softmax', 'fc'], 67 | 'resnet50': ['input', 'flatten', 'padding', 'activation', 'batch', 'dropout', 'bn', 68 | 'reshape', 'relu', 'pool', 'concat', 'add', 'res4', 'res5'] 69 | } 70 | 71 | 72 | def metadata_function(meta_batches): 73 | return meta_batches 74 | 75 | 76 | def image_mutation_function(batch_num, deeptest=False): 77 | def func(seed): 78 | if deeptest: 79 | return Mutators.image_random_mutate(seed, batch_num) 80 | else: 81 | return Mutators.image_random_mutate(seed, batch_num) 82 | 83 | return func 84 | 85 | 86 | def objective_function(seed): 87 | """Checks if the metadata is inf or NaN.""" 88 | metadata = seed.metadata 89 | ground_truth = seed.ground_truth 90 | return metadata[0] != ground_truth 91 | 92 | 93 | def iterate_function(): 94 | def func(queue, root_seed, parent, mutated_coverage_list, mutated_data_batches, mutated_metadata_list, 95 | objective_function): 96 | ori_batches, batches, cl_batches = mutated_data_batches 97 | successed = False 98 | bug_found = False 99 | for idx in range(len(mutated_coverage_list)): 100 | # 1000 for placeholder 101 | input = Seed(cl_batches[idx], 1000, mutated_coverage_list[idx], root_seed, parent, 102 | mutated_metadata_list[:, idx], 103 | parent.ground_truth) 104 | is_adv = objective_function(input) 105 | if is_adv: 106 | suf = 'g_' + str(input.ground_truth) + 'c_' + str(input.metadata[0]) + '-' + root_seed 107 | queue.save_if_interesting(input, batches[idx], True, suffix=suf) 108 | else: 109 | new_img = 
np.append(ori_batches[idx:idx + 1], batches[idx:idx + 1], axis=0) 110 | successed = queue.save_if_interesting(input, new_img, False) or successed 111 | return bug_found, successed 112 | 113 | return func 114 | 115 | 116 | def dry_run(indir, fetch_function, coverage_function, queue): 117 | seed_lis = os.listdir(indir) 118 | if len(seed_lis) == 0: 119 | print('Empty dir') 120 | exit(0) 121 | for seed_name in seed_lis: 122 | tf.logging.info("Attempting dry run with '%s'...", seed_name) 123 | path = os.path.join(indir, seed_name) 124 | img = np.load(path) 125 | # input_batches = img 126 | coverage_batches, metadata_batches = fetch_function((0, img, 0)) 127 | coverage_list = coverage_function(coverage_batches) 128 | metadata_list = metadata_function(metadata_batches) 129 | input = Seed(0, 1000, coverage_list[0], seed_name, None, metadata_list[0][0], metadata_list[0][0]) 130 | new_img = np.append(img, img, axis=0) 131 | queue.save_if_interesting(input, new_img, False, True, seed_name) 132 | 133 | 134 | if __name__ == '__main__': 135 | 136 | start_time = time.time() 137 | # Log more 138 | tf.logging.set_verbosity(tf.logging.INFO) 139 | random.seed(time.time()) 140 | 141 | parser = argparse.ArgumentParser(description='coverage guided fuzzing') 142 | 143 | parser.add_argument('-i', help='input seed dir') 144 | parser.add_argument('-o', help='seed output') 145 | 146 | parser.add_argument('-model_type', help="target model fuzz", choices=['mnist', 'cifar10', 'imagenet']) 147 | parser.add_argument('-dl_model', help="path to the dl model", required=True) 148 | parser.add_argument('-criteria', help="set the criteria to guide", 149 | choices=['state', 'k-step', 'transition'], default='state') 150 | parser.add_argument('-k_step', help="how many outer step to check", type=int, default=0) 151 | parser.add_argument('-batch_num', help="set mutation batch number", type=int, default=20) 152 | parser.add_argument('-iterations', help="total regression tests tried", type=int, default=10000000) 153 | parser.add_argument('-cri_parameter', help="set the parameter of criteria", type=float) 154 | parser.add_argument('-quantize', help="fuzzer for quantize", default=0, type=int) 155 | parser.add_argument('-quantize_models', help="fuzzer for quantize") 156 | parser.add_argument('-random', help="set mutation batch number", type=int, default=0) 157 | parser.add_argument('-select', help="select next", 158 | choices=['random2', 'random', 'tensorfuzz', 'deeptest', 'deeptest2', 'prob'], default='prob') 159 | parser.add_argument('-pkl_path', help='pkl path') 160 | 161 | args = parser.parse_args() 162 | 163 | if os.path.exists(args.o): 164 | shutil.rmtree(args.o) 165 | os.makedirs(os.path.join(args.o, 'queue')) 166 | os.makedirs(os.path.join(args.o, 'crashes')) 167 | 168 | lstm_classifier = MnistLSTMClassifier() 169 | lstm_classifier.load_hidden_state_model(args.dl_model) 170 | model = lstm_classifier.model 171 | preprocess = preprocess_dic[args.model_type] 172 | 173 | coverage_handler = Coverage(args.pkl_path, args.criteria, args.k_step) 174 | 175 | plot_file = open(os.path.join(args.o, 'plot.log'), 'a+') 176 | 177 | fetch_function_1 = build_fetch_function(model, preprocess) 178 | 179 | dry_run_fetch = build_fetch_function(model, preprocess) 180 | 181 | coverage_function = coverage_handler.update_coverage 182 | 183 | mutation_function = image_mutation_function(args.batch_num) 184 | 185 | queue = ImageInputCorpus(args.o, args.random, args.select, coverage_handler.total_size, args.criteria) 186 | 187 | dry_run(args.i, 
dry_run_fetch, coverage_function, queue) 188 | 189 | image_iterate_function = iterate_function() 190 | 191 | fuzzer = Fuzzer(queue, coverage_function, metadata_function, objective_function, mutation_function, 192 | fetch_function_1, image_iterate_function, args.select) 193 | 194 | fuzzer.loop(args.iterations) 195 | # queue.log() 196 | 197 | print('finish', time.time() - start_time) 198 | -------------------------------------------------------------------------------- /fuzzer/lib/queue.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | from random import randint 4 | import tensorflow as tf 5 | import datetime 6 | import random 7 | import os 8 | 9 | 10 | class Seed(object): 11 | """Class representing a single element of a corpus.""" 12 | 13 | def __init__(self, cl, space, coverage, root_seed, parent, metadata, ground_truth): 14 | """Inits the object. 15 | 16 | Args: 17 | data: a list of numpy arrays representing the mutated data. 18 | metadata: arbitrary python object to be used by the fuzzer for e.g. 19 | computing the objective function during the fuzzing loop. 20 | coverage: an arbitrary hashable python object that guides fuzzing process. 21 | parent: a reference to the CorpusElement this element is a mutation of. 22 | iteration: the fuzzing iteration (number of CorpusElements sampled to 23 | mutate) that this CorpusElement was created at. 24 | Returns: 25 | Initialized object. 26 | """ 27 | 28 | self.clss = cl 29 | self.metadata = metadata 30 | self.parent = parent 31 | self.root_seed = root_seed 32 | self.coverage = coverage 33 | self.queue_time = None 34 | self.id = None 35 | self.probability = 0.8 36 | self.fuzzed_time = 0 37 | 38 | self.ground_truth = ground_truth 39 | self.space = space 40 | 41 | 42 | class FuzzQueue(object): 43 | """Class that holds inputs and associated coverage.""" 44 | 45 | def __init__(self, outdir, is_random, sample_type, cov_num, criteria): 46 | """Init the class. 47 | 48 | Args: 49 | seed_corpus: a list of numpy arrays, one for each input tensor in the 50 | fuzzing process. 51 | sample_function: a function that looks at the whole current corpus and 52 | samples the next element to mutate in the fuzzing loop. 53 | Returns: 54 | Initialized object. 
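      Note (actual constructor arguments):
        outdir: output directory that holds the queue/ and crashes/ sub-folders
          and the plot log.
        is_random: if set, has_new_bits() accepts every mutant regardless of
          coverage.
        sample_type: seed-selection strategy, e.g. 'random', 'tensorfuzz',
          'deeptest', 'deeptest2' or 'prob'.
        cov_num: length of the coverage bitmap (virgin_bits/adv_bits).
        criteria: name of the coverage criteria, used for logging.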
55 | """ 56 | 57 | # care about the close 58 | self.plot_file = open(os.path.join(outdir, 'plot.log'), 'a+') 59 | self.out_dir = outdir 60 | self.mutations_processed = 0 61 | self.queue = [] 62 | self.sample_type = sample_type 63 | self.start_time = time.time() 64 | 65 | self.random = is_random 66 | self.criteria = criteria 67 | 68 | self.log_time = time.time() 69 | self.virgin_bits = np.full(cov_num, 0xFF, dtype=np.uint8) 70 | self.adv_bits = np.full(cov_num, 0xFF, dtype=np.uint8) 71 | self.uniq_crashes = 0 72 | self.total_cov = cov_num 73 | self.last_crash_time = self.start_time 74 | self.last_reg_time = self.start_time 75 | 76 | self.total_queue = 0 77 | 78 | self.dry_run_cov = None 79 | self.current_id = 0 80 | self.seed_attacked = set() 81 | self.seed_attacked_first_time = dict() 82 | 83 | self.REG_GAMMA = 5 84 | self.REG_MIN = 0.3 85 | self.REG_INIT_PROB = 0.8 86 | 87 | def has_new_bits(self, seed): 88 | 89 | temp = np.invert(seed.coverage, dtype=np.uint8) 90 | cur = np.bitwise_and(self.virgin_bits, temp) 91 | has_new = not np.array_equal(cur, self.virgin_bits) 92 | if has_new: 93 | self.virgin_bits = cur 94 | return has_new or self.random 95 | 96 | def plot_log(self, id): 97 | 98 | queue_len = len(self.queue) 99 | coverage = self.compute_cov() 100 | current_time = time.time() 101 | self.plot_file.write( 102 | "%d,%d,%d,%s,%s,%d,%d,%s,%s\n" % 103 | (time.time(), 104 | id, 105 | queue_len, 106 | self.dry_run_cov, 107 | coverage, 108 | self.uniq_crashes, 109 | len(self.seed_attacked), 110 | self.mutations_processed, 111 | round(float(self.mutations_processed) / (current_time - self.start_time), 2) 112 | )) 113 | self.plot_file.flush() 114 | 115 | def write_logs(self): 116 | log_file = open(os.path.join(self.out_dir, 'fuzz.log'), 'w+') 117 | for k in self.seed_attacked_first_time: 118 | log_file.write("%s:%s\n" % (k, self.seed_attacked_first_time[k])) 119 | log_file.close() 120 | self.plot_file.close() 121 | 122 | def log(self): 123 | queue_len = len(self.queue) 124 | coverage = self.compute_cov() 125 | current_time = time.time() 126 | tf.logging.info( 127 | "criteria %s | corpus_size %s | crashes_size %s | mutations_per_second: %s | total_exces %s | last new reg: %s | last new adv %s | coverage: %s -> %s%%", 128 | self.criteria, 129 | queue_len, 130 | self.uniq_crashes, 131 | round(float(self.mutations_processed) / (current_time - self.start_time), 2), 132 | self.mutations_processed, 133 | datetime.timedelta(seconds=(time.time() - self.last_reg_time)), 134 | datetime.timedelta(seconds=(time.time() - self.last_crash_time)), 135 | self.dry_run_cov, 136 | coverage 137 | ) 138 | 139 | def compute_cov(self): 140 | 141 | coverage = round(float(self.total_cov - np.count_nonzero(self.virgin_bits == 0xFF)) * 100 / self.total_cov, 2) 142 | return str(coverage) 143 | 144 | def tensorfuzz(self): 145 | """Grabs new input from corpus according to sample_function.""" 146 | # choice = self.sample_function(self) 147 | corpus = self.queue 148 | reservoir = corpus[-5:] + [random.choice(corpus)] 149 | choice = random.choice(reservoir) 150 | return choice 151 | # return random.choice(self.queue) 152 | 153 | def select_next(self): 154 | if self.sample_type == 'random' or self.sample_type == 'random2' or self.sample_type == 'ran_save': # ran_save is to random and save all mutants 155 | return self.random_select() 156 | elif self.sample_type == 'tensorfuzz': 157 | return self.tensorfuzz() 158 | elif self.sample_type == 'deeptest': 159 | return self.deeptest_next() 160 | elif self.sample_type == 'deeptest2': 
161 | return self.deeptest_next2() 162 | elif self.sample_type == 'prob': 163 | return self.prob_next() 164 | 165 | def random_select(self): 166 | """Grabs new input from corpus according to sample_function.""" 167 | # choice = self.sample_function(self) 168 | 169 | return random.choice(self.queue) 170 | 171 | def deeptest_next(self): 172 | choice = self.queue[-1] 173 | return choice 174 | 175 | def fuzzer_handler(self, iteration, cur_seed, bug_found, coverage_inc): 176 | if self.sample_type == 'deeptest' and not coverage_inc: 177 | self.queue.pop() 178 | elif self.sample_type == 'prob' and not bug_found and not coverage_inc: 179 | if cur_seed.probability > self.REG_MIN and cur_seed.fuzzed_time < self.REG_GAMMA * (1 - self.REG_MIN): 180 | cur_seed.probability = self.REG_INIT_PROB - float(cur_seed.fuzzed_time) / self.REG_GAMMA 181 | 182 | if bug_found: 183 | self.seed_attacked.add(cur_seed.root_seed) 184 | if not (cur_seed.parent in self.seed_attacked_first_time): 185 | self.seed_attacked_first_time[cur_seed.root_seed] = iteration 186 | 187 | def deeptest_next2(self): 188 | if self.current_id == len(self.queue): 189 | self.current_id = 0 190 | choice = self.queue[self.current_id] 191 | self.current_id += 1 192 | return choice 193 | 194 | def prob_next(self): 195 | """Grabs new input from corpus according to sample_function.""" 196 | # choice = self.sample_function(self) 197 | while True: 198 | if self.current_id == len(self.queue): 199 | self.current_id = 0 200 | 201 | cur_seed = self.queue[self.current_id] 202 | if cur_seed.space > 0 and randint(0, 100) < cur_seed.probability * 100: 203 | # if cur_seed.probability > REG_MIN and cur_seed.fuzzed_time < REG_GAMMA * (1-REG_MIN): 204 | # cur_seed.probability = REG_INIT_PROB - float(cur_seed.fuzzed_time)/REG_GAMMA 205 | 206 | cur_seed.fuzzed_time += 1 207 | self.current_id += 1 208 | return cur_seed 209 | else: 210 | self.current_id += 1 211 | -------------------------------------------------------------------------------- /Abstraction/DTMCGraph.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import math 3 | from graphviz import Digraph 4 | import os 5 | 6 | 7 | class State(object): 8 | def __init__(self, name, id): 9 | self.name = name # name is the encoded vector 10 | self.freq = 0 11 | self.id = id 12 | self.section = 0 # to which section the state belongs 13 | 14 | def add_freq(self): 15 | self.freq += 1 16 | 17 | 18 | class Transition(object): 19 | def __init__(self, src, dest, label, id): 20 | self.src = src 21 | self.dest = dest 22 | self.label = label # for input/output label, currently not used 23 | self.freq = 0 24 | self.prob = 0.0 25 | self.id = id 26 | 27 | def add_freq(self): 28 | self.freq += 1 29 | 30 | 31 | class DTMCGraph(object): 32 | def __init__(self, fake_ini): 33 | self.states = OrderedDict() 34 | self.other_states = OrderedDict() 35 | self.transitions = {} 36 | self.fake_ini = fake_ini 37 | self.states[fake_ini] = State(fake_ini, 0) 38 | self.next_transition_id = 0 39 | self.next_state_id = 1 40 | self.k_step_idx = {} # keep a state_name:idx mapping for k-step coverage 41 | 42 | def _add_state(self, state_name): 43 | """ 44 | add a state to the graph 45 | :param state_name: the encoded int64 46 | """ 47 | if state_name not in self.states: 48 | state = State(state_name, self.next_state_id) 49 | self.next_state_id += 1 50 | self.states[state_name] = state 51 | print('STATE ADDED: %s with id %s' % (state_name, state.id)) 52 | else: 53 | 
print('You are trying to add a duplicate state with name %s' % state_name) 54 | 55 | def _add_other_state(self, state_name, section): 56 | """ 57 | add a k-step state to the graph 58 | :param state_name: the encoded int64 59 | :param section: 1 for 1 step, 2 for step and so on 60 | """ 61 | if state_name not in self.other_states: 62 | state = State(state_name, self.next_state_id) 63 | state.section = section 64 | self.next_state_id += 1 65 | self.other_states[state_name] = state 66 | print('OTHER STATE ADDED: %s with id %s' % (state_name, state.id)) 67 | else: 68 | print('You are tring to add a duplicate state with name %s' % state_name) 69 | 70 | def _add_transition(self, src, dst): 71 | """ 72 | add a transition 73 | :param src: name of the source state 74 | :param dst: name of the destination state 75 | """ 76 | if src not in self.states: 77 | print('ERROR: src state can not be found in the graph.') 78 | return -1 79 | if dst not in self.states: 80 | self._add_state(dst) 81 | 82 | self.states[dst].add_freq() 83 | src = self.states[src].id 84 | dst = self.states[dst].id 85 | 86 | if src in self.transitions: 87 | if dst in self.transitions[src]: 88 | self.transitions[src][dst].add_freq() 89 | else: 90 | trans = Transition(src, dst, '', self.next_transition_id) 91 | print('TRANSITION ADDED: with id %s' % self.next_transition_id) 92 | self.next_transition_id += 1 93 | trans.add_freq() 94 | self.transitions[src][dst] = trans 95 | else: 96 | self.transitions[src] = {} 97 | trans = Transition(src, dst, '', self.next_transition_id) 98 | self.next_transition_id += 1 99 | trans.add_freq() 100 | self.transitions[src][dst] = trans 101 | 102 | def add_ordered_transitions(self, trans_seq, label_seq): 103 | """ 104 | add a set of transitions with a sequence of states 105 | :param trans_seq: sequence of states specifying the transitions 106 | :param output_seq: transition label, but currently not used 107 | """ 108 | trans_seq = [self.fake_ini] + trans_seq 109 | for i in range(len(trans_seq)-1): 110 | src = trans_seq[i] 111 | dest = trans_seq[i+1] 112 | self._add_transition(src, dest) 113 | 114 | def add_other_states(self, state_seq, section_seq): 115 | """ 116 | add a set of other states 117 | :param state_seq: a list of states 118 | :param section_seq: a list of corresponding sections 119 | """ 120 | for i in range(len(state_seq)): 121 | if state_seq[i] not in self.states: 122 | self._add_other_state(state_seq[i], section_seq[i]) 123 | 124 | def cal_trans_prob(self): 125 | for _, state in self.states.items(): 126 | if state.id in self.transitions: 127 | out_trans = self.transitions[state.id] 128 | total = sum([tr.freq for _, tr in out_trans.items()]) 129 | for _, tr in out_trans.items(): 130 | tr.prob = tr.freq/total 131 | 132 | def draw_graph(self, folder, type): 133 | self.cal_trans_prob() 134 | dot = Digraph(comment='RNN state transition graph') 135 | for state in self.states.values(): 136 | dot.node(str(state.id), '%s' % state.id) 137 | for src, dlist in self.transitions.items(): 138 | for dest, transition in dlist.items(): 139 | lab = '%.2f' % transition.prob 140 | dot.edge(str(src), str(dest), label=lab) 141 | dot.render(os.path.join(folder, '%s.gv' % type), view=False) 142 | # print(dot.source) 143 | 144 | def to_cover_major_states(self, transition_seq_name, cnt_states, return_set=False): 145 | """ 146 | update the cnt_states with coverage triggered by the sequence of transitions 147 | :param transition_seq_name: a name sequence of states 148 | :param cnt_states: a coverage vector with same 
length as self.states; it is 149 | indexed by the state.id 150 | :param return_set: whether to return the set of ids of covered states/transitions 151 | :return: cnt_states is updated in place 152 | """ 153 | for i in range(len(transition_seq_name)-1): 154 | dst = transition_seq_name[i+1] 155 | if dst in self.states: 156 | idx = self.states[dst].id 157 | if not return_set: 158 | num = cnt_states[idx] 159 | if num < 255: 160 | num += 1 161 | cnt_states[idx] = num 162 | else: 163 | cnt_states.append(idx) 164 | 165 | def to_cover_k_step(self, transition_seq_name, cnt_states, return_set=False): 166 | for i in range(len(transition_seq_name)-1): 167 | dst = transition_seq_name[i+1] 168 | if dst in self.k_step_idx: 169 | idx = self.k_step_idx[dst] 170 | if not return_set: 171 | num = cnt_states[idx] 172 | if num < 255: 173 | num += 1 174 | cnt_states[idx] = num 175 | else: 176 | cnt_states.append(idx) 177 | 178 | def init_k_step_idx(self, k): 179 | """ 180 | initialize a mapping between k-step state name and vector index 181 | :param k: maximum step to calculate the coverage, i.e., only consider states within k steps 182 | """ 183 | if k <= 0: 184 | return 185 | self.k_step_idx = {} 186 | i = 0 187 | for state_name, state in self.other_states.items(): 188 | if state.section <= k: 189 | self.k_step_idx[state_name] = i 190 | i += 1 191 | 192 | def to_cover_transitions(self, transition_seq_name, cnt_states, return_set=False): 193 | for i in range(len(transition_seq_name)-1): 194 | src = transition_seq_name[i] 195 | dst = transition_seq_name[i+1] 196 | if src not in self.states or dst not in self.states: 197 | continue 198 | src = self.states[src].id 199 | dst = self.states[dst].id 200 | # tran = trans[i] 201 | if src in self.transitions: 202 | if dst in self.transitions[src]: 203 | idx = self.transitions[src][dst].id 204 | if not return_set: 205 | num = cnt_states[idx] 206 | if num < 255: 207 | num += 1 208 | cnt_states[idx] = num 209 | else: 210 | cnt_states.append(idx) 211 | 212 | def get_index_weight_dic(self, type="state", reverse=False): 213 | cri_dic = self.states 214 | if type == "transition": 215 | cri_dic = dict() 216 | for src, entry in self.transitions.items(): 217 | for dst, tran in entry.items(): 218 | cri_dic[tran.id] = tran 219 | 220 | total = 0 221 | for name, entry in cri_dic.items(): 222 | total += entry.freq 223 | # print(self.next_transition_id) 224 | result = dict() 225 | for name, entry in cri_dic.items(): 226 | result[entry.id] = entry.freq/total 227 | 228 | if not reverse: 229 | return result 230 | else: 231 | rev_dic = dict() 232 | for k, w in result.items(): 233 | rev_dic[k] = (1-w)/(self.next_transition_id-1) 234 | return rev_dic 235 | 236 | def get_major_states_num(self): 237 | return len(self.states) 238 | 239 | def get_transition_num(self): 240 | return self.next_transition_id 241 | 242 | def get_k_step_states_num(self): 243 | return len(self.k_step_idx) -------------------------------------------------------------------------------- /fuzzer/mutators.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import cv2 3 | import numpy as np 4 | import random 5 | import time 6 | import copy 7 | 8 | 9 | class Mutators(): 10 | def image_translation(img, params): 11 | 12 | rows, cols, ch = img.shape 13 | # rows, cols = img.shape 14 | 15 | # M = np.float32([[1, 0, params[0]], [0, 1, params[1]]]) 16 | M = np.float32([[1, 0, params], [0, 1, params]]) 17 | dst = cv2.warpAffine(img, M, (cols, rows)) 18 |
return dst 19 | 20 | def image_scale(img, params): 21 | 22 | # res = cv2.resize(img, None, fx=params[0], fy=params[1], interpolation=cv2.INTER_CUBIC) 23 | rows, cols, ch = img.shape 24 | res = cv2.resize(img, None, fx=params, fy=params, interpolation=cv2.INTER_CUBIC) 25 | res = res.reshape((res.shape[0], res.shape[1], ch)) 26 | y, x, z = res.shape 27 | if params > 1: # need to crop 28 | startx = x // 2 - cols // 2 29 | starty = y // 2 - rows // 2 30 | return res[starty:starty + rows, startx:startx + cols] 31 | elif params < 1: # need to pad 32 | sty = int((rows - y) / 2) 33 | stx = int((cols - x) / 2) 34 | return np.pad(res, [(sty, rows - y - sty), (stx, cols - x - stx), (0, 0)], mode='constant', 35 | constant_values=0) 36 | return res 37 | 38 | def image_shear(img, params): 39 | rows, cols, ch = img.shape 40 | # rows, cols = img.shape 41 | factor = params * (-1.0) 42 | M = np.float32([[1, factor, 0], [0, 1, 0]]) 43 | dst = cv2.warpAffine(img, M, (cols, rows)) 44 | return dst 45 | 46 | def image_rotation(img, params): 47 | rows, cols, ch = img.shape 48 | # rows, cols = img.shape 49 | M = cv2.getRotationMatrix2D((cols / 2, rows / 2), params, 1) 50 | dst = cv2.warpAffine(img, M, (cols, rows), flags=cv2.INTER_AREA) 51 | return dst 52 | 53 | def image_contrast(img, params): 54 | alpha = params 55 | new_img = cv2.multiply(img, np.array([alpha])) # mul_img = img*alpha 56 | # new_img = cv2.add(mul_img, beta) # new_img = img*alpha + beta 57 | 58 | return new_img 59 | 60 | def image_brightness(img, params): 61 | beta = params 62 | new_img = cv2.add(img, beta) # new_img = img + beta 63 | return new_img 64 | 65 | def image_blur(img, params): 66 | 67 | # print("blur") 68 | blur = [] 69 | if params == 1: 70 | blur = cv2.blur(img, (3, 3)) 71 | if params == 2: 72 | blur = cv2.blur(img, (4, 4)) 73 | if params == 3: 74 | blur = cv2.blur(img, (5, 5)) 75 | if params == 4: 76 | blur = cv2.GaussianBlur(img, (3, 3), 0) 77 | if params == 5: 78 | blur = cv2.GaussianBlur(img, (5, 5), 0) 79 | if params == 6: 80 | blur = cv2.GaussianBlur(img, (7, 7), 0) 81 | if params == 7: 82 | blur = cv2.medianBlur(img, 3) 83 | if params == 8: 84 | blur = cv2.medianBlur(img, 5) 85 | # if params == 9: 86 | # blur = cv2.blur(img, (6, 6)) 87 | if params == 9: 88 | blur = cv2.bilateralFilter(img, 6, 50, 50) 89 | # blur = cv2.bilateralFilter(img, 9, 75, 75) 90 | return blur 91 | 92 | def image_pixel_change(img, params): 93 | # randomly change `params` pixels to random values in [0, 255] 94 | img_shape = img.shape 95 | img1d = np.ravel(img) 96 | arr = np.random.randint(0, len(img1d), params) 97 | for i in arr: 98 | img1d[i] = np.random.randint(0, 256) 99 | new_img = img1d.reshape(img_shape) 100 | return new_img 101 | 102 | def image_noise(img, params): 103 | if params == 1: # Gaussian-distributed additive noise. 104 | row, col, ch = img.shape 105 | mean = 0 106 | var = 0.1 107 | sigma = var ** 0.5 108 | gauss = np.random.normal(mean, sigma, (row, col, ch)) 109 | gauss = gauss.reshape(row, col, ch) 110 | noisy = img + gauss 111 | return noisy.astype(np.uint8) 112 | elif params == 2: # Salt-and-pepper noise: set random pixels to 1 (salt) or 0 (pepper). 113 | s_vs_p = 0.5 114 | amount = 0.004 115 | out = np.copy(img) 116 | # Salt mode 117 | num_salt = np.ceil(amount * img.size * s_vs_p) 118 | coords = [np.random.randint(0, i, int(num_salt)) 119 | for i in img.shape] 120 | out[tuple(coords)] = 1 121 | 122 | # Pepper mode 123 | num_pepper = np.ceil(amount * img.size * (1.
- s_vs_p)) 124 | coords = [np.random.randint(0, i, int(num_pepper)) 125 | for i in img.shape] 126 | out[tuple(coords)] = 0 127 | return out 128 | elif params == 3: # Multiplicative (speckle) noise: out = image + n*image, where n is Gaussian noise. 129 | row, col, ch = img.shape 130 | gauss = np.random.randn(row, col, ch) 131 | gauss = gauss.reshape(row, col, ch) 132 | noisy = img + img * gauss 133 | return noisy.astype(np.uint8) 134 | 135 | ''' 136 | TODO: Add more mutators, current version is from DeepTest, https://arxiv.org/pdf/1708.08559.pdf 137 | 138 | Also check, https://arxiv.org/pdf/1712.01785.pdf, and DeepXplore 139 | 140 | ''' 141 | 142 | # TODO: Random L0 143 | 144 | # TODO: Random L infinity 145 | 146 | # more transformations refer to: http://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_geometric_transformations/py_geometric_transformations.html#geometric-transformations 147 | # http://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_table_of_contents_imgproc/py_table_of_contents_imgproc.html 148 | 149 | transformations = [image_translation, image_scale, image_shear, image_rotation, 150 | image_contrast, image_brightness, image_blur, image_pixel_change, image_noise] 151 | 152 | # these parameters need to be carefully considered in the experiment 153 | # to account for the feedback 154 | params = [] 155 | params.append(list(range(-3, 3))) # image_translation 156 | params.append(list(map(lambda x: x * 0.1, list(range(8, 11))))) # image_scale 157 | params.append(list(map(lambda x: x * 0.1, list(range(-5, 5))))) # image_shear 158 | params.append(list(range(-30, 30))) # image_rotation 159 | params.append(list(map(lambda x: x * 0.1, list(range(6, 12))))) # image_contrast 160 | params.append(list(range(-20, 20))) # image_brightness 161 | params.append(list(range(1, 10))) # image_blur 162 | params.append(list(range(1, 10))) # image_pixel_change 163 | params.append(list(range(1, 4))) # image_noise 164 | 165 | classA = [7, 8] 166 | classB = [0, 1, 2, 3, 4, 5, 6] 167 | 168 | # classB = [5, 6] 169 | # classB = [] 170 | @staticmethod 171 | def mutate_one(ori_img, img, cl, try_num=50): 172 | x, y, z = img.shape 173 | 174 | a = 0.02 175 | b = 0.30 176 | l0 = int(a * x * y * z) 177 | l_infinity = int(b * 255) 178 | ori_shape = ori_img.shape 179 | for ii in range(try_num): 180 | random.seed(time.time()) 181 | if cl == 0: # 0 can choose class A and B 182 | tid = random.sample(Mutators.classA + Mutators.classB, 1)[0] 183 | transformation = Mutators.transformations[tid] 184 | 185 | params = Mutators.params[tid] 186 | param = random.sample(params, 1)[0] 187 | img_new = transformation(copy.deepcopy(img), param) 188 | img_new = img_new.reshape(ori_shape) 189 | 190 | if tid in Mutators.classA: 191 | sub = ori_img - img_new 192 | if np.sum(sub != 0) < l0 or np.max(abs(sub)) < l_infinity: 193 | return ori_img, img_new, 0, 1 194 | else: # class B 195 | # print(transformation) 196 | ori_img = transformation(copy.deepcopy(ori_img), param) # the original image needs to be updated as well 197 | # print('Original changed with %s',transformation) 198 | ori_img = ori_img.reshape(ori_shape) 199 | return ori_img, img_new, 1, 1 200 | if cl == 1: 201 | tid = random.sample(Mutators.classA, 1)[0] 202 | transformation = Mutators.transformations[tid] 203 | params = Mutators.params[tid] 204 | param = random.sample(params, 1)[0] 205 | img_new = transformation(copy.deepcopy(img), param) 206 | sub = ori_img - img_new 207 | if np.sum(sub != 0) < l0 or
np.max(abs(sub)) < l_infinity: 208 | return ori_img, img_new, 1, 1 209 | return ori_img, img, cl, 0 210 | 211 | @staticmethod 212 | def image_random_mutate(seed, batch_num): 213 | ''' 214 | This is the interface to perform random mutation on an input image: it randomly selects 215 | a mutator and applies it with a randomly chosen predefined parameter. 216 | 217 | :param seed: seed entry; seed.fname stores the (original, current) image pair and seed.clss its mutation class 218 | :param batch_num: number of mutations to attempt 219 | :return: (original image batch, mutated image batch, class batch), or None if no mutation succeeded 220 | ''' 221 | 222 | # randomly sample 223 | # tid = random.sample([0, 1, 2, 3, 4, 5, 6], 1)[0] 224 | # l0 = 300 225 | # l_infinity = 150 226 | 227 | test = np.load(seed.fname) 228 | test = np.expand_dims(test, axis=-1) 229 | ori_img = test[0] 230 | img = test[1] 231 | cl = seed.clss 232 | ori_batches = [] 233 | batches = [] 234 | cl_batches = [] 235 | for i in range(batch_num): 236 | ori_out, img_out, cl_out, changed = Mutators.mutate_one(ori_img, img, cl) 237 | if changed: 238 | ori_batches.append(ori_out) 239 | batches.append(img_out) 240 | cl_batches.append(cl_out) 241 | # ori_batches = np.squeeze(ori_batches) 242 | # batches = np.squeeze(batches) 243 | if len(ori_batches) > 0: 244 | ori_batches = np.squeeze(np.asarray(ori_batches), axis=-1) 245 | batches = np.squeeze(np.asarray(batches), axis=-1) 246 | return (ori_batches, batches, cl_batches) 247 | 248 | @staticmethod 249 | def mutate_two(seed, batch_num): 250 | ''' 251 | This is the interface to perform random mutation on an input image: it randomly selects 252 | a mutator and applies it with a randomly chosen predefined parameter. 253 | 254 | :param seed: seed entry; seed.fname stores the (original, current) image pair and seed.clss its mutation class 255 | :param batch_num: number of mutations to attempt 256 | :return: (original image batch, mutated image batch, class batch) 257 | ''' 258 | 259 | # randomly sample 260 | # tid = random.sample([0, 1, 2, 3, 4, 5, 6], 1)[0] 261 | # l0 = 300 262 | # l_infinity = 150 263 | 264 | test = np.load(seed.fname) 265 | ori_img = test[0] 266 | img = test[1] 267 | cl = seed.clss 268 | ori_batches = [] 269 | batches = [] 270 | cl_batches = [] 271 | for i in range(batch_num): 272 | ori_out, img_out, cl_out, changed = Mutators.mutate_one(ori_img, img, cl) 273 | # ori_out, img_out, cl_out, changed = Mutators.mutate_one(ori_out, img_out, cl_out) 274 | if changed: 275 | ori_batches.append(ori_out) 276 | batches.append(img_out) 277 | cl_batches.append(cl_out) 278 | 279 | return (np.asarray(ori_batches), np.asarray(batches), cl_batches) 280 | --------------------------------------------------------------------------------
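Usage sketch (not part of the repository): the hypothetical snippet below shows how DTMCGraph and Mutators could be exercised in isolation, assuming the repository root is on the Python import path. The fake initial state value -1, the toy state names, and the random 28x28 input are illustrative assumptions only; in the real pipeline the state names are int64 codes produced by Abstraction/Coder.py and the seeds come from the fuzzing queue.

from Abstraction.DTMCGraph import DTMCGraph
from fuzzer.mutators import Mutators
import numpy as np

# Build a tiny DTMC from two abstract state traces (-1 acts as the artificial initial state).
graph = DTMCGraph(fake_ini=-1)
graph.add_ordered_transitions([1, 2, 3], None)
graph.add_ordered_transitions([1, 2, 4], None)

# Major-state coverage: one counter slot per state id (slot 0 is the fake initial state).
cnt_states = [0] * graph.get_major_states_num()
graph.to_cover_major_states([-1, 1, 2, 4], cnt_states)
print(cnt_states)  # non-zero entries mark states touched by the trace

# Transition coverage works the same way, indexed by transition id.
cnt_trans = [0] * graph.get_transition_num()
graph.to_cover_transitions([-1, 1, 2, 4], cnt_trans)
print(cnt_trans)

# Apply one random mutation to a synthetic 28x28 grayscale image; class 0 allows
# both affine (class B) and pixel-level (class A) mutators.
img = np.random.randint(0, 256, size=(28, 28, 1), dtype=np.uint8)
ori_out, img_out, cl_out, changed = Mutators.mutate_one(img.copy(), img.copy(), cl=0)
print(cl_out, changed)

Note that the to_cover_* routines saturate each counter at 255, so the coverage vector behaves like an 8-bit, AFL-style hit-count map.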