├── Explanation
    ├── Lemna.py
    ├── Lime.py
    ├── README.md
    └── perturbation_sampling.py
├── NetworkTraining
    ├── DAMD
    │   ├── DalvikOpcodes.txt
    │   ├── DalvikOpcodesDescription.json
    │   ├── Opcodes_all.zip
    │   ├── config.py
    │   ├── config_preprocessing.py
    │   ├── damd.py
    │   └── preprocessing.py
    ├── Drebin
    │   ├── DrebinDataGenerator.py
    │   ├── Drebin_DNN.py
    │   ├── config.py
    │   ├── drebin.py
    │   └── drebin_datapipeline.py
    ├── Mimicus
    │   ├── config.py
    │   ├── contagio-all.csv
    │   └── mimicus.py
    ├── README.md
    └── VulDeePecker
    │   ├── VuldeeDataGenerator.py
    │   ├── config_training.py
    │   ├── config_word_to_vec.py
    │   ├── source-CWE-119-full.zip
    │   ├── vuldeepecker.py
    │   └── word2vec.py
├── README.md
└── utils
    ├── custom_metrics.py
    └── utils.py


/Explanation/Lemna.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cvxpy as cvx
  3 | from sklearn import linear_model
  4 | import argparse
  5 | import multiprocessing
  6 | import pickle as pkl
  7 | import time
  8 | import sys
  9 | 
 10 | 
 11 | # gaussian density function
 12 | def gaussian(x, mu, sigma_squared):
 13 |     eps = 0
 14 |     return 1/(np.sqrt(2*np.pi*sigma_squared)+eps)*np.exp(-0.5*(x-mu)**2/(sigma_squared+eps))
 15 | 
 16 | 
 17 | # the expectation maximization algorithm of the lemna paper, calculation of indices can be found in appendix
 18 | def em_regression_algorithm(data, labels, K, alpha_S, iterations, linreg_type, verbose=True, save_path=None):
 19 |     # determine if data is sparse
 20 |     sparse = True if type(data).__module__ == 'scipy.sparse.csr' else False
 21 |     no_samples = data.shape[0]
 22 |     sample_len = data.shape[1]
 23 |     label_sum = np.sum(np.abs(labels))
 24 |     no_ones = len(np.where(labels == 1)[0])
 25 |     no_zeros = len(labels) - no_ones
 26 |     #data = (data-np.mean(data))/np.std(data)
 27 |     if linreg_type not in ['lasso', 'fused_lasso']:
 28 |         print('Invalid linreg_type (%s)' %linreg_type)
 29 |         exit(1)
 30 |     elif sample_len <=1:
 31 |         if verbose:
 32 |             print('Encountered invaliddata sample!')
 33 |         with open(save_path, 'a') as f:
 34 |             print('Invalid sample.', file=f)
 35 |         return np.array([-1]), np.array([-1])
 36 |     eps = 1e-6
 37 |     number_of_history_betas = 3
 38 |     convergence_threshold = 1e-2
 39 |     # initialize the parameters randomly
 40 |     pi, sigma_sq = np.random.uniform(0, 1, size=K), np.random.uniform(0, 1, size=K)
 41 |     # normalize pi
 42 |     pi = 1/np.sum(pi) * pi
 43 |     beta = np.random.uniform(-.1, .1, size=(K, sample_len))
 44 |     z_hat = np.zeros(shape=(no_samples, K))
 45 |     # check for convergence using last betas
 46 |     old_likelihoods = []
 47 |     converged = False
 48 |     # run at most 'iterations' iterations but finish if the last 'number of history betas' log likelihood values are
 49 |     # close to each other
 50 |     initial_log_likelihood = 0
 51 |     for n in range(no_samples):
 52 |         if sparse:
 53 |             likelihood = sum([pi[k] * gaussian(labels[n], data.getrow(n).dot(beta[k,:])[0], sigma_sq[k])
 54 |                               for k in range(K)])
 55 |         else:
 56 |             likelihood = sum([pi[k] * gaussian(labels[n], np.dot(data[n,:],beta[k, :]), sigma_sq[k])
 57 |                               for k in range(K)])
 58 |         if likelihood != 0:
 59 |             initial_log_likelihood += np.log(likelihood)
 60 |     if verbose:
 61 |         print('Starting Expectation maximization algorithm for %d iterations with sum of labels %d' %(iterations,
 62 |                                                                                                       label_sum))
 63 |     start_time = time.time()
 64 |     for iter in range(iterations):
 65 |         # E step
 66 |         for i in range(no_samples):
 67 |             if sparse:
 68 |                 denom_e = sum([pi[k] * gaussian(labels[i], data.getrow(i).dot(beta[k, :])[0], sigma_sq[k]) for k in
 69 |                              range(K)])
 70 |             else:
 71 |                 denom_e = sum([pi[k] * gaussian(labels[i], np.dot(data[i,:], beta[k,:]), sigma_sq[k]) for k in
 72 |                              range(K)])
 73 |             if denom_e == 0:
 74 |                 denom_e = eps
 75 |                 if verbose:
 76 |                     print('set denom_e to eps')
 77 |             for k in range(K):
 78 |                 pred_2 = data.getrow(i).dot(beta[k, :])[0] if sparse else np.dot(data[i,:], beta[k,:])
 79 |                 z_hat[i, k] = pi[k]*gaussian(labels[i], pred_2, sigma_sq[k])/denom_e
 80 |         # M step
 81 |         for k in range(K):
 82 |             denom_m = np.sum(z_hat[:, k])
 83 |             if denom_m == 0:
 84 |                 denom_m = eps
 85 |             if sparse:
 86 |                 sigma_sq[k] = sum([z_hat[i, k] * (labels[i] - data.getrow(i).dot(beta[k, :])[0])**2 for i in
 87 |                                    range(no_samples)]) / denom_m
 88 |             else:
 89 |                 sigma_sq[k] = sum([z_hat[i, k] * (labels[i] - np.dot(data[i, :], beta[k, :])) ** 2 for i in
 90 |                                    range(no_samples)]) / denom_m
 91 |             if sigma_sq[k] == 0:
 92 |                 sigma_sq[k] += eps
 93 |                 if verbose:
 94 |                     print('added eps to sigma')
 95 |             pi[k] = np.sum(z_hat[:, k])/no_samples
 96 |         component_assignments = np.argmax(z_hat, axis=1)
 97 |         # estimate betas by linear regression with fused lasso loss
 98 |         for k in range(K):
 99 |             sample_indices_of_k = np.where(component_assignments == k)[0]
100 |             samples_of_k = data[sample_indices_of_k, :]
101 |             labels_of_k = labels[sample_indices_of_k]
102 |             if len(labels_of_k) > 0:
103 |                 if linreg_type == 'fused_lasso':
104 |                     beta[k,:] = solve_fused_lasso_regression(samples_of_k, labels_of_k, alpha_S)
105 |                 elif linreg_type == 'lasso':
106 |                     reg = linear_model.Lasso(alpha=alpha_S, precompute=True, normalize=True, max_iter=3000)
107 |                     reg.fit(samples_of_k, labels_of_k)
108 |                     beta[k, :] = reg.coef_
109 |         # recompute log_likelihood in order to check for convergence
110 |         log_likelihood = 0
111 |         for n in range(no_samples):
112 |             if sparse:
113 |                 likelihood = sum([pi[k] * gaussian(labels[n], data.getrow(n).dot(beta[k, :])[0], sigma_sq[k])
114 |                                   for k in range(K)])
115 |             else:
116 |                 likelihood = sum([pi[k] * gaussian(labels[n], np.dot(data[n, :], beta[k, :]), sigma_sq[k])
117 |                                   for k in range(K)])
118 |             if likelihood != 0:
119 |                 log_likelihood += np.log(likelihood)
120 |         if len(old_likelihoods) < number_of_history_betas:
121 |             old_likelihoods.append(log_likelihood)
122 |         else:
123 |             abs_diffs = []
124 |             for beta_idx in range(number_of_history_betas):
125 |                 diff = np.abs(old_likelihoods[beta_idx]-log_likelihood)
126 |                 abs_diffs.append(diff)
127 |             convergence_check = [np.sum(diff <= convergence_threshold) for diff in abs_diffs]
128 |             if np.sum(convergence_check) == number_of_history_betas:
129 |                 converged = True
130 |             old_likelihoods.pop(0)
131 |             old_likelihoods.append(log_likelihood)
132 |         if verbose:
133 |             print('likelihood history', old_likelihoods)
134 |         if converged:
135 |             end_time = time.time()
136 |             if verbose:
137 |                 print('EM-Alogirthm converged after %d iterations (%d seconds).' %(iter, end_time-start_time))
138 |                 argm = np.argmax(z_hat, axis=1)
139 |                 for k in range(K):
140 |                     indices_of_k = np.where(argm==k)[0]
141 |                     labels_of_k = labels[indices_of_k]
142 |                     labels_in_k = np.unique(labels_of_k)
143 |                     d = {}
144 |                     for label in labels_in_k:
145 |                         d[label] = len(np.where(labels_of_k==label)[0])
146 |                     print('labels in cluster %d'%k, d)
147 |             break
148 |     if save_path:
149 |         with open(save_path, 'a') as f:
150 |             projections = np.dot((beta * pi[:, np.newaxis]), np.transpose(data))
151 |             projections = np.sum(projections, axis=0)
152 |             diff = (projections - labels) ** 2
153 |             mse = 1. / len(diff) * np.sum(diff)
154 |             if converged:
155 |                 print('S=%.3f_K=%d_linreg_type=%s_no_ones=%d_no_zeros=%d_time=%.4f_mse=%.4f'
156 |                       % (alpha_S, K, linreg_type, no_ones, no_zeros, end_time - start_time, mse), file=f)
157 |             else:
158 |                 print('S=%.3f_K=%d_linreg_type=%s_no_ones=%d_no_zeros=%d_time=%.4f_mse=%.4f'
159 |                       % (alpha_S, K, linreg_type, no_ones, no_zeros, -1, mse), file=f)
160 |             # print('mse', mse)
161 |     # return the parameters by choosing the cluster belonging to the first row of the perturbations which is by
162 |     # assumption the sample to be explained
163 |     cluster_idx_sample = np.argmax(z_hat[0])
164 |     return beta[cluster_idx_sample], sigma_sq[cluster_idx_sample]
165 | 
166 | 
167 | # returns matrix A such that sum(abs(A*x)) is the fused lasso constraint on x
def get_band_matrix_fused_lasso(dim):
    """Build the (dim, dim) difference matrix A of the fused lasso penalty.

    Row i (for i < dim-1) holds -1 at column i and 1 at column i+1, the last row is all zeros.
    Returns None (after printing a message) if dim <= 1.
    """
    if dim <= 1:
        print('Invalid dimension for band matrix (%d)!'%dim)
        return None
    band = np.zeros((dim, dim))
    upper_rows = np.arange(dim - 1)
    band[upper_rows, upper_rows] = -1
    band[upper_rows, upper_rows + 1] = 1
    return band
177 | 
178 | 
def solve_fused_lasso_regression(samples, labels, S):
    """Solve a least-squares regression under a fused lasso constraint with cvxpy.

    Minimizes sum_squares(samples @ beta - labels) subject to the summed absolute differences
    of neighboring coefficients being at most S.

    Parameters
    ----------
    samples : 2d array of shape (no_samples, no_dimensions)
    labels : 1d array with one target per row of samples
    S : float, upper bound on sum(|beta[i+1] - beta[i]|)

    Returns
    -------
    The value cvxpy assigned to beta after solving (`beta.value`).
    """
    no_dimensions = samples.shape[1]
    design = cvx.Constant(samples)
    beta = cvx.Variable(no_dimensions)
    # differences of neighboring coefficients; expressed by slicing because an explicit band
    # matrix gets large very fast if the dimension is high
    neighbor_differences = beta[1:] - beta[:no_dimensions - 1]
    # '@' is the matrix product; '*' for matrix multiplication is deprecated in cvxpy
    objective = cvx.Minimize(cvx.sum_squares(design @ beta - labels))
    constraints = [cvx.sum(cvx.abs(neighbor_differences)) <= S]
    problem = cvx.Problem(objective, constraints)
    problem.solve()
    return beta.value
194 | 
195 | 
def lemna_parallel(perturbation_data, perturbation_labels, K, alpha_S, iterations, no_processes, linreg_type,
                   repetitions=1, verbose=False, save_path=None):
    """Run em_regression_algorithm for every perturbation block in a process pool.

    Each entry of perturbation_data (with its labels) is processed `repetitions` times. If
    save_path is given, the runs of entry i log to the file save_path + str(i).

    Returns (betas, sigmas). betas is a plain list if perturbation_data is a list, otherwise an
    array reshaped to (len(perturbation_data), repetitions, perturbation_data.shape[-1]).
    sigmas is an array of shape (len(perturbation_data), repetitions).
    """
    no_inputs = len(perturbation_data)
    assert no_inputs == len(perturbation_labels)
    no_jobs = no_inputs * repetitions
    # every block (and its labels) appears 'repetitions' times in a row in the job list
    jobs_data = [block for block in perturbation_data for _ in range(repetitions)]
    jobs_labels = np.repeat(perturbation_labels, repetitions, axis=0)
    if save_path:
        log_files = np.repeat(np.array([save_path + str(i) for i in range(no_inputs)]), repetitions)
    else:
        log_files = [None] * no_jobs
    jobs = zip(jobs_data, jobs_labels, [K] * no_jobs, [alpha_S] * no_jobs, [iterations] * no_jobs,
               [linreg_type] * no_jobs, [verbose] * no_jobs, log_files)
    with multiprocessing.Pool(processes=no_processes) as pool:
        results = pool.starmap(em_regression_algorithm, jobs)
    betas = [result[0] for result in results]
    if type(perturbation_data) is not list:
        betas = np.array(betas).reshape((no_inputs, repetitions, perturbation_data.shape[-1]))
    sigmas = np.array([result[1] for result in results]).reshape((no_inputs, repetitions))
    return betas, sigmas
221 | 
222 | 
# Command line entry point: load perturbations and labels, run LEMNA in parallel and store the
# resulting betas and sigmas next to save_path.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Implementation of the Lemna algorithm.')
    parser.add_argument('data_path', type=str,
                        help='Path to list (.pkl) containing perturbations of shape (no_perturbations, no_features).')
    parser.add_argument('label_path', type=str,
                        help='Path to array containing labels of shape (no_samples, no_perturbations). Labels are'
                             ' assumed to be binary (0/1).')
    parser.add_argument('save_path', type=str, help='Runtime, mse and more is documented in a file for each sample.')
    parser.add_argument('K', type=int, help='K parameter (number of components) of the algorithm.')
    parser.add_argument('linreg_type', nargs=2,
                        help='Lasso for linear regression with L1 regularization, fused_lasso for fused lasso. Second'
                             ' parameter is alpha for lasso and S for fused_lasso.')
    parser.add_argument('iterations', type=int, help='Number of maximum iterations during EM Algorithm.')
    parser.add_argument('repetitions', type=int, help='Number of repetitions of EM Algorithm as its'
                                                      ' output is not deterministic.')
    parser.add_argument('--no_processes', type=int, default=2, help='Number of processes for running parallel.')
    parser.add_argument('--verbose', type=int, default=0, help='Detailed output of EM algorithm.')
    args = vars(parser.parse_args())
    for k, v in args.items():
        print('{} = {}'.format(k, v))
    data_format = args['data_path'].split('.')[-1]
    if data_format == 'npy':
        data = np.load(args['data_path'])
    elif data_format == 'pkl':
        # NOTE: unpickling can execute arbitrary code - only load files from trusted sources
        with open(args['data_path'], 'rb') as data_file:
            data = pkl.load(data_file)
    else:
        print('Data format was not understood. Data could not be loaded.')
        sys.exit(1)
    labels = np.load(args['label_path'])
    # linreg_type carries two values: the regression type and its regularization parameter
    regression_type, alpha_S = args['linreg_type'][0], float(args['linreg_type'][1])
    betas, sigmas = lemna_parallel(data, labels, args['K'], alpha_S, args['iterations'],
                                   args['no_processes'], regression_type, repetitions=args['repetitions'],
                                   verbose=bool(args['verbose']), save_path=args['save_path'])
    result_prefix = args['save_path'] + 'K=%d_S=%.4f' % (args['K'], alpha_S)
    if type(betas) is list:
        with open(result_prefix + '_betas.pkl', 'wb') as betas_file:
            pkl.dump(betas, betas_file)
    else:
        np.save(result_prefix + '_betas.npy', betas)
    np.save(result_prefix + '_sigmas.npy', sigmas)
260 | 


--------------------------------------------------------------------------------
/Explanation/Lime.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pickle as pkl
 3 | import argparse
 4 | import time
 5 | import sys
 6 | from sklearn.linear_model import Ridge
 7 | from tqdm import tqdm
 8 | from scipy.spatial.distance import cosine
 9 | from lemna_postprocessing_scripts.relevances_to_linreg import linreg_relevances_to_vector_space
10 | from scipy.sparse import load_npz
11 | 
12 | 
13 | # returns the similarity weight exp(-cosine_distance(x, y) / sigma) for two points
def get_weight(x,y,sigma=1.0):
    """Return exp(-d / sigma), where d is the cosine distance between x and y."""
    cosine_distance = cosine(x, y)
    exponent = -cosine_distance / sigma
    return np.exp(exponent)
17 | 
18 | 
19 | # calculates lime weights for each feature
20 | # perturbations is a 2d numpy array where the first row corresponds to the original sample
21 | # labels is a 1d numpy array containing the labels for the perturbations and the original sample is supposed to be
22 | # given label 1 by the classifier always. (this way, positive relevances will always speak _for_ the original
23 | # classification of the classifier)
def get_lime_weights(perturbations, labels, random_state):
    """Fit a weighted ridge regression on one perturbation block and return its coefficients.

    Every row is weighted by get_weight(perturbations[0], row), i.e. by its similarity to the
    first row, which holds the original sample.
    """
    assert perturbations.shape[0] == labels.shape[0]
    original_sample = perturbations[0]
    sample_weights = np.array([get_weight(original_sample, row) for row in perturbations])
    surrogate = Ridge(alpha=1, fit_intercept=True, random_state=random_state)
    surrogate.fit(perturbations, labels, sample_weight=sample_weights)
    return surrogate.coef_
30 | 
31 | 
32 | # calculates lime weights for several perturbations and labels
33 | # perturbations is a list where each list entry is a 2d numpy array suitable for get_lime_weights
34 | # labels is a 2d numpy array with shape (no_samples, no_perturbations)
def explain_samples(perturbations, labels, random_state=None):
    """Compute LIME relevances for several perturbation blocks.

    Parameters
    ----------
    perturbations : sequence of 2d arrays, each suitable for get_lime_weights
    labels : sequence with one label vector per perturbation block
    random_state : passed through to get_lime_weights

    Returns
    -------
    list with one coefficient vector per perturbation block (empty for empty input).
    """
    no_blocks = len(perturbations)
    start_time = time.time()
    relevances = [get_lime_weights(p, l, random_state)
                  for p, l in tqdm(zip(perturbations, labels), total=no_blocks)]
    elapsed = time.time() - start_time
    # avoid a ZeroDivisionError when there is nothing to explain
    per_sample = elapsed / no_blocks if no_blocks else 0.0
    print('Calculation of {} relevances took on {} seconds ({} seconds per sample).'.format(no_blocks,
                                                                                           elapsed,
                                                                                           per_sample))
    return relevances
48 | 
# Command line entry point: load perturbations and labels, compute LIME relevances and either map
# them back to the data's vector space (if --data_path is given) or pickle them to save_path.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Implementation of the LIME algorithm.')
    parser.add_argument('perturbation_path', type=str,
                        help='Path to list (.pkl) of perturbations for data of shape (no_perturbations, no_features).')
    parser.add_argument('label_path', type=str,
                        help='Path to array containing labels of perturbations of shape (no_samples, no_perturbations).'
                             ' Labels are assumed to be binary (0/1).')
    parser.add_argument('save_path', type=str, help='Folder to save results.')
    parser.add_argument('--data_path', type=str, help='Path to data. Can be .npy, .npz, .pkl (sparse,numpy,list)')
    args = parser.parse_args()
    # NOTE: unpickling can execute arbitrary code - only load files from trusted sources
    with open(args.perturbation_path, 'rb') as perturbation_file:
        perturbations = pkl.load(perturbation_file)
    labels = np.load(args.label_path)
    if args.data_path:
        data_format = args.data_path.split('.')[-1]
        is_sparse = data_format == 'npz'
        if data_format == 'npy':
            data = np.load(args.data_path)
        elif data_format == 'pkl':
            with open(args.data_path, 'rb') as data_file:
                data = pkl.load(data_file)
        elif data_format == 'npz':
            data = load_npz(args.data_path)
        else:
            print('Data format was not understood. Data could not be loaded.')
            sys.exit(1)
    rels = explain_samples(perturbations, labels)
    if args.data_path:
        linreg_relevances_to_vector_space(rels, data, args.save_path, is_sparse)
    else:
        with open(args.save_path+'relevances_lime.pkl', 'wb') as relevance_file:
            pkl.dump(rels, relevance_file)
77 | 


--------------------------------------------------------------------------------
/Explanation/README.md:
--------------------------------------------------------------------------------
1 | # Explanations
2 | 
3 | This folder contains scripts to create explanations for the network architectures using LIME ([Ribeiro et al.](https://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdf)) and LEMNA ([Guo et al.](http://people.cs.vt.edu/gangwang/ccs18.pdf)). Based on the findings of our paper, we do not recommend using these methods; we recommend [white-box methods](https://github.com/albermax/innvestigate) instead. Still, we want to publish our implementations for the sake of open access.
4 | 
5 | * The input data for usage of this repository can be of three types: numpy array of shape (n_samples, n_features) like in Mimicus or VulDeePecker, scipy.sparse.csr_matrix of shape (n_samples, n_features) like in Drebin or a list of length n_samples where each entry in the list is a numpy array of different length like in DAMD, for example.
6 | * To use LIME or LEMNA you first need perturbations of the data. You can call `python3 perturbation_sampling.py --help` to find out how to generate them.
7 | * With the perturbations you can calculate relevances for the features your models use. Check `python3 Lemna.py --help` or `python3 Lime.py --help` to find out how.


--------------------------------------------------------------------------------
/Explanation/perturbation_sampling.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from scipy import sparse
  3 | import pickle as pkl
  4 | import sys
  5 | import os
  6 | import argparse
  7 | from tqdm import tqdm
  8 | from keras.models import load_model
  9 | 
 10 | sys.path.append('../utils/')
 11 | from utils import load_npy_npz_pkl
 12 | 
 13 | 
 14 | # samples perturbations from a data_sample (vector) by choosing a random number of random features from the original
 15 | # sample and setting the non-chosen features to 0
 16 | def sample_data_points(data_sample, no_samples):
 17 |     if type(data_sample).__module__ == 'scipy.sparse.csr':
 18 |         is_sparse = True
 19 |         # in sparse case we remember all row and column indices of the perturbations and create the matrix at one point
 20 |         nonzero_row_indices, nonzero_column_indices = [], []
 21 |     else:
 22 |         is_sparse = False
 23 |         # in the non-sparse case we save the perturbation directly after creation
 24 |         samples = np.zeros(shape=(no_samples,) + data_sample.shape, dtype=data_sample.dtype)
 25 |     non_zeros = np.nonzero(data_sample)
 26 |     # if data_sample is a (sparse) vector each nonzero index appears exactly once in the non_zeros
 27 |     if is_sparse:
 28 |         no_nonzero_entries = len(non_zeros[0])
 29 |         sampling_values = non_zeros[1]
 30 |     elif len(data_sample.shape) < 2:
 31 |         no_nonzero_entries = len(non_zeros[0])
 32 |         sampling_values = non_zeros[0]
 33 |     # if data sample is a vector of vectors, each nonzero index appears multiple times for each vector
 34 |     else:
 35 |         no_nonzero_entries = len(np.unique(non_zeros[0]))
 36 |         sampling_values = np.unique(non_zeros[0])
 37 |     for i in range(no_samples):
 38 |         # the first row contains the original sample
 39 |         if i == 0:
 40 |             no_samples_indices = no_nonzero_entries
 41 |         else:
 42 |             # how many entries are we going to draw (at least one!)
 43 |             no_samples_indices = np.random.randint(1, no_nonzero_entries + 1)
 44 |         # which samples are we actually drawing
 45 |         if is_sparse:
 46 |             sample_indices = np.random.choice(non_zeros[1], no_samples_indices, replace=False)
 47 |             nonzero_row_indices += [i] * no_samples_indices
 48 |             nonzero_column_indices += list(sample_indices)
 49 |             continue
 50 |         else:
 51 |             sample_indices = np.random.choice(sampling_values, no_samples_indices, replace=False)
 52 |             perturbed_data = np.zeros(shape=data_sample.shape)
 53 |             perturbed_data[sample_indices] = data_sample[sample_indices]
 54 |             samples[i][:] = perturbed_data
 55 |     if is_sparse:
 56 |         data = [1] * len(nonzero_row_indices)
 57 |         samples = sparse.csr_matrix((data, (nonzero_row_indices, nonzero_column_indices)),
 58 |                                     shape=(no_samples, data_sample.shape[1]), dtype=data_sample.dtype)
 59 |     return samples
 60 | 
 61 | 
 62 | # this method creates 'no_samples' perturbations of each datapoint in data. Careful, perturbations can easily
 63 | # become very big in memory. Check beforehand whether no_samples*data fits into memory.
 64 | def get_pertubations(data, no_samples):
 65 |     perturbations = []
 66 |     if type(data).__module__ in ['scipy.sparse.csr', 'numpy']:
 67 |         total = data.shape[0]
 68 |     else:
 69 |         total = len(data)
 70 |     print('Sampling data points...')
 71 |     for data_sample in tqdm(data, total=total):
 72 |         perturbations.append(sample_data_points(data_sample, no_samples))
 73 |     # if all data points have the same shape, save one big numpy array
 74 |     if type(data).__module__ == 'scipy.sparse.csr':
 75 |         perturbations = sparse.vstack(perturbations)
 76 |     return perturbations
 77 | 
 78 | 
 79 | # classifies a batch of perturbation data. We assume that samples are of shape (no_perturbations, sample_dimension)
 80 | # and that the model can predict this sort of data
 81 | def get_classification(model, samples, batch_size=500):
 82 |     # if samples is list we assume that each sample has a different shape and we have to classify sample-wise
 83 |     if type(samples) is list:
 84 |         labels = []
 85 |         for sample in samples:
 86 |             labels.append(np.argmax(model.predict(sample.reshape((1,)+sample.shape)), axis=1))
 87 |         labels = np.array(labels).reshape((len(samples),))
 88 |     # else we can just predict the entire sample set
 89 |     else:
 90 |         labels = np.argmax(model.predict(samples, batch_size=batch_size), axis=1)
 91 |     labels = labels.astype(np.uint8)
 92 |     # we take care that label 1 is the label the classifier assigns to the sample (which is in row 0 of the
 93 |     # perturbations) and 0 is the one of differently classified perturbations
 94 |     classifier_label = labels[0]
 95 |     targets = np.where(labels == classifier_label)
 96 |     nontargets = np.where(labels != classifier_label)
 97 |     labels[targets] = 1
 98 |     labels[nontargets] = 0
 99 |     return labels
100 | 
101 | 
102 | # transforms a tuple of non zero indices (like output of scipy.sparse.nonzero or np.nonzero) to suited representation
103 | # for linear regression. Assumes original sample in the first row
def perturbation_block_to_regression_sample(nonzero_tuple):
    """Turn the nonzero indices of a perturbation block into a binary regression matrix.

    `nonzero_tuple` is the output of a nonzero() call on the block; only its first two entries
    (row and column indices) are used. The columns of the result correspond to the sorted
    features that are nonzero in row 0 (the original sample); entry (r, c) is 1 if perturbation
    r kept that feature. The result has dtype uint8.
    """
    rows, columns = nonzero_tuple[0], nonzero_tuple[1]
    original_features = np.unique(columns[rows == 0])
    perturbation_rows = np.unique(rows)
    # position of every original feature inside the compact regression representation
    regression_column = {feature: position for position, feature in enumerate(original_features)}
    block = np.zeros((len(perturbation_rows), len(original_features)), dtype=np.uint8)
    for row in perturbation_rows:
        kept_features = np.unique(columns[rows == row])
        block[row, [regression_column[feature] for feature in kept_features]] = 1
    return block
116 | 
117 | 
118 | # takes (test) data and computes perturbations and the labels of the perturbations as well as a linear representation of
119 | # the perturbation data. Delete specifies if the non-selected features of the perturbations will be deleted
120 | # or (if false) set to zero.
def perturbation_pipeline(data, model, no_perturbations_per_sample, save_path, save_perturbations, delete, seed=40):
    """Sample perturbations for every data point, classify them and store the results.

    Writes to the directory `save_path`:
      * perturbation_labels_seed_<seed>.npy - labels of all perturbations
      * linreg_representations_seed_<seed>.pkl - binary regression representation per sample
      * perturbation_data_seed_<seed>.(pkl|npz|npy) - the perturbations, if save_perturbations

    Parameters
    ----------
    data : list, numpy array or scipy sparse matrix of samples
    model : object passed to get_classification to label the perturbations
    no_perturbations_per_sample : int
    save_path : str, directory for the result files
    save_perturbations : bool, also store the perturbations themselves
    delete : bool, remove all-zero vectors from each perturbation before classification instead
        of leaving them zeroed; the process exits if a sample has fewer than 3 dimensions
    seed : int, seed for numpy's global random generator (also part of the file names)
    """
    np.random.seed(seed)
    no_samples = len(data) if type(data) is list else data.shape[0]
    all_labels, all_linregs, all_perturbations = [], [], []
    print('Computing perturbations for {} samples ...'.format(no_samples))
    for data_sample in tqdm(data):
        if len(data_sample.shape) < 3 and delete:
            print('Error. Delete = 1 is only allowed for sequential data!')
            sys.exit(1)
        perturbations = sample_data_points(data_sample, no_perturbations_per_sample)
        if delete:
            perturbations_deleted = []
            for i in range(perturbations.shape[0]):
                # drop every vector of the perturbation that consists of zeros only
                zero_vectors = np.array([(x == 0).all() for x in perturbations[i]])
                indices_to_delete = np.where(zero_vectors > 0)[0]
                perturbations_deleted.append(np.delete(perturbations[i], indices_to_delete, axis=0))
            labels = get_classification(model, perturbations_deleted)
        else:
            labels = get_classification(model, perturbations)
        all_labels.append(labels)
        all_linregs.append(perturbation_block_to_regression_sample(perturbations.nonzero()))
        if save_perturbations:
            all_perturbations.append(perturbations)
    np.save(os.path.join(save_path, 'perturbation_labels_seed_{}.npy'.format(seed)), np.array(all_labels))
    with open(os.path.join(save_path, 'linreg_representations_seed_{}.pkl'.format(seed)), 'wb') as linreg_file:
        pkl.dump(all_linregs, linreg_file)
    if save_perturbations:
        if type(data) is list:
            with open(os.path.join(save_path, 'perturbation_data_seed_{}.pkl'.format(seed)), 'wb') as data_file:
                pkl.dump(all_perturbations, data_file)
        # issparse is used instead of comparing type(data).__module__, which depends on scipy's
        # internal module layout
        elif sparse.issparse(data):
            sparse.save_npz(os.path.join(save_path, 'perturbation_data_seed_{}.npz'.format(seed)),
                            sparse.vstack(all_perturbations))
        else:
            np.save(os.path.join(save_path, 'perturbation_data_seed_{}.npy'.format(seed)), np.array(all_perturbations))
154 | 
155 | 
# Command line entry point: load data and model and run the perturbation pipeline.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Perturbation sampling process for lemna algorithm.')
    parser.add_argument('data_path', type=str, help='Path to data structure containing data samples.')
    parser.add_argument('model_path', type=str, help='Path to a keras model (*.hdf5) that can be loaded with model.load().')
    parser.add_argument('save_path', type=str, help='Where to store the results.')
    parser.add_argument('no_perturbations', type=int, help='How many perturbations of each sample will be created.')
    # the help texts are split over several literals; each piece ends with a space so that the
    # words are not glued together in the --help output
    parser.add_argument('--save_perturbations', type=int, default=0,
                        help='If 1 the real perturbations (not only the binary representation of them) will be '
                             'saved. This can be useful for debugging but can use a lot of memory.')
    parser.add_argument('--delete', type=int, default=0,
                        help='If 1, features that are not selected for a perturbation will be deleted from the '
                             'sample instead of setting them to zero.')
    args = vars(parser.parse_args())
    for k, v in args.items():
        print('{} = {}'.format(k, v))
    data = load_npy_npz_pkl(args['data_path'])
    model = load_model(args['model_path'])
    perturbation_pipeline(data, model, args['no_perturbations'], args['save_path'], bool(args['save_perturbations']),
                          bool(args['delete']))
175 | 
176 | 


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/DalvikOpcodes.txt:
--------------------------------------------------------------------------------
  1 | nop 00
  2 | move 01
  3 | move/from16 02
  4 | move/16 03
  5 | move-wide 04
  6 | move-wide/from16 05
  7 | move-wide/16 06
  8 | move-object 07
  9 | move-object/from16 08
 10 | move-object/16 09
 11 | move-result 0a
 12 | move-result-wide 0b
 13 | move-result-object 0c
 14 | move-exception 0d
 15 | return-void 0e
 16 | return 0f
 17 | return-wide 10
 18 | return-object 11
 19 | const/4 12
 20 | const/16 13
 21 | const 14
 22 | const/high16 15
 23 | const-wide/16 16
 24 | const-wide/32 17
 25 | const-wide 18
 26 | const-wide/high16 19
 27 | const-string 1a
 28 | const-string/jumbo 1b
 29 | const-class 1c
 30 | monitor-enter 1d
 31 | monitor-exit 1e
 32 | check-cast 1f
 33 | instance-of 20
 34 | array-length 21
 35 | new-instance 22
 36 | new-array 23
 37 | filled-new-array 24
 38 | filled-new-array/range 25
 39 | fill-array-data 26
 40 | throw 27
 41 | goto 28
 42 | goto/16 29
 43 | goto/32 2a
 44 | packed-switch 2b
 45 | sparse-switch 2c
 46 | cmpl-float 2d
 47 | cmpg-float 2e
 48 | cmpl-double 2f
 49 | cmpg-double 30
 50 | cmp-long 31
 51 | if-eq 32
 52 | if-ne 33
 53 | if-lt 34
 54 | if-ge 35
 55 | if-gt 36
 56 | if-le 37
 57 | if-eqz 38
 58 | if-nez 39
 59 | if-ltz 3a
 60 | if-gez 3b
 61 | if-gtz 3c
 62 | if-lez 3d
 63 | aget 44
 64 | aget-wide 45
 65 | aget-object 46
 66 | aget-boolean 47
 67 | aget-byte 48
 68 | aget-char 49
 69 | aget-short 4a
 70 | aput 4b
 71 | aput-wide 4c
 72 | aput-object 4d
 73 | aput-boolean 4e
 74 | aput-byte 4f
 75 | aput-char 50
 76 | aput-short 51
 77 | iget 52
 78 | iget-wide 53
 79 | iget-object 54
 80 | iget-boolean 55
 81 | iget-byte 56
 82 | iget-char 57
 83 | iget-short 58
 84 | iput 59
 85 | iput-wide 5a
 86 | iput-object 5b
 87 | iput-boolean 5c
 88 | iput-byte 5d
 89 | iput-char 5e
 90 | iput-short 5f
 91 | sget 60
 92 | sget-wide 61
 93 | sget-object 62
 94 | sget-boolean 63
 95 | sget-byte 64
 96 | sget-char 65
 97 | sget-short 66
 98 | sput 67
 99 | sput-wide 68
100 | sput-object 69
101 | sput-boolean 6a
102 | sput-byte 6b
103 | sput-char 6c
104 | sput-short 6d
105 | invoke-virtual 6e
106 | invoke-super 6f
107 | invoke-direct 70
108 | invoke-static 71
109 | invoke-interface 72
110 | invoke-virtual/range 74
111 | invoke-super/range 75
112 | invoke-direct/range 76
113 | invoke-static/range 77
114 | invoke-interface/range 78
115 | neg-int 7b
116 | not-int 7c
117 | neg-long 7d
118 | not-long 7e
119 | neg-float 7f
120 | neg-double 80
121 | int-to-long 81
122 | int-to-float 82
123 | int-to-double 83
124 | long-to-int 84
125 | long-to-float 85
126 | long-to-double 86
127 | float-to-int 87
128 | float-to-long 88
129 | float-to-double 89
130 | double-to-int 8a
131 | double-to-long 8b
132 | double-to-float 8c
133 | int-to-byte 8d
134 | int-to-char 8e
135 | int-to-short 8f
136 | add-int 90
137 | sub-int 91
138 | mul-int 92
139 | div-int 93
140 | rem-int 94
141 | and-int 95
142 | or-int 96
143 | xor-int 97
144 | shl-int 98
145 | shr-int 99
146 | ushr-int 9a
147 | add-long 9b
148 | sub-long 9c
149 | mul-long 9d
150 | div-long 9e
151 | rem-long 9f
152 | and-long a0
153 | or-long a1
154 | xor-long a2
155 | shl-long a3
156 | shr-long a4
157 | ushr-long a5
158 | add-float a6
159 | sub-float a7
160 | mul-float a8
161 | div-float a9
162 | rem-float aa
163 | add-double ab
164 | sub-double ac
165 | mul-double ad
166 | div-double ae
167 | rem-double af
168 | add-int/2addr b0
169 | sub-int/2addr b1
170 | mul-int/2addr b2
171 | div-int/2addr b3
172 | rem-int/2addr b4
173 | and-int/2addr b5
174 | or-int/2addr b6
175 | xor-int/2addr b7
176 | shl-int/2addr b8
177 | shr-int/2addr b9
178 | ushr-int/2addr ba
179 | add-long/2addr bb
180 | sub-long/2addr bc
181 | mul-long/2addr bd
182 | div-long/2addr be
183 | rem-long/2addr bf
184 | and-long/2addr c0
185 | or-long/2addr c1
186 | xor-long/2addr c2
187 | shl-long/2addr c3
188 | shr-long/2addr c4
189 | ushr-long/2addr c5
190 | add-float/2addr c6
191 | sub-float/2addr c7
192 | mul-float/2addr c8
193 | div-float/2addr c9
194 | rem-float/2addr ca
195 | add-double/2addr cb
196 | sub-double/2addr cc
197 | mul-double/2addr cd
198 | div-double/2addr ce
199 | rem-double/2addr cf
200 | add-int/lit16 d0
201 | rsub-int d1
202 | mul-int/lit16 d2
203 | div-int/lit16 d3
204 | rem-int/lit16 d4
205 | and-int/lit16 d5
206 | or-int/lit16 d6
207 | xor-int/lit16 d7
208 | add-int/lit8 d8
209 | rsub-int/lit8 d9
210 | mul-int/lit8 da
211 | div-int/lit8 db
212 | rem-int/lit8 dc
213 | and-int/lit8 dd
214 | or-int/lit8 de
215 | xor-int/lit8 df
216 | shl-int/lit8 e0
217 | shr-int/lit8 e1
218 | ushr-int/lit8 e2
219 | 


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/DalvikOpcodesDescription.json:
--------------------------------------------------------------------------------
   1 | {
   2 |     "00": [
   3 |         "00 10x",
   4 |         "nop",
   5 |         "",
   6 |         "Waste cycles.\nNote:\nData-bearing pseudo-instructions are tagged with this opcode, in which\ncase the high-order byte of the opcode unit indicates the nature of\nthe data. See \"packed-switch-payload Format\",\n\"sparse-switch-payload Format\", and\n\"fill-array-data-payload Format\" below."
   7 |     ],
   8 |     "01": [
   9 |         "01 12x",
  10 |         "move vA, vB",
  11 |         "A: destination register (4 bits)\nB: source register (4 bits)",
  12 |         "Move the contents of one non-object register to another."
  13 |     ],
  14 |     "02": [
  15 |         "02 22x",
  16 |         "move/from16 vAA, vBBBB",
  17 |         "A: destination register (8 bits)\nB: source register (16 bits)",
  18 |         "Move the contents of one non-object register to another."
  19 |     ],
  20 |     "03": [
  21 |         "03 32x",
  22 |         "move/16 vAAAA, vBBBB",
  23 |         "A: destination register (16 bits)\nB: source register (16 bits)",
  24 |         "Move the contents of one non-object register to another."
  25 |     ],
  26 |     "04": [
  27 |         "04 12x",
  28 |         "move-wide vA, vB",
  29 |         "A: destination register pair (4 bits)\nB: source register pair (4 bits)",
  30 |         "Move the contents of one register-pair to another.\nNote:\nIt is legal to move from vN to either\nvN-1 or vN+1, so implementations\nmust arrange for both halves of a register pair to be read before\nanything is written."
  31 |     ],
  32 |     "05": [
  33 |         "05 22x",
  34 |         "move-wide/from16 vAA, vBBBB",
  35 |         "A: destination register pair (8 bits)\nB: source register pair (16 bits)",
  36 |         "Move the contents of one register-pair to another.\nNote:\nImplementation considerations are the same as move-wide,\nabove."
  37 |     ],
  38 |     "06": [
  39 |         "06 32x",
  40 |         "move-wide/16 vAAAA, vBBBB",
  41 |         "A: destination register pair (16 bits)\nB: source register pair (16 bits)",
  42 |         "Move the contents of one register-pair to another.\nNote:\nImplementation considerations are the same as move-wide,\nabove."
  43 |     ],
  44 |     "07": [
  45 |         "07 12x",
  46 |         "move-object vA, vB",
  47 |         "A: destination register (4 bits)\nB: source register (4 bits)",
  48 |         "Move the contents of one object-bearing register to another."
  49 |     ],
  50 |     "08": [
  51 |         "08 22x",
  52 |         "move-object/from16 vAA, vBBBB",
  53 |         "A: destination register (8 bits)\nB: source register (16 bits)",
  54 |         "Move the contents of one object-bearing register to another."
  55 |     ],
  56 |     "09": [
  57 |         "09 32x",
  58 |         "move-object/16 vAAAA, vBBBB",
  59 |         "A: destination register (16 bits)\nB: source register (16 bits)",
  60 |         "Move the contents of one object-bearing register to another."
  61 |     ],
  62 |     "0a": [
  63 |         "0a 11x",
  64 |         "move-result vAA",
  65 |         "A: destination register (8 bits)",
  66 |         "Move the single-word non-object result of the most recent\ninvoke-kind into the indicated register.\nThis must be done as the instruction immediately after an\ninvoke-kind whose (single-word, non-object) result\nis not to be ignored; anywhere else is invalid."
  67 |     ],
  68 |     "0b": [
  69 |         "0b 11x",
  70 |         "move-result-wide vAA",
  71 |         "A: destination register pair (8 bits)",
  72 |         "Move the double-word result of the most recent\ninvoke-kind into the indicated register pair.\nThis must be done as the instruction immediately after an\ninvoke-kind whose (double-word) result\nis not to be ignored; anywhere else is invalid."
  73 |     ],
  74 |     "0c": [
  75 |         "0c 11x",
  76 |         "move-result-object vAA",
  77 |         "A: destination register (8 bits)",
  78 |         "Move the object result of the most recent invoke-kind\ninto the indicated register. This must be done as the instruction\nimmediately after an invoke-kind or\nfilled-new-array\nwhose (object) result is not to be ignored; anywhere else is invalid."
  79 |     ],
  80 |     "0d": [
  81 |         "0d 11x",
  82 |         "move-exception vAA",
  83 |         "A: destination register (8 bits)",
  84 |         "Save a just-caught exception into the given register. This must\nbe the first instruction of any exception handler whose caught\nexception is not to be ignored, and this instruction must only\never occur as the first instruction of an exception handler; anywhere\nelse is invalid."
  85 |     ],
  86 |     "0e": [
  87 |         "0e 10x",
  88 |         "return-void",
  89 |         "",
  90 |         "Return from a void method."
  91 |     ],
  92 |     "0f": [
  93 |         "0f 11x",
  94 |         "return vAA",
  95 |         "A: return value register (8 bits)",
  96 |         "Return from a single-width (32-bit) non-object value-returning\nmethod."
  97 |     ],
  98 |     "10": [
  99 |         "10 11x",
 100 |         "return-wide vAA",
 101 |         "A: return value register-pair (8 bits)",
 102 |         "Return from a double-width (64-bit) value-returning method."
 103 |     ],
 104 |     "11": [
 105 |         "11 11x",
 106 |         "return-object vAA",
 107 |         "A: return value register (8 bits)",
 108 |         "Return from an object-returning method."
 109 |     ],
 110 |     "12": [
 111 |         "12 11n",
 112 |         "const/4 vA, #+B",
 113 |         "A: destination register (4 bits)\nB: signed int (4 bits)",
 114 |         "Move the given literal value (sign-extended to 32 bits) into\nthe specified register."
 115 |     ],
 116 |     "13": [
 117 |         "13 21s",
 118 |         "const/16 vAA, #+BBBB",
 119 |         "A: destination register (8 bits)\nB: signed int (16 bits)",
 120 |         "Move the given literal value (sign-extended to 32 bits) into\nthe specified register."
 121 |     ],
 122 |     "14": [
 123 |         "14 31i",
 124 |         "const vAA, #+BBBBBBBB",
 125 |         "A: destination register (8 bits)\nB: arbitrary 32-bit constant",
 126 |         "Move the given literal value into the specified register."
 127 |     ],
 128 |     "15": [
 129 |         "15 21h",
 130 |         "const/high16 vAA, #+BBBB0000",
 131 |         "A: destination register (8 bits)\nB: signed int (16 bits)",
 132 |         "Move the given literal value (right-zero-extended to 32 bits) into\nthe specified register."
 133 |     ],
 134 |     "16": [
 135 |         "16 21s",
 136 |         "const-wide/16 vAA, #+BBBB",
 137 |         "A: destination register (8 bits)\nB: signed int (16 bits)",
 138 |         "Move the given literal value (sign-extended to 64 bits) into\nthe specified register-pair."
 139 |     ],
 140 |     "17": [
 141 |         "17 31i",
 142 |         "const-wide/32 vAA, #+BBBBBBBB",
 143 |         "A: destination register (8 bits)\nB: signed int (32 bits)",
 144 |         "Move the given literal value (sign-extended to 64 bits) into\nthe specified register-pair."
 145 |     ],
 146 |     "18": [
 147 |         "18 51l",
 148 |         "const-wide vAA, #+BBBBBBBBBBBBBBBB",
 149 |         "A: destination register (8 bits)\nB: arbitrary double-width (64-bit) constant",
 150 |         "Move the given literal value into\nthe specified register-pair."
 151 |     ],
 152 |     "19": [
 153 |         "19 21h",
 154 |         "const-wide/high16 vAA, #+BBBB000000000000",
 155 |         "A: destination register (8 bits)\nB: signed int (16 bits)",
 156 |         "Move the given literal value (right-zero-extended to 64 bits) into\nthe specified register-pair."
 157 |     ],
 158 |     "1a": [
 159 |         "1a 21c",
 160 |         "const-string vAA, string@BBBB",
 161 |         "A: destination register (8 bits)\nB: string index",
 162 |         "Move a reference to the string specified by the given index into the\nspecified register."
 163 |     ],
 164 |     "1b": [
 165 |         "1b 31c",
 166 |         "const-string/jumbo vAA, string@BBBBBBBB",
 167 |         "A: destination register (8 bits)\nB: string index",
 168 |         "Move a reference to the string specified by the given index into the\nspecified register."
 169 |     ],
 170 |     "1c": [
 171 |         "1c 21c",
 172 |         "const-class vAA, type@BBBB",
 173 |         "A: destination register (8 bits)\nB: type index",
 174 |         "Move a reference to the class specified by the given index into the\nspecified register. In the case where the indicated type is primitive,\nthis will store a reference to the primitive type's degenerate\nclass."
 175 |     ],
 176 |     "1d": [
 177 |         "1d 11x",
 178 |         "monitor-enter vAA",
 179 |         "A: reference-bearing register (8 bits)",
 180 |         "Acquire the monitor for the indicated object."
 181 |     ],
 182 |     "1e": [
 183 |         "1e 11x",
 184 |         "monitor-exit vAA",
 185 |         "A: reference-bearing register (8 bits)",
 186 |         "Release the monitor for the indicated object.\nNote:\nIf this instruction needs to throw an exception, it must do\nso as if the pc has already advanced past the instruction.\nIt may be useful to think of this as the instruction successfully\nexecuting (in a sense), and the exception getting thrown after\nthe instruction but before the next one gets a chance to\nrun. This definition makes it possible for a method to use\na monitor cleanup catch-all (e.g., finally) block as\nthe monitor cleanup for that block itself, as a way to handle the\narbitrary exceptions that might get thrown due to the historical\nimplementation of Thread.stop(), while still managing\nto have proper monitor hygiene."
 187 |     ],
 188 |     "1f": [
 189 |         "1f 21c",
 190 |         "check-cast vAA, type@BBBB",
 191 |         "A: reference-bearing register (8 bits)\nB: type index (16 bits)",
 192 |         "Throw a ClassCastException if the reference in the\ngiven register cannot be cast to the indicated type.\nNote: Since A must always be a reference\n(and not a primitive value), this will necessarily fail at runtime\n(that is, it will throw an exception) if B refers to a\nprimitive type."
 193 |     ],
 194 |     "20": [
 195 |         "20 22c",
 196 |         "instance-of vA, vB, type@CCCC",
 197 |         "A: destination register (4 bits)\nB: reference-bearing register (4 bits)\nC: type index (16 bits)",
 198 |         "Store in the given destination register 1\nif the indicated reference is an instance of the given type,\nor 0 if not.\nNote: Since B must always be a reference\n(and not a primitive value), this will always result\nin 0 being stored if C refers to a primitive\ntype."
 199 |     ],
 200 |     "21": [
 201 |         "21 12x",
 202 |         "array-length vA, vB",
 203 |         "A: destination register (4 bits)\nB: array reference-bearing register (4 bits)",
 204 |         "Store in the given destination register the length of the indicated\narray, in entries"
 205 |     ],
 206 |     "22": [
 207 |         "22 21c",
 208 |         "new-instance vAA, type@BBBB",
 209 |         "A: destination register (8 bits)\nB: type index",
 210 |         "Construct a new instance of the indicated type, storing a\nreference to it in the destination. The type must refer to a\nnon-array class."
 211 |     ],
 212 |     "23": [
 213 |         "23 22c",
 214 |         "new-array vA, vB, type@CCCC",
 215 |         "A: destination register (4 bits)\nB: size register\nC: type index",
 216 |         "Construct a new array of the indicated type and size. The type\nmust be an array type."
 217 |     ],
 218 |     "24": [
 219 |         "24 35c",
 220 |         "filled-new-array {vC, vD, vE, vF, vG}, type@BBBB",
 221 |         "A: array size and argument word count (4 bits)\nB: type index (16 bits)\nC..G: argument registers (4 bits each)",
 222 |         "Construct an array of the given type and size, filling it with the\nsupplied contents. The type must be an array type. The array's\ncontents must be single-word (that is,\nno arrays of long or double, but reference\ntypes are acceptable). The constructed\ninstance is stored as a \"result\" in the same way that the method invocation\ninstructions store their results, so the constructed instance must\nbe moved to a register with an immediately subsequent\nmove-result-object instruction (if it is to be used)."
 223 |     ],
 224 |     "25": [
 225 |         "25 3rc",
 226 |         "filled-new-array/range {vCCCC .. vNNNN}, type@BBBB",
 227 |         "A: array size and argument word count (8 bits)\nB: type index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1",
 228 |         "Construct an array of the given type and size, filling it with\nthe supplied contents. Clarifications and restrictions are the same\nas filled-new-array, described above."
 229 |     ],
 230 |     "26": [
 231 |         "26 31t",
 232 |         "fill-array-data vAA, +BBBBBBBB (with supplemental data as specified\nbelow in \"fill-array-data-payload Format\")",
 233 |         "A: array reference (8 bits)\nB: signed \"branch\" offset to table data pseudo-instruction\n(32 bits)",
 234 |         "Fill the given array with the indicated data. The reference must be\nto an array of primitives, and the data table must match it in type and\nmust contain no more elements than will fit in the array. That is,\nthe array may be larger than the table, and if so, only the initial\nelements of the array are set, leaving the remainder alone."
 235 |     ],
 236 |     "27": [
 237 |         "27 11x",
 238 |         "throw vAA",
 239 |         "A: exception-bearing register (8 bits)",
 240 |         "Throw the indicated exception."
 241 |     ],
 242 |     "28": [
 243 |         "28 10t",
 244 |         "goto +AA",
 245 |         "A: signed branch offset (8 bits)",
 246 |         "Unconditionally jump to the indicated instruction.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either with goto/32 or\nby including a nop as a target before the branch.)"
 247 |     ],
 248 |     "29": [
 249 |         "29 20t",
 250 |         "goto/16 +AAAA",
 251 |         "A: signed branch offset (16 bits)",
 252 |         "Unconditionally jump to the indicated instruction.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either with goto/32 or\nby including a nop as a target before the branch.)"
 253 |     ],
 254 |     "2a": [
 255 |         "2a 30t",
 256 |         "goto/32 +AAAAAAAA",
 257 |         "A: signed branch offset (32 bits)",
 258 |         "Unconditionally jump to the indicated instruction."
 259 |     ],
 260 |     "2b": [
 261 |         "2b 31t",
 262 |         "packed-switch vAA, +BBBBBBBB (with supplemental data as\nspecified below in \"packed-switch-payload Format\")",
 263 |         "A: register to test\nB: signed \"branch\" offset to table data pseudo-instruction\n(32 bits)",
 264 |         "Jump to a new instruction based on the value in the\ngiven register, using a table of offsets corresponding to each value\nin a particular integral range, or fall through to the next\ninstruction if there is no match."
 265 |     ],
 266 |     "2c": [
 267 |         "2c 31t",
 268 |         "sparse-switch vAA, +BBBBBBBB (with supplemental data as\nspecified below in \"sparse-switch-payload Format\")",
 269 |         "A: register to test\nB: signed \"branch\" offset to table data pseudo-instruction\n(32 bits)",
 270 |         "Jump to a new instruction based on the value in the given\nregister, using an ordered table of value-offset pairs, or fall\nthrough to the next instruction if there is no match."
 271 |     ],
 272 |     "2d": [
 273 |         "2d..31 23x",
 274 |         "cmpl-float (lt bias) vAA, vBB, vCC",
 275 |         "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair",
 276 |         "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN."
 277 |     ],
 278 |     "2e": [
 279 |         "2d..31 23x",
 280 |         "cmpg-float (gt bias) vAA, vBB, vCC",
 281 |         "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair",
 282 |         "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN."
 283 |     ],
 284 |     "2f": [
 285 |         "2d..31 23x",
 286 |         "cmpl-double (lt bias) vAA, vBB, vCC",
 287 |         "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair",
 288 |         "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN."
 289 |     ],
 290 |     "30": [
 291 |         "2d..31 23x",
 292 |         "cmpg-double (gt bias) vAA, vBB, vCC",
 293 |         "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair",
 294 |         "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN."
 295 |     ],
 296 |     "31": [
 297 |         "2d..31 23x",
 298 |         "cmp-long vAA, vBB, vCC",
 299 |         "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair",
 300 |         "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN."
 301 |     ],
 302 |     "32": [
 303 |         "32..37 22t",
 304 |         "if-eq vA, vB, +CCCC",
 305 |         "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)",
 306 |         "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 307 |     ],
 308 |     "33": [
 309 |         "32..37 22t",
 310 |         "if-ne vA, vB, +CCCC",
 311 |         "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)",
 312 |         "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 313 |     ],
 314 |     "34": [
 315 |         "32..37 22t",
 316 |         "if-lt vA, vB, +CCCC",
 317 |         "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)",
 318 |         "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 319 |     ],
 320 |     "35": [
 321 |         "32..37 22t",
 322 |         "if-ge vA, vB, +CCCC",
 323 |         "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)",
 324 |         "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 325 |     ],
 326 |     "36": [
 327 |         "32..37 22t",
 328 |         "if-gt vA, vB, +CCCC",
 329 |         "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)",
 330 |         "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 331 |     ],
 332 |     "37": [
 333 |         "32..37 22t",
 334 |         "if-le vA, vB, +CCCC",
 335 |         "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)",
 336 |         "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 337 |     ],
 338 |     "38": [
 339 |         "38..3d 21t",
 340 |         "if-eqz vAA, +BBBB",
 341 |         "A: register to test (8 bits)\nB: signed branch offset (16 bits)",
 342 |         "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 343 |     ],
 344 |     "39": [
 345 |         "38..3d 21t",
 346 |         "if-nez vAA, +BBBB",
 347 |         "A: register to test (8 bits)\nB: signed branch offset (16 bits)",
 348 |         "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 349 |     ],
 350 |     "3a": [
 351 |         "38..3d 21t",
 352 |         "if-ltz vAA, +BBBB",
 353 |         "A: register to test (8 bits)\nB: signed branch offset (16 bits)",
 354 |         "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 355 |     ],
 356 |     "3b": [
 357 |         "38..3d 21t",
 358 |         "if-gez vAA, +BBBB",
 359 |         "A: register to test (8 bits)\nB: signed branch offset (16 bits)",
 360 |         "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 361 |     ],
 362 |     "3c": [
 363 |         "38..3d 21t",
 364 |         "if-gtz vAA, +BBBB",
 365 |         "A: register to test (8 bits)\nB: signed branch offset (16 bits)",
 366 |         "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 367 |     ],
 368 |     "3d": [
 369 |         "38..3d 21t",
 370 |         "if-lez vAA, +BBBB",
 371 |         "A: register to test (8 bits)\nB: signed branch offset (16 bits)",
 372 |         "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)"
 373 |     ],
 374 |     "3e": [
 375 |         "3e..43 10x",
 376 |         "(unused)",
 377 |         "",
 378 |         "(unused)"
 379 |     ],
 380 |     "3f": [
 381 |         "3e..43 10x",
 382 |         "(unused)",
 383 |         "",
 384 |         "(unused)"
 385 |     ],
 386 |     "40": [
 387 |         "3e..43 10x",
 388 |         "(unused)",
 389 |         "",
 390 |         "(unused)"
 391 |     ],
 392 |     "41": [
 393 |         "3e..43 10x",
 394 |         "(unused)",
 395 |         "",
 396 |         "(unused)"
 397 |     ],
 398 |     "42": [
 399 |         "3e..43 10x",
 400 |         "(unused)",
 401 |         "",
 402 |         "(unused)"
 403 |     ],
 404 |     "43": [
 405 |         "3e..43 10x",
 406 |         "(unused)",
 407 |         "",
 408 |         "(unused)"
 409 |     ],
 410 |     "44": [
 411 |         "44..51 23x",
 412 |         "aget vAA, vBB, vCC",
 413 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 414 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 415 |     ],
 416 |     "45": [
 417 |         "44..51 23x",
 418 |         "aget-wide vAA, vBB, vCC",
 419 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 420 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 421 |     ],
 422 |     "46": [
 423 |         "44..51 23x",
 424 |         "aget-object vAA, vBB, vCC",
 425 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 426 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 427 |     ],
 428 |     "47": [
 429 |         "44..51 23x",
 430 |         "aget-boolean vAA, vBB, vCC",
 431 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 432 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 433 |     ],
 434 |     "48": [
 435 |         "44..51 23x",
 436 |         "aget-byte vAA, vBB, vCC",
 437 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 438 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 439 |     ],
 440 |     "49": [
 441 |         "44..51 23x",
 442 |         "aget-char vAA, vBB, vCC",
 443 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 444 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 445 |     ],
 446 |     "4a": [
 447 |         "44..51 23x",
 448 |         "aget-short vAA, vBB, vCC",
 449 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 450 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 451 |     ],
 452 |     "4b": [
 453 |         "44..51 23x",
 454 |         "aput vAA, vBB, vCC",
 455 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 456 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 457 |     ],
 458 |     "4c": [
 459 |         "44..51 23x",
 460 |         "aput-wide vAA, vBB, vCC",
 461 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 462 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 463 |     ],
 464 |     "4d": [
 465 |         "44..51 23x",
 466 |         "aput-object vAA, vBB, vCC",
 467 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 468 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 469 |     ],
 470 |     "4e": [
 471 |         "44..51 23x",
 472 |         "aput-boolean vAA, vBB, vCC",
 473 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 474 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 475 |     ],
 476 |     "4f": [
 477 |         "44..51 23x",
 478 |         "aput-byte vAA, vBB, vCC",
 479 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 480 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 481 |     ],
 482 |     "50": [
 483 |         "44..51 23x",
 484 |         "aput-char vAA, vBB, vCC",
 485 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 486 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 487 |     ],
 488 |     "51": [
 489 |         "44..51 23x",
 490 |         "aput-short vAA, vBB, vCC",
 491 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)",
 492 |         "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register."
 493 |     ],
 494 |     "52": [
 495 |         "52..5f 22c",
 496 |         "iget vA, vB, field@CCCC",
 497 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 498 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 499 |     ],
 500 |     "53": [
 501 |         "52..5f 22c",
 502 |         "iget-wide vA, vB, field@CCCC",
 503 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 504 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 505 |     ],
 506 |     "54": [
 507 |         "52..5f 22c",
 508 |         "iget-object vA, vB, field@CCCC",
 509 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 510 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 511 |     ],
 512 |     "55": [
 513 |         "52..5f 22c",
 514 |         "iget-boolean vA, vB, field@CCCC",
 515 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 516 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 517 |     ],
 518 |     "56": [
 519 |         "52..5f 22c",
 520 |         "iget-byte vA, vB, field@CCCC",
 521 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 522 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 523 |     ],
 524 |     "57": [
 525 |         "52..5f 22c",
 526 |         "iget-char vA, vB, field@CCCC",
 527 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 528 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 529 |     ],
 530 |     "58": [
 531 |         "52..5f 22c",
 532 |         "iget-short vA, vB, field@CCCC",
 533 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 534 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 535 |     ],
 536 |     "59": [
 537 |         "52..5f 22c",
 538 |         "iput vA, vB, field@CCCC",
 539 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 540 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 541 |     ],
 542 |     "5a": [
 543 |         "52..5f 22c",
 544 |         "iput-wide vA, vB, field@CCCC",
 545 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 546 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 547 |     ],
 548 |     "5b": [
 549 |         "52..5f 22c",
 550 |         "iput-object vA, vB, field@CCCC",
 551 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 552 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 553 |     ],
 554 |     "5c": [
 555 |         "52..5f 22c",
 556 |         "iput-boolean vA, vB, field@CCCC",
 557 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 558 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 559 |     ],
 560 |     "5d": [
 561 |         "52..5f 22c",
 562 |         "iput-byte vA, vB, field@CCCC",
 563 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 564 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 565 |     ],
 566 |     "5e": [
 567 |         "52..5f 22c",
 568 |         "iput-char vA, vB, field@CCCC",
 569 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 570 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 571 |     ],
 572 |     "5f": [
 573 |         "52..5f 22c",
 574 |         "iput-short vA, vB, field@CCCC",
 575 |         "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)",
 576 |         "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 577 |     ],
 578 |     "60": [
 579 |         "60..6d 21c",
 580 |         "sget vAA, field@BBBB",
 581 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 582 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 583 |     ],
 584 |     "61": [
 585 |         "60..6d 21c",
 586 |         "sget-wide vAA, field@BBBB",
 587 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 588 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 589 |     ],
 590 |     "62": [
 591 |         "60..6d 21c",
 592 |         "sget-object vAA, field@BBBB",
 593 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 594 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 595 |     ],
 596 |     "63": [
 597 |         "60..6d 21c",
 598 |         "sget-boolean vAA, field@BBBB",
 599 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 600 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 601 |     ],
 602 |     "64": [
 603 |         "60..6d 21c",
 604 |         "sget-byte vAA, field@BBBB",
 605 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 606 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 607 |     ],
 608 |     "65": [
 609 |         "60..6d 21c",
 610 |         "sget-char vAA, field@BBBB",
 611 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 612 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 613 |     ],
 614 |     "66": [
 615 |         "60..6d 21c",
 616 |         "sget-short vAA, field@BBBB",
 617 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 618 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 619 |     ],
 620 |     "67": [
 621 |         "60..6d 21c",
 622 |         "sput vAA, field@BBBB",
 623 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 624 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 625 |     ],
 626 |     "68": [
 627 |         "60..6d 21c",
 628 |         "sput-wide vAA, field@BBBB",
 629 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 630 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 631 |     ],
 632 |     "69": [
 633 |         "60..6d 21c",
 634 |         "sput-object vAA, field@BBBB",
 635 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 636 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 637 |     ],
 638 |     "6a": [
 639 |         "60..6d 21c",
 640 |         "sput-boolean vAA, field@BBBB",
 641 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 642 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 643 |     ],
 644 |     "6b": [
 645 |         "60..6d 21c",
 646 |         "sput-byte vAA, field@BBBB",
 647 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 648 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 649 |     ],
 650 |     "6c": [
 651 |         "60..6d 21c",
 652 |         "sput-char vAA, field@BBBB",
 653 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 654 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 655 |     ],
 656 |     "6d": [
 657 |         "60..6d 21c",
 658 |         "sput-short vAA, field@BBBB",
 659 |         "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)",
 660 |         "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset."
 661 |     ],
 662 |     "6e": [
 663 |         "6e..72 35c",
 664 |         "invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB",
 665 |         "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)",
 666 |         "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface.  The same\nmethod restrictions hold as for invoke-virtual. In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)."
 667 |     ],
 668 |     "6f": [
 669 |         "6e..72 35c",
 670 |         "invoke-super {vC, vD, vE, vF, vG}, meth@BBBB",
 671 |         "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)",
 672 |         "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface.  The same\nmethod restrictions hold as for invoke-virtual. In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)."
 673 |     ],
 674 |     "70": [
 675 |         "6e..72 35c",
 676 |         "invoke-direct {vC, vD, vE, vF, vG}, meth@BBBB",
 677 |         "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)",
 678 |         "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface.  The same\nmethod restrictions hold as for invoke-virtual. In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)."
 679 |     ],
 680 |     "71": [
 681 |         "6e..72 35c",
 682 |         "invoke-static {vC, vD, vE, vF, vG}, meth@BBBB",
 683 |         "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)",
 684 |         "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface.  The same\nmethod restrictions hold as for invoke-virtual. In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)."
 685 |     ],
 686 |     "72": [
 687 |         "6e..72 35c",
 688 |         "invoke-interface {vC, vD, vE, vF, vG}, meth@BBBB",
 689 |         "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)",
 690 |         "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface.  The same\nmethod restrictions hold as for invoke-virtual. In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)."
 691 |     ],
 692 |     "73": [
 693 |         "73 10x",
 694 |         "(unused)",
 695 |         "",
 696 |         "(unused)"
 697 |     ],
 698 |     "74": [
 699 |         "74..78 3rc",
 700 |         "invoke-virtual/range {vCCCC .. vNNNN}, meth@BBBB",
 701 |         "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1",
 702 |         "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions."
 703 |     ],
 704 |     "75": [
 705 |         "74..78 3rc",
 706 |         "invoke-super/range {vCCCC .. vNNNN}, meth@BBBB",
 707 |         "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1",
 708 |         "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions."
 709 |     ],
 710 |     "76": [
 711 |         "74..78 3rc",
 712 |         "invoke-direct/range {vCCCC .. vNNNN}, meth@BBBB",
 713 |         "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1",
 714 |         "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions."
 715 |     ],
 716 |     "77": [
 717 |         "74..78 3rc",
 718 |         "invoke-static/range {vCCCC .. vNNNN}, meth@BBBB",
 719 |         "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1",
 720 |         "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions."
 721 |     ],
 722 |     "78": [
 723 |         "74..78 3rc",
 724 |         "invoke-interface/range {vCCCC .. vNNNN}, meth@BBBB",
 725 |         "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1",
 726 |         "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions."
 727 |     ],
 728 |     "79": [
 729 |         "79..7a 10x",
 730 |         "(unused)",
 731 |         "",
 732 |         "(unused)"
 733 |     ],
 734 |     "7a": [
 735 |         "79..7a 10x",
 736 |         "(unused)",
 737 |         "",
 738 |         "(unused)"
 739 |     ],
 740 |     "7b": [
 741 |         "7b..8f 12x",
 742 |         "neg-int vA, vB",
 743 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 744 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 745 |     ],
 746 |     "7c": [
 747 |         "7b..8f 12x",
 748 |         "not-int vA, vB",
 749 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 750 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 751 |     ],
 752 |     "7d": [
 753 |         "7b..8f 12x",
 754 |         "neg-long vA, vB",
 755 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 756 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 757 |     ],
 758 |     "7e": [
 759 |         "7b..8f 12x",
 760 |         "not-long vA, vB",
 761 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 762 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 763 |     ],
 764 |     "7f": [
 765 |         "7b..8f 12x",
 766 |         "neg-float vA, vB",
 767 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 768 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 769 |     ],
 770 |     "80": [
 771 |         "7b..8f 12x",
 772 |         "neg-double vA, vB",
 773 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 774 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 775 |     ],
 776 |     "81": [
 777 |         "7b..8f 12x",
 778 |         "int-to-long vA, vB",
 779 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 780 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 781 |     ],
 782 |     "82": [
 783 |         "7b..8f 12x",
 784 |         "int-to-float vA, vB",
 785 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 786 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 787 |     ],
 788 |     "83": [
 789 |         "7b..8f 12x",
 790 |         "int-to-double vA, vB",
 791 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 792 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 793 |     ],
 794 |     "84": [
 795 |         "7b..8f 12x",
 796 |         "long-to-int vA, vB",
 797 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 798 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 799 |     ],
 800 |     "85": [
 801 |         "7b..8f 12x",
 802 |         "long-to-float vA, vB",
 803 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 804 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 805 |     ],
 806 |     "86": [
 807 |         "7b..8f 12x",
 808 |         "long-to-double vA, vB",
 809 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 810 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 811 |     ],
 812 |     "87": [
 813 |         "7b..8f 12x",
 814 |         "float-to-int vA, vB",
 815 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 816 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 817 |     ],
 818 |     "88": [
 819 |         "7b..8f 12x",
 820 |         "float-to-long vA, vB",
 821 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 822 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 823 |     ],
 824 |     "89": [
 825 |         "7b..8f 12x",
 826 |         "float-to-double vA, vB",
 827 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 828 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 829 |     ],
 830 |     "8a": [
 831 |         "7b..8f 12x",
 832 |         "double-to-int vA, vB",
 833 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 834 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 835 |     ],
 836 |     "8b": [
 837 |         "7b..8f 12x",
 838 |         "double-to-long vA, vB",
 839 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 840 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 841 |     ],
 842 |     "8c": [
 843 |         "7b..8f 12x",
 844 |         "double-to-float vA, vB",
 845 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 846 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 847 |     ],
 848 |     "8d": [
 849 |         "7b..8f 12x",
 850 |         "int-to-byte vA, vB",
 851 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 852 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 853 |     ],
 854 |     "8e": [
 855 |         "7b..8f 12x",
 856 |         "int-to-char vA, vB",
 857 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 858 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 859 |     ],
 860 |     "8f": [
 861 |         "7b..8f 12x",
 862 |         "int-to-short vA, vB",
 863 |         "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)",
 864 |         "Perform the identified unary operation on the source register,\nstoring the result in the destination register."
 865 |     ],
 866 |     "90": [
 867 |         "90..af 23x",
 868 |         "add-int vAA, vBB, vCC",
 869 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 870 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 871 |     ],
 872 |     "91": [
 873 |         "90..af 23x",
 874 |         "sub-int vAA, vBB, vCC",
 875 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 876 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 877 |     ],
 878 |     "92": [
 879 |         "90..af 23x",
 880 |         "mul-int vAA, vBB, vCC",
 881 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 882 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 883 |     ],
 884 |     "93": [
 885 |         "90..af 23x",
 886 |         "div-int vAA, vBB, vCC",
 887 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 888 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 889 |     ],
 890 |     "94": [
 891 |         "90..af 23x",
 892 |         "rem-int vAA, vBB, vCC",
 893 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 894 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 895 |     ],
 896 |     "95": [
 897 |         "90..af 23x",
 898 |         "and-int vAA, vBB, vCC",
 899 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 900 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 901 |     ],
 902 |     "96": [
 903 |         "90..af 23x",
 904 |         "or-int vAA, vBB, vCC",
 905 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 906 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 907 |     ],
 908 |     "97": [
 909 |         "90..af 23x",
 910 |         "xor-int vAA, vBB, vCC",
 911 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 912 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 913 |     ],
 914 |     "98": [
 915 |         "90..af 23x",
 916 |         "shl-int vAA, vBB, vCC",
 917 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 918 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 919 |     ],
 920 |     "99": [
 921 |         "90..af 23x",
 922 |         "shr-int vAA, vBB, vCC",
 923 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 924 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 925 |     ],
 926 |     "9a": [
 927 |         "90..af 23x",
 928 |         "ushr-int vAA, vBB, vCC",
 929 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 930 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 931 |     ],
 932 |     "9b": [
 933 |         "90..af 23x",
 934 |         "add-long vAA, vBB, vCC",
 935 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 936 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 937 |     ],
 938 |     "9c": [
 939 |         "90..af 23x",
 940 |         "sub-long vAA, vBB, vCC",
 941 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 942 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 943 |     ],
 944 |     "9d": [
 945 |         "90..af 23x",
 946 |         "mul-long vAA, vBB, vCC",
 947 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 948 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 949 |     ],
 950 |     "9e": [
 951 |         "90..af 23x",
 952 |         "div-long vAA, vBB, vCC",
 953 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 954 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 955 |     ],
 956 |     "9f": [
 957 |         "90..af 23x",
 958 |         "rem-long vAA, vBB, vCC",
 959 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 960 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 961 |     ],
 962 |     "a0": [
 963 |         "90..af 23x",
 964 |         "and-long vAA, vBB, vCC",
 965 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 966 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 967 |     ],
 968 |     "a1": [
 969 |         "90..af 23x",
 970 |         "or-long vAA, vBB, vCC",
 971 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 972 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 973 |     ],
 974 |     "a2": [
 975 |         "90..af 23x",
 976 |         "xor-long vAA, vBB, vCC",
 977 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 978 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 979 |     ],
 980 |     "a3": [
 981 |         "90..af 23x",
 982 |         "shl-long vAA, vBB, vCC",
 983 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 984 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 985 |     ],
 986 |     "a4": [
 987 |         "90..af 23x",
 988 |         "shr-long vAA, vBB, vCC",
 989 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 990 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 991 |     ],
 992 |     "a5": [
 993 |         "90..af 23x",
 994 |         "ushr-long vAA, vBB, vCC",
 995 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
 996 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
 997 |     ],
 998 |     "a6": [
 999 |         "90..af 23x",
1000 |         "add-float vAA, vBB, vCC",
1001 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1002 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1003 |     ],
1004 |     "a7": [
1005 |         "90..af 23x",
1006 |         "sub-float vAA, vBB, vCC",
1007 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1008 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1009 |     ],
1010 |     "a8": [
1011 |         "90..af 23x",
1012 |         "mul-float vAA, vBB, vCC",
1013 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1014 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1015 |     ],
1016 |     "a9": [
1017 |         "90..af 23x",
1018 |         "div-float vAA, vBB, vCC",
1019 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1020 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1021 |     ],
1022 |     "aa": [
1023 |         "90..af 23x",
1024 |         "rem-float vAA, vBB, vCC",
1025 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1026 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1027 |     ],
1028 |     "ab": [
1029 |         "90..af 23x",
1030 |         "add-double vAA, vBB, vCC",
1031 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1032 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1033 |     ],
1034 |     "ac": [
1035 |         "90..af 23x",
1036 |         "sub-double vAA, vBB, vCC",
1037 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1038 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1039 |     ],
1040 |     "ad": [
1041 |         "90..af 23x",
1042 |         "mul-double vAA, vBB, vCC",
1043 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1044 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1045 |     ],
1046 |     "ae": [
1047 |         "90..af 23x",
1048 |         "div-double vAA, vBB, vCC",
1049 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1050 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1051 |     ],
1052 |     "af": [
1053 |         "90..af 23x",
1054 |         "rem-double vAA, vBB, vCC",
1055 |         "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)",
1056 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)."
1057 |     ],
1058 |     "b0": [
1059 |         "b0..cf 12x",
1060 |         "add-int/2addr vA, vB",
1061 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1062 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1063 |     ],
1064 |     "b1": [
1065 |         "b0..cf 12x",
1066 |         "sub-int/2addr vA, vB",
1067 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1068 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1069 |     ],
1070 |     "b2": [
1071 |         "b0..cf 12x",
1072 |         "mul-int/2addr vA, vB",
1073 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1074 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1075 |     ],
1076 |     "b3": [
1077 |         "b0..cf 12x",
1078 |         "div-int/2addr vA, vB",
1079 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1080 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1081 |     ],
1082 |     "b4": [
1083 |         "b0..cf 12x",
1084 |         "rem-int/2addr vA, vB",
1085 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1086 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1087 |     ],
1088 |     "b5": [
1089 |         "b0..cf 12x",
1090 |         "and-int/2addr vA, vB",
1091 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1092 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1093 |     ],
1094 |     "b6": [
1095 |         "b0..cf 12x",
1096 |         "or-int/2addr vA, vB",
1097 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1098 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1099 |     ],
1100 |     "b7": [
1101 |         "b0..cf 12x",
1102 |         "xor-int/2addr vA, vB",
1103 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1104 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1105 |     ],
1106 |     "b8": [
1107 |         "b0..cf 12x",
1108 |         "shl-int/2addr vA, vB",
1109 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1110 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1111 |     ],
1112 |     "b9": [
1113 |         "b0..cf 12x",
1114 |         "shr-int/2addr vA, vB",
1115 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1116 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1117 |     ],
1118 |     "ba": [
1119 |         "b0..cf 12x",
1120 |         "ushr-int/2addr vA, vB",
1121 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1122 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1123 |     ],
1124 |     "bb": [
1125 |         "b0..cf 12x",
1126 |         "add-long/2addr vA, vB",
1127 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1128 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1129 |     ],
1130 |     "bc": [
1131 |         "b0..cf 12x",
1132 |         "sub-long/2addr vA, vB",
1133 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1134 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1135 |     ],
1136 |     "bd": [
1137 |         "b0..cf 12x",
1138 |         "mul-long/2addr vA, vB",
1139 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1140 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1141 |     ],
1142 |     "be": [
1143 |         "b0..cf 12x",
1144 |         "div-long/2addr vA, vB",
1145 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1146 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1147 |     ],
1148 |     "bf": [
1149 |         "b0..cf 12x",
1150 |         "rem-long/2addr vA, vB",
1151 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1152 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1153 |     ],
1154 |     "c0": [
1155 |         "b0..cf 12x",
1156 |         "and-long/2addr vA, vB",
1157 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1158 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1159 |     ],
1160 |     "c1": [
1161 |         "b0..cf 12x",
1162 |         "or-long/2addr vA, vB",
1163 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1164 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1165 |     ],
1166 |     "c2": [
1167 |         "b0..cf 12x",
1168 |         "xor-long/2addr vA, vB",
1169 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1170 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1171 |     ],
1172 |     "c3": [
1173 |         "b0..cf 12x",
1174 |         "shl-long/2addr vA, vB",
1175 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1176 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1177 |     ],
1178 |     "c4": [
1179 |         "b0..cf 12x",
1180 |         "shr-long/2addr vA, vB",
1181 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1182 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1183 |     ],
1184 |     "c5": [
1185 |         "b0..cf 12x",
1186 |         "ushr-long/2addr vA, vB",
1187 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1188 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1189 |     ],
1190 |     "c6": [
1191 |         "b0..cf 12x",
1192 |         "add-float/2addr vA, vB",
1193 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1194 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1195 |     ],
1196 |     "c7": [
1197 |         "b0..cf 12x",
1198 |         "sub-float/2addr vA, vB",
1199 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1200 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1201 |     ],
1202 |     "c8": [
1203 |         "b0..cf 12x",
1204 |         "mul-float/2addr vA, vB",
1205 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1206 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1207 |     ],
1208 |     "c9": [
1209 |         "b0..cf 12x",
1210 |         "div-float/2addr vA, vB",
1211 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1212 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1213 |     ],
1214 |     "ca": [
1215 |         "b0..cf 12x",
1216 |         "rem-float/2addr vA, vB",
1217 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1218 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1219 |     ],
1220 |     "cb": [
1221 |         "b0..cf 12x",
1222 |         "add-double/2addr vA, vB",
1223 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1224 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1225 |     ],
1226 |     "cc": [
1227 |         "b0..cf 12x",
1228 |         "sub-double/2addr vA, vB",
1229 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1230 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1231 |     ],
1232 |     "cd": [
1233 |         "b0..cf 12x",
1234 |         "mul-double/2addr vA, vB",
1235 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1236 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1237 |     ],
1238 |     "ce": [
1239 |         "b0..cf 12x",
1240 |         "div-double/2addr vA, vB",
1241 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1242 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1243 |     ],
1244 |     "cf": [
1245 |         "b0..cf 12x",
1246 |         "rem-double/2addr vA, vB",
1247 |         "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)",
1248 |         "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)."
1249 |     ],
1250 |     "d0": [
1251 |         "d0..d7 22s",
1252 |         "add-int/lit16 vA, vB, #+CCCC",
1253 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1254 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1255 |     ],
1256 |     "d1": [
1257 |         "d0..d7 22s",
1258 |         "rsub-int (reverse subtract) vA, vB, #+CCCC",
1259 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1260 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1261 |     ],
1262 |     "d2": [
1263 |         "d0..d7 22s",
1264 |         "mul-int/lit16 vA, vB, #+CCCC",
1265 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1266 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1267 |     ],
1268 |     "d3": [
1269 |         "d0..d7 22s",
1270 |         "div-int/lit16 vA, vB, #+CCCC",
1271 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1272 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1273 |     ],
1274 |     "d4": [
1275 |         "d0..d7 22s",
1276 |         "rem-int/lit16 vA, vB, #+CCCC",
1277 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1278 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1279 |     ],
1280 |     "d5": [
1281 |         "d0..d7 22s",
1282 |         "and-int/lit16 vA, vB, #+CCCC",
1283 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1284 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1285 |     ],
1286 |     "d6": [
1287 |         "d0..d7 22s",
1288 |         "or-int/lit16 vA, vB, #+CCCC",
1289 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1290 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1291 |     ],
1292 |     "d7": [
1293 |         "d0..d7 22s",
1294 |         "xor-int/lit16 vA, vB, #+CCCC",
1295 |         "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)",
1296 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics."
1297 |     ],
1298 |     "d8": [
1299 |         "d8..e2 22b",
1300 |         "add-int/lit8 vAA, vBB, #+CC",
1301 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1302 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1303 |     ],
1304 |     "d9": [
1305 |         "d8..e2 22b",
1306 |         "rsub-int/lit8 vAA, vBB, #+CC",
1307 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1308 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1309 |     ],
1310 |     "da": [
1311 |         "d8..e2 22b",
1312 |         "mul-int/lit8 vAA, vBB, #+CC",
1313 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1314 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1315 |     ],
1316 |     "db": [
1317 |         "d8..e2 22b",
1318 |         "div-int/lit8 vAA, vBB, #+CC",
1319 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1320 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1321 |     ],
1322 |     "dc": [
1323 |         "d8..e2 22b",
1324 |         "rem-int/lit8 vAA, vBB, #+CC",
1325 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1326 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1327 |     ],
1328 |     "dd": [
1329 |         "d8..e2 22b",
1330 |         "and-int/lit8 vAA, vBB, #+CC",
1331 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1332 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1333 |     ],
1334 |     "de": [
1335 |         "d8..e2 22b",
1336 |         "or-int/lit8 vAA, vBB, #+CC",
1337 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1338 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1339 |     ],
1340 |     "df": [
1341 |         "d8..e2 22b",
1342 |         "xor-int/lit8 vAA, vBB, #+CC",
1343 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1344 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1345 |     ],
1346 |     "e0": [
1347 |         "d8..e2 22b",
1348 |         "shl-int/lit8 vAA, vBB, #+CC",
1349 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1350 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1351 |     ],
1352 |     "e1": [
1353 |         "d8..e2 22b",
1354 |         "shr-int/lit8 vAA, vBB, #+CC",
1355 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1356 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1357 |     ],
1358 |     "e2": [
1359 |         "d8..e2 22b",
1360 |         "ushr-int/lit8 vAA, vBB, #+CC",
1361 |         "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)",
1362 |         "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int."
1363 |     ],
1364 |     "e3": [
1365 |         "e3..f9 10x",
1366 |         "(unused)",
1367 |         "",
1368 |         "(unused)"
1369 |     ],
1370 |     "e4": [
1371 |         "e3..f9 10x",
1372 |         "(unused)",
1373 |         "",
1374 |         "(unused)"
1375 |     ],
1376 |     "e5": [
1377 |         "e3..f9 10x",
1378 |         "(unused)",
1379 |         "",
1380 |         "(unused)"
1381 |     ],
1382 |     "e6": [
1383 |         "e3..f9 10x",
1384 |         "(unused)",
1385 |         "",
1386 |         "(unused)"
1387 |     ],
1388 |     "e7": [
1389 |         "e3..f9 10x",
1390 |         "(unused)",
1391 |         "",
1392 |         "(unused)"
1393 |     ],
1394 |     "e8": [
1395 |         "e3..f9 10x",
1396 |         "(unused)",
1397 |         "",
1398 |         "(unused)"
1399 |     ],
1400 |     "e9": [
1401 |         "e3..f9 10x",
1402 |         "(unused)",
1403 |         "",
1404 |         "(unused)"
1405 |     ],
1406 |     "ea": [
1407 |         "e3..f9 10x",
1408 |         "(unused)",
1409 |         "",
1410 |         "(unused)"
1411 |     ],
1412 |     "eb": [
1413 |         "e3..f9 10x",
1414 |         "(unused)",
1415 |         "",
1416 |         "(unused)"
1417 |     ],
1418 |     "ec": [
1419 |         "e3..f9 10x",
1420 |         "(unused)",
1421 |         "",
1422 |         "(unused)"
1423 |     ],
1424 |     "ed": [
1425 |         "e3..f9 10x",
1426 |         "(unused)",
1427 |         "",
1428 |         "(unused)"
1429 |     ],
1430 |     "ee": [
1431 |         "e3..f9 10x",
1432 |         "(unused)",
1433 |         "",
1434 |         "(unused)"
1435 |     ],
1436 |     "ef": [
1437 |         "e3..f9 10x",
1438 |         "(unused)",
1439 |         "",
1440 |         "(unused)"
1441 |     ],
1442 |     "f0": [
1443 |         "e3..f9 10x",
1444 |         "(unused)",
1445 |         "",
1446 |         "(unused)"
1447 |     ],
1448 |     "f1": [
1449 |         "e3..f9 10x",
1450 |         "(unused)",
1451 |         "",
1452 |         "(unused)"
1453 |     ],
1454 |     "f2": [
1455 |         "e3..f9 10x",
1456 |         "(unused)",
1457 |         "",
1458 |         "(unused)"
1459 |     ],
1460 |     "f3": [
1461 |         "e3..f9 10x",
1462 |         "(unused)",
1463 |         "",
1464 |         "(unused)"
1465 |     ],
1466 |     "f4": [
1467 |         "e3..f9 10x",
1468 |         "(unused)",
1469 |         "",
1470 |         "(unused)"
1471 |     ],
1472 |     "f5": [
1473 |         "e3..f9 10x",
1474 |         "(unused)",
1475 |         "",
1476 |         "(unused)"
1477 |     ],
1478 |     "f6": [
1479 |         "e3..f9 10x",
1480 |         "(unused)",
1481 |         "",
1482 |         "(unused)"
1483 |     ],
1484 |     "f7": [
1485 |         "e3..f9 10x",
1486 |         "(unused)",
1487 |         "",
1488 |         "(unused)"
1489 |     ],
1490 |     "f8": [
1491 |         "e3..f9 10x",
1492 |         "(unused)",
1493 |         "",
1494 |         "(unused)"
1495 |     ],
1496 |     "f9": [
1497 |         "e3..f9 10x",
1498 |         "(unused)",
1499 |         "",
1500 |         "(unused)"
1501 |     ],
1502 |     "fa": [
1503 |         "fa 45cc",
1504 |         "invoke-polymorphic {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH",
1505 |         "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC: receiver (4 bits)\nD..G: argument registers (4 bits each)\nH: prototype reference index (16 bits)",
1506 |         "Invoke the indicated signature polymorphic method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\nThe method reference must be to a signature polymorphic method, such as\njava.lang.invoke.MethodHandle.invoke or\njava.lang.invoke.MethodHandle.invokeExact.\nThe receiver must be an object supporting the signature polymorphic\nmethod being invoked.\nThe prototype reference describes the argument types provided\nand the expected return type.\nThe invoke-polymorphic bytecode may raise exceptions when it\nexecutes. The exceptions are described in the API documentation\nfor the signature polymorphic method being invoked.\nPresent in Dex files from version 038 onwards."
1507 |     ],
1508 |     "fb": [
1509 |         "fb 4rcc",
1510 |         "invoke-polymorphic/range {vCCCC .. vNNNN}, meth@BBBB, proto@HHHH",
1511 |         "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: receiver (16 bits)\nH: prototype reference index (16 bits)\nN = A + C - 1",
1512 |         "Invoke the indicated method handle. See the invoke-polymorphic\ndescription above for details.\nPresent in Dex files from version 038 onwards."
1513 |     ],
1514 |     "fc": [
1515 |         "fc 35c",
1516 |         "invoke-custom {vC, vD, vE, vF, vG}, call_site@BBBB",
1517 |         "A: argument word count (4 bits)\nB: call site reference index (16 bits)\nC..G: argument registers (4 bits each)",
1518 |         "Resolves and invokes the indicated call site.\nThe result from the invocation (if any) may be stored with an\nappropriate move-result* variant as the immediately\nsubsequent instruction.\n\nThis instruction executes in two phases: call site\nresolution and call site invocation.\n\nCall site resolution checks whether the indicated\ncall site has an associated java.lang.invoke.CallSite instance.\nIf not, the bootstrap linker method for the indicated call site is\ninvoked using arguments present in the DEX file\n(see call_site_item). The\nbootstrap linker method returns\na java.lang.invoke.CallSite instance that will then\nbe associated with the indicated call site if no association\nexists. Another thread may have already made the association first,\nand if so execution of the instruction continues with the\nfirst associated java.lang.invoke.CallSite instance.\n\nCall site invocation is made on the\njava.lang.invoke.MethodHandle target of the resolved\njava.lang.invoke.CallSite instance. The target is invoked as\nif executing invoke-polymorphic (described above) using the\nmethod handle and arguments to the invoke-custom instruction\nas the arguments to an exact method handle invocation.\n\nExceptions raised by the bootstrap linker method are wrapped\nin a java.lang.BootstrapMethodError.  A\nBootstrapMethodError is also raised if:\n\nthe bootstrap linker method fails to return a\njava.lang.invoke.CallSite instance.\nthe returned java.lang.invoke.CallSite has a\nnull method handle target.\nthe method handle target is not of the requested type.\n\nPresent in Dex files from version 038 onwards."
1519 |     ],
1520 |     "fd": [
1521 |         "fd 3rc",
1522 |         "invoke-custom/range {vCCCC .. vNNNN}, call_site@BBBB",
1523 |         "A: argument word count (8 bits)\nB: call site reference index (16 bits)\nC: first argument register (16-bits)\nN = A + C - 1",
1524 |         "Resolve and invoke a call site. See the invoke-custom\ndescription above for details.\nPresent in Dex files from version 038 onwards."
1525 |     ],
1526 |     "fe": [
1527 |         "fe 21c",
1528 |         "const-method-handle vAA, method_handle@BBBB",
1529 |         "A: destination register (8 bits)\nB: method handle index (16 bits)",
1530 |         "Move a reference to the method handle specified by the given index into the\nspecified register.\nPresent in Dex files from version 039 onwards."
1531 |     ],
1532 |     "ff": [
1533 |         "ff 21c",
1534 |         "const-method-type vAA, proto@BBBB",
1535 |         "A: destination register (8 bits)\nB: method prototype reference (16 bits)",
1536 |         "Move a reference to the method prototype specified by the given index into the\nspecified register.\nPresent in Dex files from version 039 onwards."
1537 |     ]
1538 | }


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/Opcodes_all.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alewarne/explain-mlsec/a82530e1cb95f829a3147f1f5a0d3cf2b3e68975/NetworkTraining/DAMD/Opcodes_all.zip


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/config.py:
--------------------------------------------------------------------------------
# Training settings for damd.py, which star-imports this module.
token_path = 'Converted'  # folder with the tokenized samples; damd.py passes it to train_network_batchwise as data_path
no_tokens = 218  # passed to get_damd_cnn, whose embedding layer is built with no_tokens + 1 input indices
batch_size = 32  # number of files per train_on_batch / test_on_batch call
epochs = 50  # number of passes over the training files
testset_size = 0.1  # forwarded as test_size to train_test_split
vec_output = True  # if True output dimension is 2 (with softmax) else 1 with simple sigmoid


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/config_preprocessing.py:
--------------------------------------------------------------------------------
# Paths used by preprocessing.py, which star-imports this module.
opcode_path = 'Opcodes_all'  # input folder with the opcode sequence files to convert
dalvik_opcode_path = 'DalvikOpcodes.txt'  # opcode list used to build the token -> index mapping
save_path = 'Converted'  # output folder for the converted (index) files


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/damd.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import argparse
 4 | from tqdm import tqdm
 5 | import pickle as pkl
 6 | import numpy as np
 7 | from keras import Sequential
 8 | from keras.layers import Dense, Conv1D, Embedding, GlobalMaxPooling1D
 9 | from keras.preprocessing.sequence import pad_sequences
10 | from sklearn.model_selection import train_test_split
11 | from keras.callbacks import ModelCheckpoint
12 | from config import *
13 | 
14 | sys.path.append('../../utils/')
15 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric
16 | 
17 | 
# sorts a list of filenames in ascending order with respect to the number of tokens in the files
def get_sorted_list_of_filenames(data_path):
    """Return the names of all files in ``data_path`` ordered by ascending token count.

    The token count of a file is the number of comma separated fields in its content. Files with the same
    count keep the order in which ``os.listdir`` reported them.

    :param data_path: folder containing the tokenized files
    :return: list of file names (without the folder prefix)
    """
    fname_to_len = {}
    print('Sorting input by length ...')
    for fname in tqdm(os.listdir(data_path)):
        # a context manager closes each handle deterministically instead of leaving that to garbage collection
        with open(os.path.join(data_path, fname), 'r') as f:
            fname_to_len[fname] = len(f.read().split(','))
    return sorted(fname_to_len, key=fname_to_len.get)
27 | 
28 | 
# turns a (sorted) list of filenames of indices into two numpy arrays. One containing the indices and the other one
# containing the labels. Assumes that the last part of the filename is '.1' for malicious and '.0' for benign
def filename_list_to_numpy_arrays(filenames, root_path):
    """Load the token index files in ``filenames`` and derive their one-hot labels.

    :param filenames: names of the files to load, relative to ``root_path``
    :param root_path: folder containing the files
    :return: tuple ``(indices, labels)``. ``indices`` is a 1-d object array holding one uint8 array per file,
             ``labels`` is a float array of shape (len(filenames), 2) with [1,0] for names ending in '.0' and
             [0,1] for every other name.
    """
    # labels are either [1,0] or [0,1]
    labels = np.zeros(shape=(len(filenames), 2))
    # The files have different lengths. An explicit object array is required because recent numpy versions
    # refuse to build an array from ragged sequences implicitly, and it keeps the result one-dimensional even
    # if all files of a batch happen to have the same length.
    indices = np.empty(len(filenames), dtype=object)
    for i, filename in enumerate(filenames):
        with open(os.path.join(root_path, filename), 'r') as f:
            indices[i] = np.array(f.read().split(','), dtype=np.uint8)
        labels[i, :] = [1, 0] if filename.split('.')[-1] == '0' else [0, 1]
    return indices, labels
41 | 
42 | 
def get_damd_cnn(no_tokens, final_nonlinearity='softmax'):
    """Build the (uncompiled) DAMD network: embedding -> 1d convolution -> global max pooling -> dense -> output.

    :param no_tokens: number of distinct tokens; the embedding gets one extra index (index 0 is used for padding)
    :param final_nonlinearity: activation of the two-unit output layer
    :return: the keras ``Sequential`` model
    """
    embedding_dimensions = 8
    no_convolutional_filters = 64
    number_of_dense_units = 16
    kernel_size = 8
    no_labels = 2
    model = Sequential()
    model.add(Embedding(input_dim=no_tokens+1, output_dim=embedding_dimensions))
    model.add(Conv1D(filters=no_convolutional_filters, kernel_size=kernel_size, padding='valid', activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(number_of_dense_units, activation='relu'))
    model.add(Dense(no_labels, activation=final_nonlinearity))
    # summary() prints the table itself and returns None, so wrapping it in print() only adds a 'None' line
    model.summary()
    return model
57 | 
58 | 
def train_network_batchwise(data_path, network, no_epochs, batch_size, testset_size, random_state=42):
    """Train ``network`` on the token files in ``data_path``, loading and padding one batch at a time.

    After every epoch the mean train and test accuracy are printed and the model is saved to
    'models/damd_model_<epoch index>'.

    :param data_path: folder with the tokenized files (label is encoded in the file name ending)
    :param network: uncompiled keras model, it is compiled here
    :param no_epochs: number of passes over the training files
    :param batch_size: number of files per batch
    :param testset_size: forwarded as ``test_size`` to ``train_test_split``
    :param random_state: seed for the train/test split
    """
    os.makedirs('models', exist_ok=True)
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy',
                                                                              custom_true_positive_metric(vec_output),
                                                                              custom_false_positive_metric(vec_output)
                                                                              ])
    filenames_sorted = get_sorted_list_of_filenames(data_path)
    names_train, names_test = train_test_split(filenames_sorted, test_size=testset_size, random_state=random_state)

    def load_padded_batch(names):
        x, y = filename_list_to_numpy_arrays(names, data_path)
        # train_test_split shuffles the length-sorted names, so the last sample of a batch is not necessarily
        # the longest one. Padding to the real maximum keeps pad_sequences from truncating longer samples.
        longest = max(len(sequence) for sequence in x)
        return pad_sequences(x, longest, dtype='uint8'), y

    for j in range(no_epochs):
        acc_train, acc_test = [], []
        print('Training epoch {}'.format(j+1))
        for i in tqdm(range(0, len(names_train), batch_size)):
            x, y = load_padded_batch(names_train[i:i+batch_size])
            # index 1 of the result is the accuracy, the first metric passed to compile()
            acc_train.append(network.train_on_batch(x, y)[1])
        print('Train accuracy after {} epochs: {}:'.format(j+1, np.mean(acc_train)))
        for k in range(0, len(names_test), batch_size):
            x, y = load_padded_batch(names_test[k:k+batch_size])
            acc_test.append(network.test_on_batch(x, y)[1])
        print('Test accuracy after {} epochs: {}:'.format(j+1, np.mean(acc_test)))
        network.save('models/damd_model_%d' % j)
83 | 
# script entry point: build the network and train it with the settings star-imported from config.py
if __name__ == '__main__':
    damd_model = get_damd_cnn(no_tokens)
    train_network_batchwise(token_path, damd_model, epochs, batch_size, testset_size)


--------------------------------------------------------------------------------
/NetworkTraining/DAMD/preprocessing.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from tqdm import tqdm
 4 | from config_preprocessing import *
 5 | 
 6 | 
 7 | # preprocessing used for the dalvik byte sequences. concatenates all lines and extracts 2-grams of them
 8 | def doc_preprocessing(s):
 9 |     N = 2
10 |     lines = ''.join(s.splitlines())
11 |     res = ' '.join([lines[i:i + N] for i in range(0, len(lines), N)])
12 |     return res.split()
13 | 
14 | 
def get_dalvik_token_to_index(dalvik_opcode_path, save=False):
    """Build the mapping from dalvik tokens to 1-based indices.

    The token of a line is its second whitespace separated field. Blank lines are skipped; a non-blank line
    with fewer than two fields raises ``IndexError``.

    :param dalvik_opcode_path: path to the file listing the dalvik opcodes, one per line
    :param save: if True, additionally write the mapping as 'token:index' lines to the file 'token2idx_damd'
                 in the current working directory
    :return: dict mapping token to index
    """
    with open(dalvik_opcode_path) as opcode_file:
        tokens = [line.split()[1] for line in opcode_file if line.strip()]
    # index starts at 1 to have 0 for padding
    token_to_idx = dict(zip(tokens, range(1, len(tokens) + 1)))
    if save:
        with open('token2idx_damd', 'w') as f:
            for k, v in token_to_idx.items():
                print('{}:{}'.format(k, v), file=f)
    return token_to_idx
24 | 
25 | 
# converts files of dalvik opcode sequences into files of sequences of token indices where each token represents one
# dalvik opcode. Assumes that the files in the document_path folder have ending .1 for malicious and .0 for benign.
# requires a file containing all dalvik opcodes for the conversion
def convert_docs_to_idx(document_path, dalvik_opcode_path, saving_path):
    """Convert every file in ``document_path`` into a comma separated list of token indices.

    Each converted file is written to ``saving_path`` under its original name. The token -> index mapping is
    also saved as a side effect (see ``get_dalvik_token_to_index``).

    :param document_path: folder with the opcode sequence files
    :param dalvik_opcode_path: path to the file listing all dalvik opcodes
    :param saving_path: output folder, created if missing
    :raises KeyError: if a file contains a token that is not part of the opcode list
    """
    token_to_idx = get_dalvik_token_to_index(dalvik_opcode_path, save=True)
    # list the folder once so that the reported count and the processed files always agree
    file_names = os.listdir(document_path)
    print('Converting {} files to tokenized representation...'.format(len(file_names)))
    os.makedirs(saving_path, exist_ok=True)
    for fn in tqdm(file_names):
        with open(os.path.join(document_path, fn), 'r') as source:
            tokens = doc_preprocessing(source.read())
        with open(os.path.join(saving_path, fn), 'w') as f:
            print(','.join(str(token_to_idx[token]) for token in tokens), file=f)
38 | 
39 | 
# script entry point: convert all opcode files using the paths star-imported from config_preprocessing.py
if __name__ == '__main__':
    convert_docs_to_idx(opcode_path, dalvik_opcode_path, save_path)
42 | 


--------------------------------------------------------------------------------
/NetworkTraining/Drebin/DrebinDataGenerator.py:
--------------------------------------------------------------------------------
 1 | import keras
 2 | import random
 3 | import numpy as np
 4 | import os
 5 | 
 6 | 
 7 | # data generator that takes as input a list of filenames. The generator yields batches of indices indicating which
 8 | # features are set to one during training
 9 | class DrebinDataGenerator(keras.utils.Sequence):
10 |     def __init__(self, vec, data_list, feature_path, label_dict, batch_size, shuffle=True, vec_labels=True):
11 |         self.data_names = data_list
12 |         self.data_paths = [os.path.join(feature_path, item) for item in data_list]
13 |         self.label_dict = label_dict
14 |         self.batch_size = batch_size
15 |         self.shuffle = shuffle
16 |         self.vec = vec
17 |         self.data = vec.transform(self.data_paths)
18 |         self.vec_labels = vec_labels
19 |         self.on_epoch_end()
20 |     '''
21 |     shuffle data names after epoch to have different batches every iteration. we have to update the data matrix
22 |     aswell to know which samples are in which row
23 |     '''
24 |     def on_epoch_end(self):
25 |         if self.shuffle:
26 |             random.shuffle(self.data_paths)
27 |             self.data = self.vec.transform(self.data_paths)
28 | 
29 |     def __len__(self):
30 |         return int(np.floor(len(self.data_names)/self.batch_size))
31 | 
32 |     # returns label representation for binary labels.
33 |     def label_to_representation(self, label):
34 |         if self.vec_labels:
35 |             if label == 0:
36 |                 return [1, 0]
37 |             else:
38 |                 return [0, 1]
39 |         else:
40 |             return label
41 | 
42 |     def __getitem__(self, idx):
43 |         data_batch = self.data[idx*self.batch_size:(idx+1)*self.batch_size, :].toarray()
44 |         labels = [self.label_dict[os.path.split(path)[1]] for path in self.data_paths[idx*self.batch_size:
45 |                                                                                       (idx + 1)*self.batch_size]]
46 |         label_batch = np.array([self.label_to_representation(label) for label in labels], dtype='uint8')
47 |         return data_batch, label_batch
48 | 


--------------------------------------------------------------------------------
/NetworkTraining/Drebin/Drebin_DNN.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import os
 3 | import sys
 4 | from keras.models import Sequential
 5 | from keras.layers import Dense, Dropout
 6 | from keras.optimizers import SGD
 7 | from keras.callbacks import ModelCheckpoint
 8 | from DrebinDataGenerator import DrebinDataGenerator
 9 | from drebin_datapipeline import virustotal_json_to_labels, get_train_test_valid_names, get_count_vectorizer
10 | from config import *
11 | 
12 | sys.path.append('../../utils/')
13 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric
14 | 
# the network used by Grosse et al. in the paper 'adversarial examples for malware detection'
def get_network(no_features, final_nonlinearity, vec_output):
    """Build the (uncompiled) fully connected classifier.

    Two hidden relu layers with 200 units, each followed by dropout with rate 0.5, feed the output layer. The
    output layer has two units if ``vec_output`` is truthy and one unit otherwise and uses
    ``final_nonlinearity`` as activation.
    """
    output_units = 2 if vec_output else 1
    layer_stack = (
        Dense(units=200, activation='relu', input_shape=(no_features, )),
        Dropout(rate=0.5),
        Dense(units=200, activation='relu'),
        Dropout(rate=0.5),
        Dense(units=output_units, activation=final_nonlinearity),
    )
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    return network
27 | 
28 | 
# train the model
def train_model(model, training_gen, test_gen, loss, epochs, vec_output, save_period=1):
    """Compile ``model`` and train it, checkpointing the best models into the folder 'models'.

    A checkpoint is written whenever the validation true positive metric reaches a new maximum or the
    validation false positive metric reaches a new minimum.

    :param model: uncompiled keras model
    :param training_gen: generator for the training data
    :param test_gen: generator used as validation data
    :param loss: loss passed to ``compile``
    :param epochs: number of training epochs
    :param vec_output: forwarded to the custom true/false positive metrics
    :param save_period: forwarded as ``period`` to both checkpoints
    """
    # summary() prints the table itself and returns None, so wrapping it in print() only adds a 'None' line
    model.summary()
    os.makedirs('models', exist_ok=True)
    model.compile(optimizer=SGD(), loss=loss, metrics=['accuracy', custom_true_positive_metric(vec_output),
                                                       custom_false_positive_metric(vec_output)])
    fname = 'models/model.{epoch:03d}--ACC_{val_accuracy:.4f}--FP_{val_false_positive_metric:.4f}--' \
            'TP_{val_true_positive_metric:.4f}.hdf5'
    # monitor the validation metric: the file name reports validation scores and the false positive checkpoint
    # below selects on validation data as well
    model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max',
                                          period=save_period)
    model_checkpoint_fn = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min',
                                          period=save_period)
    model.fit_generator(generator=training_gen, epochs=epochs, class_weight={0:1, 1:6.5}, validation_data=test_gen,
                        max_queue_size=10, callbacks=[model_checkpoint_tp, model_checkpoint_fn])
44 | 
45 | 
# returns two lists of filenames, one for training and one for testing, taken from the split with the given index.
# split_path is the folder containing the files with the filenames for training, testing and validation. Names
# without an entry in label_dict are dropped.
def get_train_test_data_names(split_path, index, label_dict):
    """Return ``(train_names, test_names)`` of split ``index``, restricted to names present in ``label_dict``."""
    chosen_split = get_train_test_valid_names(split_path)[index]
    # position 0 holds the training names, position 1 the test names; the validation names are not used
    train_names, test_names = (
        [name for name in part if name in label_dict] for part in (chosen_split[0], chosen_split[1])
    )
    return train_names, test_names
57 | 
58 | 
if __name__ == '__main__':
    # label dict and vectorizer are cached as pickle files in the working directory and only computed if the
    # cache file is missing. The files are opened with context managers so that they are flushed and closed
    # deterministically.
    if not os.path.isfile('train_label_dict.pkl'):
        print('Calculating label dict ...')
        label_dict = virustotal_json_to_labels(json_path, threshold)
        with open('train_label_dict.pkl', 'wb') as cache_file:
            pickle.dump(label_dict, cache_file)
    else:
        with open('train_label_dict.pkl', 'rb') as cache_file:
            label_dict = pickle.load(cache_file)
    if not os.path.isfile('train_vec.pkl'):
        print('Calculating count vectorizer ...')
        vec = get_count_vectorizer(doc_path, label_dict)
        with open('train_vec.pkl', 'wb') as cache_file:
            pickle.dump(vec, cache_file)
    else:
        with open('train_vec.pkl', 'rb') as cache_file:
            vec = pickle.load(cache_file)
    no_tokens = len(vec.vocabulary_)
    train_data_names, test_data_names = get_train_test_data_names(split_path, split_index, label_dict)
    train_data_gen = DrebinDataGenerator(vec, train_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    test_data_gen = DrebinDataGenerator(vec, test_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    model = get_network(no_tokens, final_nonlinearity=nonlinearity, vec_output=vec_output)
    print('training with %d tokens' % no_tokens)
    train_model(model, train_data_gen, test_data_gen, loss, epochs=epochs, vec_output=vec_output)
79 | 


--------------------------------------------------------------------------------
/NetworkTraining/Drebin/config.py:
--------------------------------------------------------------------------------
# Settings for the Drebin training scripts (Drebin_DNN.py and drebin.py star-import this module).

# these paths actually come with the drebin dataset
doc_path = '' # insert path to feature vectors folder from drebin dataset
json_path = '' # insert path to virustotal.json from drebin dataset
split_path = '' # insert path to datasplits/all from drebin dataset

# parameters for learning
threshold = 10  # at least 10 scanners have to exist and classify the sample as malicious
batch_size = 64  # batch_size during training
split_index = 0  # which of the data splits from drebin dataset to use (0,..,10)
vec_output = True  # whether to output vectors [1,0], [0,1] or float at the end of the network
nonlinearity = 'softmax'  # nonlinearity in the final layer
loss = 'binary_crossentropy'  # loss for training
epochs = 50  # number of training epochs
14 | 


--------------------------------------------------------------------------------
/NetworkTraining/Drebin/drebin.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import os
 3 | import sys
 4 | from keras.models import Sequential
 5 | from keras.layers import Dense, Dropout
 6 | from keras.optimizers import SGD
 7 | from keras.callbacks import ModelCheckpoint
 8 | from DrebinDataGenerator import DrebinDataGenerator
 9 | from drebin_datapipeline import virustotal_json_to_labels, get_train_test_valid_names, get_count_vectorizer
10 | from config import *
11 | 
12 | sys.path.append('../../utils/')
13 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric
14 | 
# the network used by Grosse et al. in the paper 'adversarial examples for malware detection'
def get_network(no_features, final_nonlinearity, vec_output):
    """Return the uncompiled dense network: 2 x (200 relu units + dropout 0.5) followed by the output layer.

    The output layer has two units when ``vec_output`` is truthy, otherwise a single unit; its activation is
    ``final_nonlinearity``.
    """
    hidden_units, dropout_rate = 200, 0.5
    no_outputs = 2 if vec_output else 1
    net = Sequential()
    net.add(Dense(units=hidden_units, activation='relu', input_shape=(no_features, )))
    net.add(Dropout(rate=dropout_rate))
    net.add(Dense(units=hidden_units, activation='relu'))
    net.add(Dropout(rate=dropout_rate))
    net.add(Dense(units=no_outputs, activation=final_nonlinearity))
    return net
27 | 
28 | 
# train the model
def train_model(model, training_gen, test_gen, loss, epochs, vec_output, save_period=1):
    """Compile ``model`` and train it, checkpointing the best models into the folder '../models'.

    A checkpoint is written whenever the validation true positive metric reaches a new maximum or the
    validation false positive metric reaches a new minimum.

    :param model: uncompiled keras model
    :param training_gen: generator for the training data
    :param test_gen: generator used as validation data
    :param loss: loss passed to ``compile``
    :param epochs: number of training epochs
    :param vec_output: forwarded to the custom true/false positive metrics
    :param save_period: forwarded as ``period`` to both checkpoints
    """
    # summary() prints the table itself and returns None, so wrapping it in print() only adds a 'None' line
    model.summary()
    # the checkpoints cannot be written if the target folder is missing
    os.makedirs('../models', exist_ok=True)
    model.compile(optimizer=SGD(), loss=loss, metrics=['accuracy', custom_true_positive_metric(vec_output),
                                                       custom_false_positive_metric(vec_output)])
    # NOTE(review): the '{val_acc}' placeholder requires a keras version that logs the accuracy under the
    # abbreviated name 'acc'; other versions log 'val_accuracy' instead - confirm against the installed keras
    fname = '../models/model.{epoch:03d}--ACC_{val_acc:.4f}--FP_{val_false_positive_metric:.4f}--' \
            'TP_{val_true_positive_metric:.4f}.hdf5'
    # monitor the validation metric: the file name reports validation scores and the false positive checkpoint
    # below selects on validation data as well
    model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max',
                                          period=save_period)
    model_checkpoint_fn = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min',
                                          period=save_period)
    model.fit_generator(generator=training_gen, epochs=epochs, class_weight={0:1, 1:6.5}, validation_data=test_gen,
                        max_queue_size=10, callbacks=[model_checkpoint_tp, model_checkpoint_fn])
42 | 
43 | 
# returns the training and the test filenames of one data split. split_path is the folder holding the files with the
# filenames for training, testing and validation; since there are several splits, index selects which one to use.
# Only names that have a label in label_dict are kept.
def get_train_test_data_names(split_path, index, label_dict):
    """Return ``(train_names, test_names)`` of split ``index``, restricted to names present in ``label_dict``."""
    all_splits = get_train_test_valid_names(split_path)
    selected = all_splits[index]

    def labelled_only(candidates):
        return [name for name in candidates if name in label_dict]

    # entry 0 of a split are the training names, entry 1 the test names; validation names are ignored
    return labelled_only(selected[0]), labelled_only(selected[1])
55 | 
56 | 
if __name__ == '__main__':
    # label dict and vectorizer are cached as pickle files in the working directory and only computed if the
    # cache file is missing. The files are opened with context managers so that they are flushed and closed
    # deterministically.
    if not os.path.isfile('train_label_dict.pkl'):
        print('Calculating label dict ...')
        label_dict = virustotal_json_to_labels(json_path, threshold)
        with open('train_label_dict.pkl', 'wb') as cache_file:
            pickle.dump(label_dict, cache_file)
    else:
        with open('train_label_dict.pkl', 'rb') as cache_file:
            label_dict = pickle.load(cache_file)
    if not os.path.isfile('train_vec.pkl'):
        print('Calculating count vectorizer ...')
        vec = get_count_vectorizer(doc_path, label_dict)
        with open('train_vec.pkl', 'wb') as cache_file:
            pickle.dump(vec, cache_file)
    else:
        with open('train_vec.pkl', 'rb') as cache_file:
            vec = pickle.load(cache_file)
    no_tokens = len(vec.vocabulary_)
    train_data_names, test_data_names = get_train_test_data_names(split_path, split_index, label_dict)
    train_data_gen = DrebinDataGenerator(vec, train_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    test_data_gen = DrebinDataGenerator(vec, test_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    model = get_network(no_tokens, final_nonlinearity=nonlinearity, vec_output=vec_output)
    print('training with %d tokens' % no_tokens)
    train_model(model, train_data_gen, test_data_gen, loss, epochs=epochs, vec_output=vec_output)
77 | 


--------------------------------------------------------------------------------
/NetworkTraining/Drebin/drebin_datapipeline.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from sklearn.feature_extraction.text import CountVectorizer
 3 | import os
 4 | 
 5 | 
 6 | # converts list of filepaths into sklearn count vectorizer
 7 | def get_count_vectorizer(documents_path, label_dict):
 8 |     all_paths = [os.path.join(documents_path, name) for name in os.listdir(documents_path) if name in label_dict]
 9 |     vec = CountVectorizer(input='filename', token_pattern='.+', lowercase=False)
10 |     vec.fit_transform(all_paths)
11 |     return vec
12 | 
13 | 
# returns dict with key=sha256 hash of malware, value = 0/1 where 1 indicates malware and 0 indicates no malware
# malware label is set if at least 'min_no_positive_scans' scanners return label "detected: true"
def virustotal_json_to_labels(path_to_virustotal_json, min_no_positive_scans):
    """Derive binary labels from a virustotal report file that holds one json dict per line.

    Only reports with ``response_code == 1`` are considered. A sample gets label 0 if no scanner detected it,
    label 1 if at least ``min_no_positive_scans`` scanners detected it and is discarded otherwise.

    :param path_to_virustotal_json: path to the report file
    :param min_no_positive_scans: minimum number of positive scans for the malware label
    :return: dict mapping sha256 hash to 0 (benign) or 1 (malicious)
    """
    label_dict = {}
    with open(path_to_virustotal_json, 'r') as f:
        # parse line by line: blank lines can be skipped and the file never has to be held in memory as a whole
        for line in f:
            if not line.strip():
                continue
            d = json.loads(line)
            # for successful scans...
            if d['response_code'] != 1:
                continue
            positive_results = sum(1 for result in d['scans'].values() if result['detected'])
            # if all scanners return benign, label is 0, if at least min_no_positive_scans return true, label is 1
            # else, sample is discarded
            if positive_results == 0:
                label_dict[d['sha256']] = 0
            elif positive_results >= min_no_positive_scans:
                label_dict[d['sha256']] = 1
    return label_dict
41 | 
42 | 
# returns a list with tuples of (train_names, test_names, valid_names) for each split in the drebin dataset
def get_train_test_valid_names(path_to_split):
    """Collect the train/test/validation file names of every split below ``path_to_split``.

    A folder counts as a split if it contains the three files 'train_cs', 'test_cs' and 'validate_cs', each
    listing one name per line. Folders are visited in lexicographic order so that the index of a split does
    not depend on the order in which the file system reports directories.

    :param path_to_split: root folder that is searched recursively
    :return: list of tuples ``(train_names, test_names, val_names)``, one per split
    """
    def read_names(folder, file_name):
        with open(os.path.join(folder, file_name), 'r') as f:
            return f.read().splitlines()

    split_names = []
    for root, dirs, files in os.walk(path_to_split):
        # sorting in place makes os.walk descend into the sub folders in a deterministic order
        dirs.sort()
        if 'test_cs' in files and 'validate_cs' in files and 'train_cs' in files:
            # every list is read from the file it is named after
            train_names = read_names(root, 'train_cs')
            test_names = read_names(root, 'test_cs')
            val_names = read_names(root, 'validate_cs')
            split_names.append((train_names, test_names, val_names))
    return split_names
54 | 


--------------------------------------------------------------------------------
/NetworkTraining/Mimicus/config.py:
--------------------------------------------------------------------------------
# Settings for mimicus.py, which star-imports this module.
path_to_csv = 'contagio-all.csv'  # csv file read by get_train_data_test_data

random_seed = 123456  # seed for the train/test split
vec_output = True  # True: output layer with two units, False: a single output unit
epochs = 100  # number of training epochs
batch_size = 32  # batch size during training
loss = 'binary_crossentropy'  # loss for training
final_nonlinearity = 'softmax'  # activation of the output layer
optimizer = 'adam'  # optimizer passed to model.compile
binary_encoding = True  # corresponds to the binary_encoding argument of get_train_data_test_data
11 | 


--------------------------------------------------------------------------------
/NetworkTraining/Mimicus/mimicus.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import numpy as np
 4 | from keras.models import Sequential
 5 | from keras.layers import Dense, Dropout
 6 | from sklearn.model_selection import train_test_split
 7 | from keras.callbacks import ModelCheckpoint
 8 | from sklearn.metrics import confusion_matrix
 9 | from sklearn.preprocessing import normalize
10 | from config import *
11 | 
12 | sys.path.append('../../utils/')
13 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric
14 | 
15 | 
def get_train_data_test_data(random_seed, binary_encoding=True):
    """Load the csv at ``path_to_csv`` and split it into train and test data.

    Column 0 of the csv is the label and column 1 the file name; all remaining columns are features. The first
    row is the header.

    :param random_seed: seed for the 75/25 train/test split
    :param binary_encoding: if True every non-zero feature value is replaced by 1, otherwise every feature
                            column is normalized by its maximum
    :return: tuple ``(x_train, x_test, y_train, y_test, filenames_test)`` with one-hot labels ([1,0] for label 0,
             [0,1] otherwise)
    """
    non_relevant_columns = [1]  #filename
    label_column = 0
    arr = np.genfromtxt(path_to_csv, dtype=str, delimiter=',', skip_header=0)
    filenames = arr[1:, 1]
    no_columns = arr.shape[1]
    columns_to_use = [i for i in range(no_columns) if i not in non_relevant_columns]
    # feature_names = np.genfromtxt(path_to_csv, dtype=str, delimiter=',', skip_footer=9999, usecols=columns_to_use)[1:]
    # idx_to_token = dict(zip(range(len(feature_names)), feature_names))
    # pkl.dump(idx_to_token, open('data_mimicus/idx_to_token.pkl', 'wb'))
    # the builtin float replaces the alias np.float, which has been removed from numpy
    arr = np.genfromtxt(path_to_csv, dtype=float, delimiter=',', skip_header=1, usecols=columns_to_use)
    labels = arr[:, label_column]
    labels = np.array([[1,0] if l == 0 else [0,1] for l in labels])
    data = np.delete(arr, label_column, axis=1)
    if binary_encoding:
        data[data != 0] = 1
    else:
        data = normalize(data, 'max', axis=0)
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.25, random_state=random_seed)
    # splitting with the same seed and test size yields the file names that belong to x_test
    _, filenames_test = train_test_split(filenames, test_size=0.25, random_state=random_seed)
    return x_train, x_test, y_train, y_test, filenames_test
37 | 
38 | 
# network used by Guo et al. in the lemna paper. This is essentially the network from Grosse et al. for the drebin
# dataset
def get_network(no_features, final_nonlinearity, vec_output):
    """Assemble the uncompiled dense classifier for ``no_features`` input features.

    Architecture: two relu layers with 200 units, each followed by dropout (rate 0.5), then an output layer
    with two units if ``vec_output`` is truthy and one unit otherwise, activated by ``final_nonlinearity``.
    """
    if vec_output:
        output_units = 2
    else:
        output_units = 1
    layers = [
        Dense(units=200, activation='relu', input_shape=(no_features, )),
        Dropout(rate=0.5),
        Dense(units=200, activation='relu'),
        Dropout(rate=0.5),
        Dense(units=output_units, activation=final_nonlinearity),
    ]
    classifier = Sequential()
    for layer in layers:
        classifier.add(layer)
    return classifier
51 | 
52 | 
def train_network(batch_size, epochs, loss, optimizer, vec_output, final_nonlinearity, random_seed):
    """Train the network on the csv data, checkpoint the best models into 'models' and print test statistics.

    A checkpoint is written whenever the validation true positive metric reaches a new maximum or the
    validation false positive metric reaches a new minimum.

    :param batch_size: batch size during training
    :param epochs: number of training epochs
    :param loss: loss passed to ``compile``
    :param optimizer: optimizer passed to ``compile``
    :param vec_output: whether the network has two output units (True) or one (False)
    :param final_nonlinearity: activation of the output layer
    :param random_seed: seed for the train/test split
    """
    os.makedirs('models', exist_ok=True)
    # forward the binary_encoding switch from config.py; without it the setting would have no effect because
    # the loader would always fall back to its default
    x_train, x_test, y_train, y_test, _ = get_train_data_test_data(random_seed, binary_encoding)
    # np.save('data_mimicus/test_data/float_encoded/test_data.npy', x_test)
    # np.save('data_mimicus/test_data/float_encoded/test_labels.npy', y_test)
    no_features = x_train.shape[1]
    model = get_network(no_features, final_nonlinearity, vec_output)
    fname = 'models/model.{epoch:03d}--ACC_{val_accuracy:.4f}--FP_{val_false_positive_metric:.4f}--' \
            'TP_{val_true_positive_metric:.4f}.hdf5'
    model_checkpoint_fp = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min')
    model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max')
    model.compile(optimizer, loss, metrics=['accuracy',custom_true_positive_metric(vec_output),
                                            custom_false_positive_metric(vec_output),], )
    # summary() prints the table itself and returns None, so wrapping it in print() only adds a 'None' line
    model.summary()
    model.fit(x_train, y_train, batch_size, epochs, validation_data=(x_test, y_test), verbose=2,
              callbacks=[model_checkpoint_tp, model_checkpoint_fp])
    get_statistics(model, x_test, y_test)
71 | 
72 | 
73 | # prints accuracy, precision, recall, fpr and f1 score for given model and test set with labels
def get_statistics(model, x_test, y_test):
    """Print accuracy, precision, recall, FPR and F1 score of `model` on a labelled test set.

    Predictions and labels are both reduced with argmax over axis 1, i.e. `model.predict(x_test)`
    and `y_test` have to be two dimensional with one column per class. Class 1 is treated as the
    positive class.

    :param model: object offering predict(x_test)
    :param x_test: test samples handed to model.predict
    :param y_test: labels with one column per class
    :return: None, the statistics are only printed
    """
    y_pred = np.argmax(model.predict(x_test), axis=1)
    y_true = np.argmax(y_test, axis=1)
    assert len(y_pred) == len(y_true)
    # the builtin float replaces the np.float alias, which was removed from NumPy
    acc = np.sum(y_pred == y_true) / float(len(y_pred))
    # fixing the labels guarantees a 2x2 matrix even if one class never occurs in labels/predictions
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # rows are true classes, columns are predicted classes
    TN, FP, FN, TP = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]
    TPR = TP/(TP+FN)
    FPR = FP/(FP+TN)
    precision = TP/(TP+FP)
    F1 = 2*TP/(2*TP+FP+FN)
    print('The model achieved: Accuracy:{}, Precision:{}, Recall:{}, FPR:{}, F1 score:{} on the test set.'.format(
        acc, precision, TPR, FPR, F1))
87 | 
88 | 
if __name__ == '__main__':
    # script entry point: start the training with the hyperparameters available in module scope
    # (presumably provided by the config module of this folder - the import is not visible here, confirm)
    train_network(batch_size, epochs, loss, optimizer, vec_output, final_nonlinearity, random_seed)
91 | 


--------------------------------------------------------------------------------
/NetworkTraining/README.md:
--------------------------------------------------------------------------------
 1 | # Network Training
 2 | 
 3 | ### This folder contains Training scripts for the four neural networks based on the papers:
 4 | 
 5 | * Adversarial examples for malware detection ([Grosse et al.](http://patrickmcdaniel.org/pubs/esorics17.pdf)) - called Drebin.
 6 | * Deep Android Malware Detection ([McLaughlin et al.](https://adamdoupe.com/publications/deep-android-malware-detection-codaspy2017.pdf)) - called DAMD.
 7 | * VulDeePecker: A Deep Learning-Based System for Vulnerability Detection ([Li et al.](https://arxiv.org/pdf/1801.01681.pdf)) - called VulDeePecker.
 8 | * LEMNA: Explaining Deep Learning based Security Applications ([Guo et al.](http://people.cs.vt.edu/gangwang/ccs18.pdf)) - called Mimicus.
 9 | 
#### To keep the size of this repo manageable, we do not ship all of the datasets with it, but all of them are available for download online. Each folder contains a config file where you can adjust the training parameters.
11 | * Mimicus: Call `python3 mimicus.py` to train the network.
* Drebin: Adjust the paths in the config file to point to the location where you downloaded the [drebin dataset](https://www.sec.cs.tu-bs.de/~danarp/drebin/). Call `python3 drebin.py` to train the network.
13 | * VulDeePecker: Extract the json file from the zip. Afterwards run `python3 word2vec.py` to train a word2vec model. Then run `python3 vuldeepecker.py` to train the network.
* DAMD: Extract the folder containing the dalvik opcodes. Afterwards run `python3 preprocessing.py` to convert them. Then run `python3 damd.py` to train the network.
15 | 
16 | #### The models with the best performance will be saved in the models folder.


--------------------------------------------------------------------------------
/NetworkTraining/VulDeePecker/VuldeeDataGenerator.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import keras
 3 | 
 4 | 
 5 | # simple data generator yielding batches of data from a numpy array
 6 | class VuldeeDataGenerator(keras.utils.Sequence):
 7 |     def __init__(self, data, labels, batch_size):
 8 |         self.data = data
 9 |         self.labels = labels
10 |         self.batch_size = batch_size
11 | 
12 |     def __len__(self):
13 |         length = int(np.floor(len(self.data)/self.batch_size))
14 |         return length if len(self.data)%self.batch_size == 0 else length+1
15 | 
16 |     def __getitem__(self, idx):
17 |         data_batch = self.data[idx*self.batch_size:(idx+1)*self.batch_size]
18 |         label_batch = self.labels[idx*self.batch_size:(idx+1)*self.batch_size]
19 |         return data_batch, label_batch
20 | 


--------------------------------------------------------------------------------
/NetworkTraining/VulDeePecker/config_training.py:
--------------------------------------------------------------------------------
# training configuration, star-imported by vuldeepecker.py
data_path = 'source-CWE-119-full.json'  # json file holding the code gadgets
w2v_path = 'w2v_model.bin'  # word2vec model loaded with Word2Vec.load

no_lstm_units = 300  # units of the LSTM that is wrapped in the Bidirectional layer
dropout_proba = 0.5  # rate of the Dropout layer following the LSTM
token_per_gadget = 50  # every gadget is truncated/zero-padded to this many tokens
embedding_dim = 200  # length of one token vector; should match the word2vec embedding size
batch_size = 64  # batch size of the train and test data generators
epochs = 100  # number of training epochs
loss = 'binary_crossentropy'  # loss the model is compiled with
sampling_random_seed = 42  # random_state of the train/test split
testset_size = 0.2  # test_size of the train/test split
vec_output = True  # passed to the custom TP/FP metrics; True means one-hot encoded labels/outputs
14 | 


--------------------------------------------------------------------------------
/NetworkTraining/VulDeePecker/config_word_to_vec.py:
--------------------------------------------------------------------------------
# word2vec training configuration, star-imported by word2vec.py
data_paths = ['source-CWE-119-full.json']  # json files whose gadgets are concatenated for training
output_name = 'w2v_model.bin'  # file the trained model is saved to
w2v_vocab_name = None  # specify if you want to save vocab
embedding_dim = 200  # size of the trained word vectors
iterations = 100  # passed as `iter` to train_word2vec
workers = 2  # parallel training
nice = 20  # the niceness value for this process (NOTE(review): not referenced by the visible word2vec.py code)
8 | 


--------------------------------------------------------------------------------
/NetworkTraining/VulDeePecker/source-CWE-119-full.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alewarne/explain-mlsec/a82530e1cb95f829a3147f1f5a0d3cf2b3e68975/NetworkTraining/VulDeePecker/source-CWE-119-full.zip


--------------------------------------------------------------------------------
/NetworkTraining/VulDeePecker/vuldeepecker.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # Copyright: 2018 Tim Dengel <t.dengel@tu-braunschweig.de>
  3 | # License: GPLv3+
  4 | 
  5 | import numpy as np
  6 | from config_training import *
  7 | from gensim.models.word2vec import Word2Vec
  8 | from keras import Sequential
  9 | from keras.layers import Dense, Dropout, LSTM, Bidirectional
 10 | from keras.callbacks import ModelCheckpoint
 11 | import json
 12 | import os
 13 | from sklearn.model_selection import train_test_split
 14 | import sys
 15 | from VuldeeDataGenerator import VuldeeDataGenerator
 16 | 
 17 | sys.path.append('../../utils/')
 18 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric
 19 | 
 20 | 
 21 | def load_data(gadgets, w2v):
 22 |     x = [[w2v[word] for word in gadget["tokens"]] for gadget in gadgets]
 23 |     y = [[1,0] if gadget["label"] == 0 else [0,1] for gadget in gadgets]
 24 | 
 25 |     types = [gadget["type"] for gadget in gadgets]
 26 |     return x, y, types
 27 | 
 28 | 
 29 | def pad_one(xi_typei):
 30 |     xi, typei = xi_typei
 31 |     if typei == 1:
 32 |         if len(xi) > token_per_gadget:
 33 |             ret = xi[0:token_per_gadget]
 34 |         elif len(xi) < token_per_gadget:
 35 |             ret = xi + [[0] * len(xi[0])] * (token_per_gadget - len(xi))
 36 |         else:
 37 |             ret = xi
 38 |     elif typei == 0 or typei == 2: # Trunc/append at the start
 39 |         if len(xi) > token_per_gadget:
 40 |             ret = xi[len(xi) - token_per_gadget:]
 41 |         elif len(xi) < token_per_gadget:
 42 |             ret = [[0] * len(xi[0])] * (token_per_gadget - len(xi)) + xi
 43 |         else:
 44 |             ret = xi
 45 |     else:
 46 |         raise Exception()
 47 | 
 48 |     return ret
 49 | 
 50 | 
 51 | def padding(x, types):
 52 |     return np.array([pad_one(bar) for bar in zip(x, types)])
 53 | 
 54 | 
 55 | def get_model(final_activation='softmax'):
 56 |     model = Sequential()
 57 |     model.add(Bidirectional(LSTM(units=no_lstm_units), input_shape=(token_per_gadget, embedding_dim)))
 58 |     model.add(Dropout(dropout_proba))
 59 |     model.add(Dense(2, activation=final_activation))
 60 |     model.compile(optimizer='adam', loss=loss, metrics=['accuracy',custom_true_positive_metric(vec_output),
 61 |                                             custom_false_positive_metric(vec_output)])
 62 |     return model
 63 | 
 64 | 
 65 | def train_model(model, training_generator, test_generator):
 66 |     if not os.path.isdir('models'):
 67 |         os.makedirs('models')
 68 |     fname = '/models/model.{epoch:03d}--ACC_{val_acc:.4f}--FP_{val_false_positive_metric:.4f}--' \
 69 |             'TP_{val_true_positive_metric:.4f}.hdf5'
 70 |     model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max')
 71 |     model_checkpoint_fp = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min')
 72 |     model.fit_generator(generator=training_generator, epochs=epochs, validation_data=test_generator, max_queue_size=10,
 73 |                         callbacks=[model_checkpoint_tp, model_checkpoint_fp])
 74 | 
 75 | 
 76 | def preprocess_data(x, y, types):
 77 |     x = padding(x, types)
 78 |     # Train/Test split
 79 |     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=testset_size, random_state=sampling_random_seed)
 80 |     y_train, y_test = np.array(y_train), np.array(y_test)
 81 | 
 82 |     print(len(x_train), 'train sequences')
 83 |     print(len(x_test), 'test sequences')
 84 | 
 85 |     datagen_train = VuldeeDataGenerator(x_train, y_train, batch_size)
 86 |     datagen_test = VuldeeDataGenerator(x_test, y_test, batch_size)
 87 | 
 88 |     return datagen_train, datagen_test
 89 | 
 90 | 
if __name__ == "__main__":
    # load the word2vec model stored at w2v_path
    w2v = Word2Vec.load(w2v_path)
    with open(data_path) as f:
        gadgets = json.load(f)
        # embed the tokens of every gadget and collect one-hot labels and gadget types
        x, y, types = load_data(gadgets, w2v)
        # raw gadgets and embedding model are not used below, drop the references
        del gadgets
        del w2v
        print("Loaded data.")
    # pad sequences, split data, create datagens
    datagen_train, datagen_test = preprocess_data(x,y, types)
    # build the model with its default softmax output and train it
    vuldee_model = get_model()
    train_model(vuldee_model, datagen_train, datagen_test)
103 | 


--------------------------------------------------------------------------------
/NetworkTraining/VulDeePecker/word2vec.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import contextlib
 3 | import itertools
 4 | import json
 5 | import os
 6 | from gensim.models.word2vec import Word2Vec
 7 | from config_word_to_vec import *
 8 | 
 9 | 
def train_word2vec(gadgets, vector_size=200, iter=100, workers=1):
    """Train a Word2Vec model on the token lists of the given gadgets.

    :param gadgets: iterable of dicts with a "tokens" entry
    :param vector_size: forwarded to Word2Vec as `size`
    :param iter: forwarded to Word2Vec as `iter`
    :param workers: forwarded to Word2Vec as `workers`
    :return: the trained Word2Vec model
    """
    token_lists = []
    for gadget in gadgets:
        token_lists.append(gadget["tokens"])

    # Train Word2Vec
    return Word2Vec(token_lists, min_count=1, size=vector_size, iter=iter, workers=workers)
16 | 
17 | 
if __name__ == "__main__":
    # open all configured datasets, parse them while the files are open and chain their gadgets
    with contextlib.ExitStack() as stack:
        handles = []
        for dataset in data_paths:
            handles.append(stack.enter_context(open(dataset)))
        parsed = []
        for handle in handles:
            parsed.append(json.load(handle))
        gadgets = itertools.chain.from_iterable(parsed)

    print("Training Word2Vec embedding...")
    w2v = train_word2vec(gadgets, embedding_dim, iterations, workers)

    print("Trained Word2Vec embedding with weights of shape:", w2v.wv.syn0.shape)
    # optionally store the mapping token -> vocabulary index as json
    if w2v_vocab_name:
        vocab = {token: entry.index for token, entry in w2v.wv.vocab.items()}
        with open(w2v_vocab_name, 'w') as f:
            json.dump(vocab, f, indent=4, sort_keys=True)
    w2v.save(output_name)
    print("Written model to: {}".format(output_name))
33 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Evaluating Explanation Methods for Deep Learning in Security
2 | 
3 | This repository contains code connected to the paper
4 | 
5 | [_Evaluating Explanation Methods for Deep Learning in Security_](https://www.sec.cs.tu-bs.de/pubs/2020-eurosp.pdf), _A.Warnecke, D.Arp, C. Wressnegger and K.Rieck, IEEE European Symposium on Security and Privacy (Euro S&P), 2020_.
6 | 
7 | * NetworkTraining contains training scripts for the different networks/datasets used in the paper.
8 | * Explanation contains scripts to calculate relevances for data and models with LEMNA and LIME.
9 | 


--------------------------------------------------------------------------------
/utils/custom_metrics.py:
--------------------------------------------------------------------------------
 1 | # implementation of TPR and FPR with keras backend in order to access them at the end of each learning epoch as a metric
 2 | # on the test set
 3 | 
 4 | import keras.backend as K
 5 | 
 6 | 
 7 | # calculates true positive metric using keras backend. predictions y_hat will be normalized and do not need to be a
 8 | # probability distribution. the vec_output parameter specifies whether the output/labels are one-hot encoded or
 9 | # one dimensional (which is only possible in a binary classification problem)
def custom_true_positive_metric(vec_output):
    """Create a keras metric function that computes the true positive rate.

    :param vec_output: truthy if labels and predictions are vectors that are reduced with argmax,
                       falsy if they are used as scalar values directly
    :return: function (y, y_hat) -> number of samples where label and rounded prediction are both
             1, divided by the number of samples with label 1
    """

    def true_positive_metric(y, y_hat):
        rounded = K.round(y_hat)
        # reduce vector outputs to class indices, keep scalar outputs as they are
        truth = K.argmax(y) if vec_output else y
        guess = K.argmax(rounded) if vec_output else rounded
        truth = K.cast(truth, dtype='float32')
        guess = K.cast(guess, dtype='float32')
        # indicator vectors: 1.0 where the label / the prediction equals one
        is_positive = K.cast(K.equal(K.ones(K.shape(truth)), truth), dtype='float32')
        flagged_positive = K.cast(K.equal(K.ones(K.shape(guess)), guess), dtype='float32')
        # the product is 1 only where both indicators are set, its sum counts the true positives
        hits = K.sum(is_positive * flagged_positive)
        positives = K.sum(is_positive)
        return hits / (positives)#+K.epsilon())

    return true_positive_metric
28 | 
29 | 
def custom_false_positive_metric(vec_output):
    """Create a keras metric function that computes the false positive rate.

    :param vec_output: truthy if labels and predictions are vectors that are reduced with argmax,
                       falsy if they are used as scalar values directly
    :return: function (y, y_hat) -> number of samples with label 0 whose rounded prediction is 1,
             divided by the number of samples with label 0
    """

    def false_positive_metric(y, y_hat):
        rounded = K.round(y_hat)
        # reduce vector outputs to class indices, keep scalar outputs as they are
        truth = K.argmax(y) if vec_output else y
        guess = K.argmax(rounded) if vec_output else rounded
        truth = K.cast(truth, dtype='float32')
        guess = K.cast(guess, dtype='float32')
        # indicator vectors: 1.0 where the label equals zero / the prediction equals one
        is_negative = K.cast(K.equal(K.zeros(K.shape(truth)), truth), dtype='float32')
        flagged_positive = K.cast(K.equal(K.ones(K.shape(guess)), guess), dtype='float32')
        # the product is 1 only where both indicators are set, its sum counts the false positives
        false_alarms = K.sum(is_negative*flagged_positive)
        negatives = K.sum(is_negative)
        return false_alarms / (negatives)#+K.epsilon())

    return false_positive_metric
48 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | # some useful methods for this project like normalization of relevances for various datatypes
  2 | import numpy as np
  3 | import copy
  4 | import sys
  5 | import os
  6 | import pickle as pkl
  7 | from scipy import sparse
  8 | from tqdm import tqdm
  9 | 
 10 | 
 11 | # given relevance array (either numpy or csr_sparse) normalizes it with respect to given method
 12 | # method can be one of [mean, max, abs_max].
 13 | # if macro is False, each sample will be normalized for itself, else the whole data is normalized
 14 | def normalize_relevances(relevances, method, macro=False):
 15 |     if method not in ['mean', 'max', 'abs_max']:
 16 |         print('Invalid method name! Choose one of {}'.format(['mean', 'max', 'abs_max']))
 17 |         sys.exit(1)
 18 |     normed_relevances = copy.deepcopy(relevances)
 19 |     if macro:
 20 |         normed_relevances = to_macro(normed_relevances)
 21 |     if type(normed_relevances).__module__ == 'scipy.sparse.csr':
 22 |         nonzero_rows = normed_relevances.nonzero()[0]
 23 |         print('Calculating normalization for {} samples ...'.format(normed_relevances.shape[0]))
 24 |         for idx in tqdm(np.unique(nonzero_rows)):
 25 |             data_idx = np.where(nonzero_rows == idx)[0]
 26 |             normed_row = normalize_array(normed_relevances.data[data_idx], method)
 27 |             normed_relevances.data[data_idx] = normed_row
 28 |     elif type(normed_relevances).__module__ == 'numpy' or type(normed_relevances) is list:
 29 |         print('Calculating normalization for {} samples ...'.format(len(normed_relevances)))
 30 |         for i in range(len(normed_relevances)):
 31 |             if type(normed_relevances[i]) is list:
 32 |                 normed_row = normalize_array(np.array(normed_relevances[i]), method)
 33 |                 normed_relevances[i] = list(normed_row)
 34 |             else:
 35 |                 normed_row = normalize_array(normed_relevances[i], method)
 36 |                 normed_relevances[i] = normed_row
 37 |     else:
 38 |         print('Datatype not understood!')
 39 |         sys.exit(1)
 40 |     return normed_relevances
 41 | 
 42 | 
 43 | # normalizes 1D numpy array with respect to some method
 44 | def normalize_array(arr, method):
 45 |     if method not in ['mean', 'max', 'abs_max']:
 46 |         print('Invalid method name! Choose one of {}'.format(['mean', 'max', 'abs_max']))
 47 |         sys.exit(1)
 48 |     arr_cpy = copy.deepcopy(arr)
 49 |     if method == 'abs_max':
 50 |         abs_max = np.max(np.abs(arr_cpy))
 51 |         if abs_max != 0:
 52 |             arr_cpy = 1. / abs_max * arr_cpy
 53 |     elif method == 'mean':
 54 |         mu = np.mean(arr_cpy)
 55 |         sigma = np.std(arr_cpy)
 56 |         if sigma == 0:
 57 |             sigma += 1e-5
 58 |         arr_cpy = (arr_cpy-mu)/sigma
 59 |     elif method == 'max':
 60 |         min, max = np.min(arr_cpy), np.max(arr_cpy)
 61 |         if max != min:
 62 |             arr_cpy = (arr_cpy-min)/(max-min)
 63 |             arr_cpy = 2 * arr_cpy - 1
 64 |         else:
 65 |             arr_cpy = (arr_cpy - min) / min
 66 |     return arr_cpy
 67 | 
 68 | 
 69 | def to_macro(normed_relevances):
 70 |     if type(normed_relevances).__module__ == 'scipy.sparse.csr':
 71 |         macro = np.array([normed_relevances.data])
 72 |     elif type(normed_relevances).__module__ == 'numpy':
 73 |         macro = np.array([normed_relevances.flatten])
 74 |     elif type(normed_relevances) is list:
 75 |         macro = np.array([x for l in normed_relevances for x in l])
 76 |     return macro
 77 | 
 78 | 
 79 | def get_error_type(y_true, y_pred):
 80 |     if y_true == 0:
 81 |         if y_pred == 0:
 82 |             return 'TN'
 83 |         else:
 84 |             return 'FP'
 85 |     else:
 86 |         if y_pred == 0:
 87 |             return 'FN'
 88 |         else:
 89 |             return 'TP'
 90 | 
 91 | 
 92 | # takes a filepath and loads the contained data for the data formats .npy, .pkl, .npz
 93 | def load_npy_npz_pkl(path_to_data):
 94 |     _, filetype = os.path.splitext(path_to_data)
 95 |     if filetype == '.npz':
 96 |         data = sparse.load_npz(path_to_data)
 97 |     elif filetype == '.npy':
 98 |         data = np.load(path_to_data)
 99 |     elif filetype == '.pkl':
100 |         data = pkl.load(open(path_to_data, 'rb'))
101 |     else:
102 |         print('Could not load filepath! Invalid datatype!')
103 |         data = None
104 |     return data
105 | 


--------------------------------------------------------------------------------