├── Explanation ├── Lemna.py ├── Lime.py ├── README.md └── perturbation_sampling.py ├── NetworkTraining ├── DAMD │ ├── DalvikOpcodes.txt │ ├── DalvikOpcodesDescription.json │ ├── Opcodes_all.zip │ ├── config.py │ ├── config_preprocessing.py │ ├── damd.py │ └── preprocessing.py ├── Drebin │ ├── DrebinDataGenerator.py │ ├── Drebin_DNN.py │ ├── config.py │ ├── drebin.py │ └── drebin_datapipeline.py ├── Mimicus │ ├── config.py │ ├── contagio-all.csv │ └── mimicus.py ├── README.md └── VulDeePecker │ ├── VuldeeDataGenerator.py │ ├── config_training.py │ ├── config_word_to_vec.py │ ├── source-CWE-119-full.zip │ ├── vuldeepecker.py │ └── word2vec.py ├── README.md └── utils ├── custom_metrics.py └── utils.py /Explanation/Lemna.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cvxpy as cvx 3 | from sklearn import linear_model 4 | import argparse 5 | import multiprocessing 6 | import pickle as pkl 7 | import time 8 | import sys 9 | 10 | 11 | # gaussian density function 12 | def gaussian(x, mu, sigma_squared): 13 | eps = 0 14 | return 1/(np.sqrt(2*np.pi*sigma_squared)+eps)*np.exp(-0.5*(x-mu)**2/(sigma_squared+eps)) 15 | 16 | 17 | # the expectation maximization algorithm of the lemna paper, calculation of indices can be found in appendix 18 | def em_regression_algorithm(data, labels, K, alpha_S, iterations, linreg_type, verbose=True, save_path=None): 19 | # determine if data is sparse 20 | sparse = True if type(data).__module__ == 'scipy.sparse.csr' else False 21 | no_samples = data.shape[0] 22 | sample_len = data.shape[1] 23 | label_sum = np.sum(np.abs(labels)) 24 | no_ones = len(np.where(labels == 1)[0]) 25 | no_zeros = len(labels) - no_ones 26 | #data = (data-np.mean(data))/np.std(data) 27 | if linreg_type not in ['lasso', 'fused_lasso']: 28 | print('Invalid linreg_type (%s)' %linreg_type) 29 | exit(1) 30 | elif sample_len <=1: 31 | if verbose: 32 | print('Encountered invaliddata sample!') 33 | with 
open(save_path, 'a') as f: 34 | print('Invalid sample.', file=f) 35 | return np.array([-1]), np.array([-1]) 36 | eps = 1e-6 37 | number_of_history_betas = 3 38 | convergence_threshold = 1e-2 39 | # initialize the parameters randomly 40 | pi, sigma_sq = np.random.uniform(0, 1, size=K), np.random.uniform(0, 1, size=K) 41 | # normalize pi 42 | pi = 1/np.sum(pi) * pi 43 | beta = np.random.uniform(-.1, .1, size=(K, sample_len)) 44 | z_hat = np.zeros(shape=(no_samples, K)) 45 | # check for convergence using last betas 46 | old_likelihoods = [] 47 | converged = False 48 | # run at most 'iterations' iterations but finish if the last 'number of history betas' log likelihood values are 49 | # close to each other 50 | initial_log_likelihood = 0 51 | for n in range(no_samples): 52 | if sparse: 53 | likelihood = sum([pi[k] * gaussian(labels[n], data.getrow(n).dot(beta[k,:])[0], sigma_sq[k]) 54 | for k in range(K)]) 55 | else: 56 | likelihood = sum([pi[k] * gaussian(labels[n], np.dot(data[n,:],beta[k, :]), sigma_sq[k]) 57 | for k in range(K)]) 58 | if likelihood != 0: 59 | initial_log_likelihood += np.log(likelihood) 60 | if verbose: 61 | print('Starting Expectation maximization algorithm for %d iterations with sum of labels %d' %(iterations, 62 | label_sum)) 63 | start_time = time.time() 64 | for iter in range(iterations): 65 | # E step 66 | for i in range(no_samples): 67 | if sparse: 68 | denom_e = sum([pi[k] * gaussian(labels[i], data.getrow(i).dot(beta[k, :])[0], sigma_sq[k]) for k in 69 | range(K)]) 70 | else: 71 | denom_e = sum([pi[k] * gaussian(labels[i], np.dot(data[i,:], beta[k,:]), sigma_sq[k]) for k in 72 | range(K)]) 73 | if denom_e == 0: 74 | denom_e = eps 75 | if verbose: 76 | print('set denom_e to eps') 77 | for k in range(K): 78 | pred_2 = data.getrow(i).dot(beta[k, :])[0] if sparse else np.dot(data[i,:], beta[k,:]) 79 | z_hat[i, k] = pi[k]*gaussian(labels[i], pred_2, sigma_sq[k])/denom_e 80 | # M step 81 | for k in range(K): 82 | denom_m = np.sum(z_hat[:, k]) 83 
| if denom_m == 0: 84 | denom_m = eps 85 | if sparse: 86 | sigma_sq[k] = sum([z_hat[i, k] * (labels[i] - data.getrow(i).dot(beta[k, :])[0])**2 for i in 87 | range(no_samples)]) / denom_m 88 | else: 89 | sigma_sq[k] = sum([z_hat[i, k] * (labels[i] - np.dot(data[i, :], beta[k, :])) ** 2 for i in 90 | range(no_samples)]) / denom_m 91 | if sigma_sq[k] == 0: 92 | sigma_sq[k] += eps 93 | if verbose: 94 | print('added eps to sigma') 95 | pi[k] = np.sum(z_hat[:, k])/no_samples 96 | component_assignments = np.argmax(z_hat, axis=1) 97 | # estimate betas by linear regression with fused lasso loss 98 | for k in range(K): 99 | sample_indices_of_k = np.where(component_assignments == k)[0] 100 | samples_of_k = data[sample_indices_of_k, :] 101 | labels_of_k = labels[sample_indices_of_k] 102 | if len(labels_of_k) > 0: 103 | if linreg_type == 'fused_lasso': 104 | beta[k,:] = solve_fused_lasso_regression(samples_of_k, labels_of_k, alpha_S) 105 | elif linreg_type == 'lasso': 106 | reg = linear_model.Lasso(alpha=alpha_S, precompute=True, normalize=True, max_iter=3000) 107 | reg.fit(samples_of_k, labels_of_k) 108 | beta[k, :] = reg.coef_ 109 | # recompute log_likelihood in order to check for convergence 110 | log_likelihood = 0 111 | for n in range(no_samples): 112 | if sparse: 113 | likelihood = sum([pi[k] * gaussian(labels[n], data.getrow(n).dot(beta[k, :])[0], sigma_sq[k]) 114 | for k in range(K)]) 115 | else: 116 | likelihood = sum([pi[k] * gaussian(labels[n], np.dot(data[n, :], beta[k, :]), sigma_sq[k]) 117 | for k in range(K)]) 118 | if likelihood != 0: 119 | log_likelihood += np.log(likelihood) 120 | if len(old_likelihoods) < number_of_history_betas: 121 | old_likelihoods.append(log_likelihood) 122 | else: 123 | abs_diffs = [] 124 | for beta_idx in range(number_of_history_betas): 125 | diff = np.abs(old_likelihoods[beta_idx]-log_likelihood) 126 | abs_diffs.append(diff) 127 | convergence_check = [np.sum(diff <= convergence_threshold) for diff in abs_diffs] 128 | if 
np.sum(convergence_check) == number_of_history_betas: 129 | converged = True 130 | old_likelihoods.pop(0) 131 | old_likelihoods.append(log_likelihood) 132 | if verbose: 133 | print('likelihood history', old_likelihoods) 134 | if converged: 135 | end_time = time.time() 136 | if verbose: 137 | print('EM-Alogirthm converged after %d iterations (%d seconds).' %(iter, end_time-start_time)) 138 | argm = np.argmax(z_hat, axis=1) 139 | for k in range(K): 140 | indices_of_k = np.where(argm==k)[0] 141 | labels_of_k = labels[indices_of_k] 142 | labels_in_k = np.unique(labels_of_k) 143 | d = {} 144 | for label in labels_in_k: 145 | d[label] = len(np.where(labels_of_k==label)[0]) 146 | print('labels in cluster %d'%k, d) 147 | break 148 | if save_path: 149 | with open(save_path, 'a') as f: 150 | projections = np.dot((beta * pi[:, np.newaxis]), np.transpose(data)) 151 | projections = np.sum(projections, axis=0) 152 | diff = (projections - labels) ** 2 153 | mse = 1. / len(diff) * np.sum(diff) 154 | if converged: 155 | print('S=%.3f_K=%d_linreg_type=%s_no_ones=%d_no_zeros=%d_time=%.4f_mse=%.4f' 156 | % (alpha_S, K, linreg_type, no_ones, no_zeros, end_time - start_time, mse), file=f) 157 | else: 158 | print('S=%.3f_K=%d_linreg_type=%s_no_ones=%d_no_zeros=%d_time=%.4f_mse=%.4f' 159 | % (alpha_S, K, linreg_type, no_ones, no_zeros, -1, mse), file=f) 160 | # print('mse', mse) 161 | # return the parameters by choosing the cluster belonging to the first row of the perturbations which is by 162 | # assumption the sample to be explained 163 | cluster_idx_sample = np.argmax(z_hat[0]) 164 | return beta[cluster_idx_sample], sigma_sq[cluster_idx_sample] 165 | 166 | 167 | # returns matrix A such that sum(abs(A*x)) is the fused lasso constraint on x 168 | def get_band_matrix_fused_lasso(dim): 169 | if dim <= 1: 170 | print('Invalid dimension for band matrix (%d)!'%dim) 171 | return None 172 | A = np.diag(-1*np.ones(dim)) 173 | rng = np.arange(dim-1) 174 | A[rng, rng+1] = 1 175 | A[dim-1,:] = 0 
176 | return A 177 | 178 | 179 | def solve_fused_lasso_regression(samples, labels, S): 180 | # for the sake of clarity 181 | A = cvx.Constant(samples) 182 | no_dimensions = samples.shape[1] 183 | beta = cvx.Variable(no_dimensions) 184 | # careful: the band matrix can get large very fast if dimension is high 185 | # D = get_band_matrix_fused_lasso(no_dimensions) 186 | regularization = beta[1:] - beta[:no_dimensions - 1] 187 | objective = cvx.Minimize(cvx.sum_squares(A*beta - labels)) 188 | # the constraint is the sum of the (absolute) differences of the neighbored betas to be bounded by S 189 | # constraints = [cvx.sum(cvx.abs(D*beta)) <= S] 190 | constraints = [cvx.sum(cvx.abs(regularization)) <= S] 191 | problem = cvx.Problem(objective, constraints) 192 | problem.solve() 193 | return beta.value 194 | 195 | 196 | def lemna_parallel(perturbation_data, perturbation_labels, K, alpha_S, iterations, no_processes, linreg_type, 197 | repetitions=1, verbose=False, save_path=None): 198 | assert len(perturbation_data) == len(perturbation_labels) 199 | no_samples = len(perturbation_data) * repetitions 200 | # repeat each perturbation repetitions times for parallel processing 201 | perturbations_repeated = [] 202 | for p in perturbation_data: 203 | perturbations_repeated += [p]*repetitions 204 | labels_repeated = np.repeat(perturbation_labels, repetitions, axis=0) 205 | if save_path: 206 | filenames = np.array([save_path+str(i) for i in range(len(perturbation_data))]) 207 | filenames = np.repeat(filenames, repetitions) 208 | else: 209 | filenames = no_samples*[None] 210 | arg_gen = zip(perturbations_repeated, labels_repeated, no_samples*[K], no_samples*[alpha_S], no_samples*[iterations], 211 | no_samples*[linreg_type], no_samples*[verbose], filenames) 212 | with multiprocessing.Pool(processes=no_processes) as pool: 213 | lemna_betas = pool.starmap(em_regression_algorithm, arg_gen) 214 | if type(perturbation_data) is list: 215 | betas = [lemna_beta[0] for lemna_beta in 
lemna_betas] 216 | else: 217 | betas = np.array([lemna_beta[0] for lemna_beta in lemna_betas]).reshape((len(perturbation_data), repetitions, 218 | perturbation_data.shape[-1])) 219 | sigmas = np.array([lemna_beta[1] for lemna_beta in lemna_betas]).reshape((len(perturbation_data), repetitions)) 220 | return betas, sigmas 221 | 222 | 223 | if __name__ == '__main__': 224 | parser = argparse.ArgumentParser(description='Implementation of the Lemna algorithm.') 225 | parser.add_argument('data_path', type=str, 226 | help='Path to list (.pkl) containing perturbations of shape (no_perturbations, no_features).') 227 | parser.add_argument('label_path', type=str, 228 | help='Path to array containing labels of shape (no_samples, no_perturbations). Labels are' 229 | ' assumed to be binary (0/1).') 230 | parser.add_argument('save_path', type=str, help='Runtime, mse and more is documented in a file for each sample.') 231 | parser.add_argument('K', type=int, help='K parameter (number of components) of the algorithm.') 232 | parser.add_argument('linreg_type', nargs=2, 233 | help='Lasso for linear regression with L1 regularization, fused_lasso for fused lasso. 
Second' 234 | 'parameter is alpha for lasso and S for fused_lasso.') 235 | parser.add_argument('iterations', type=int, help='Number of maximum iterations during EM Algorithm.') 236 | parser.add_argument('repetitions', type=int, help='Number of repetitions of EM Algorithm as its' 237 | 'output is not deterministic.') 238 | parser.add_argument('--no_processes', type=int, default=2, help='Number of processes for running parallel.') 239 | parser.add_argument('--verbose', type=int, default=0, help='Detailed output of EM algorithm.') 240 | args = vars(parser.parse_args()) 241 | for k,v in args.items(): 242 | print('{} = {}'.format(k, v)) 243 | if args['data_path'].split('.')[-1] == 'npy': 244 | data = np.load(args['data_path']) 245 | elif args['data_path'].split('.')[-1] == 'pkl': 246 | data = pkl.load(open(args['data_path'], 'rb')) 247 | else: 248 | print('Data format was not understood. Data could not be loaded.') 249 | sys.exit(1) 250 | labels = np.load(args['label_path']) 251 | betas, sigmas = lemna_parallel(data, labels, args['K'], float(args['linreg_type'][1]), args['iterations'], 252 | args['no_processes'], args['linreg_type'][0], repetitions=args['repetitions'], 253 | verbose=bool(args['verbose']), save_path=args['save_path'] 254 | ) 255 | if type(betas) is list: 256 | pkl.dump(betas, open(args['save_path'] + 'K=%d_S=%.4f_betas.pkl'%(args['K'], float(args['linreg_type'][1])), 'wb')) 257 | else: 258 | np.save(args['save_path'] + 'K=%d_S=%.4f_betas.npy'%(args['K'], float(args['linreg_type'][1])), betas) 259 | np.save(args['save_path'] + 'K=%d_S=%.4f_sigmas.npy' % (args['K'], float(args['linreg_type'][1])), sigmas) 260 | -------------------------------------------------------------------------------- /Explanation/Lime.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | import argparse 4 | import time 5 | import sys 6 | from sklearn.linear_model import Ridge 7 | from tqdm import tqdm 
8 | from scipy.spatial.distance import cosine 9 | from lemna_postprocessing_scripts.relevances_to_linreg import linreg_relevances_to_vector_space 10 | from scipy.sparse import load_npz 11 | 12 | 13 | # returns l^2 weight for two points 14 | def get_weight(x,y,sigma=1.0): 15 | dist = cosine(x,y) 16 | return np.exp(-dist/sigma) 17 | 18 | 19 | # calculates lime weights for each feature 20 | # perturbations is a 2d numpy array where the first row corresponds to the original sample 21 | # labels is a 1d numpy array containing the labels for the perturbations and the original sample is supposed to be 22 | # given label 1 by the classifier always. (this way, positive relevances will always speak _for_ the original 23 | # classification of the classifier) 24 | def get_lime_weights(perturbations, labels, random_state): 25 | assert perturbations.shape[0] == labels.shape[0] 26 | model_regressor = Ridge(alpha=1, fit_intercept=True, random_state=random_state) 27 | weights = np.array([get_weight(perturbations[0], y) for y in perturbations]) 28 | model_regressor.fit(perturbations, labels, sample_weight=weights) 29 | return model_regressor.coef_ 30 | 31 | 32 | # calculates lime weights for several perturbations and labels 33 | # perturbations is a list where each list entry is a 2d numpy array suitable for get_lime_weights 34 | # labels is a 2d numpy array with shape (no_samples, no_perturbations) 35 | def explain_samples(perturbations, labels, random_state=None): 36 | relevances = [] 37 | start_time = time.time() 38 | for p, l in tqdm(zip(perturbations, labels), total=len(perturbations)): 39 | w = get_lime_weights(p, l, random_state) 40 | relevances.append(w) 41 | end_time = time.time() 42 | print('Calculation of {} relevances took on {} seconds ({} seconds per sample).'.format(len(perturbations), 43 | end_time-start_time, 44 | (end_time-start_time)/ 45 | len(perturbations), 46 | )) 47 | return relevances 48 | 49 | if __name__ == '__main__': 50 | parser = 
argparse.ArgumentParser(description='Implementation of the LIME algorithm.') 51 | parser.add_argument('perturbation_path', type=str, 52 | help='Path to list (.pkl) of perturbations for data of shape (no_perturbations, no_features).') 53 | parser.add_argument('label_path', type=str, 54 | help='Path to array containing labels of perturbations of shape (no_samples, no_perturbations).' 55 | 'Labels are assumed to be binary (0/1).') 56 | parser.add_argument('save_path', type=str, help='Folder to save results.') 57 | parser.add_argument('--data_path', type=str, help='Path to data. Can be .npy, .npz, .pkl (sparse,numpy,list)') 58 | args = parser.parse_args() 59 | perturbations = pkl.load(open(args.perturbation_path, 'rb')) 60 | labels = np.load(args.label_path) 61 | if args.data_path: 62 | is_sparse = args.data_path.split('.')[-1] == 'npz' 63 | if args.data_path.split('.')[-1] == 'npy': 64 | data = np.load(args.data_path) 65 | elif args.data_path.split('.')[-1] == 'pkl': 66 | data = pkl.load(open(args.data_path, 'rb')) 67 | elif args.data_path.split('.')[-1] == 'npz': 68 | data = load_npz(args.data_path) 69 | else: 70 | print('Data format was not understood. Data could not be loaded.') 71 | sys.exit(1) 72 | rels = explain_samples(perturbations, labels) 73 | if args.data_path: 74 | linreg_relevances_to_vector_space(rels, data, args.save_path, is_sparse) 75 | else: 76 | pkl.dump(rels, open(args.save_path+'relevances_lime.pkl', 'wb')) 77 | -------------------------------------------------------------------------------- /Explanation/README.md: -------------------------------------------------------------------------------- 1 | # Explanations 2 | 3 | This folder contains scripts to create explanations for the network architectures using LIME ([Ribeiro et al.](https://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdf)) and LEMNA ([Guo et al.](http://people.cs.vt.edu/gangwang/ccs18.pdf)). 
Based on the finding of our paper we do not recommend to use these methods but [white-box methods](https://github.com/albermax/innvestigate) instead. Still, we want to publish our implementations for the sake of open access. 4 | 5 | * The input data for usage of this repository can be of three types: numpy array of shape (n_samples, n_features) like in Mimicus or VulDeePecker, scipy.sparse.csr_matrix of shape (n_samples, n_features) like in Drebin or a list of length n_samples where each entry in the list is a numpy array of different length like in DAMD, for example. 6 | * To use LIME or LEMNA you firstly need perturbations of the data. You can call `python3 perturbation_sampling.py --help` to find out how to generate them. 7 | * With the perturbations you can calculate relevances for the features your models use. Check `python3 Lemna.py --help` or `python3 Lime.py --help` to find out how. -------------------------------------------------------------------------------- /Explanation/perturbation_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | import pickle as pkl 4 | import sys 5 | import os 6 | import argparse 7 | from tqdm import tqdm 8 | from keras.models import load_model 9 | 10 | sys.path.append('../utils/') 11 | from utils import load_npy_npz_pkl 12 | 13 | 14 | # samples perturbations from a data_sample (vector) by choosing a random number of random features from the original 15 | # sample and setting the non-chosen features to 0 16 | def sample_data_points(data_sample, no_samples): 17 | if type(data_sample).__module__ == 'scipy.sparse.csr': 18 | is_sparse = True 19 | # in sparse case we remember all row and column indices of the perturbations and create the matrix at one point 20 | nonzero_row_indices, nonzero_column_indices = [], [] 21 | else: 22 | is_sparse = False 23 | # in the non-sparse case we save the perturbation directly after creation 24 | samples 
= np.zeros(shape=(no_samples,) + data_sample.shape, dtype=data_sample.dtype) 25 | non_zeros = np.nonzero(data_sample) 26 | # if data_sample is a (sparse) vector each nonzero index appears exactly once in the non_zeros 27 | if is_sparse: 28 | no_nonzero_entries = len(non_zeros[0]) 29 | sampling_values = non_zeros[1] 30 | elif len(data_sample.shape) < 2: 31 | no_nonzero_entries = len(non_zeros[0]) 32 | sampling_values = non_zeros[0] 33 | # if data sample is a vector of vectors, each nonzero index appears multiple times for each vector 34 | else: 35 | no_nonzero_entries = len(np.unique(non_zeros[0])) 36 | sampling_values = np.unique(non_zeros[0]) 37 | for i in range(no_samples): 38 | # the first row contains the original sample 39 | if i == 0: 40 | no_samples_indices = no_nonzero_entries 41 | else: 42 | # how many entries are we going to draw (at least one!) 43 | no_samples_indices = np.random.randint(1, no_nonzero_entries + 1) 44 | # which samples are we actually drawing 45 | if is_sparse: 46 | sample_indices = np.random.choice(non_zeros[1], no_samples_indices, replace=False) 47 | nonzero_row_indices += [i] * no_samples_indices 48 | nonzero_column_indices += list(sample_indices) 49 | continue 50 | else: 51 | sample_indices = np.random.choice(sampling_values, no_samples_indices, replace=False) 52 | perturbed_data = np.zeros(shape=data_sample.shape) 53 | perturbed_data[sample_indices] = data_sample[sample_indices] 54 | samples[i][:] = perturbed_data 55 | if is_sparse: 56 | data = [1] * len(nonzero_row_indices) 57 | samples = sparse.csr_matrix((data, (nonzero_row_indices, nonzero_column_indices)), 58 | shape=(no_samples, data_sample.shape[1]), dtype=data_sample.dtype) 59 | return samples 60 | 61 | 62 | # given this method creates 'no_samples' perturbations of each datapoint in data. Careful, perturbations can easily 63 | # become very big in memory. Pre calculate if no_samples*data fits into memory. 
def get_pertubations(data, no_samples):
    """Create ``no_samples`` perturbations for every data point in ``data``.

    Careful: the result can easily become very big in memory. Check beforehand
    that ``no_samples`` times the size of ``data`` fits into memory.

    Args:
        data: Container that is iterated sample by sample. Anything exposing
            ``shape`` (numpy array, scipy sparse matrix) is sized by its first
            dimension, everything else (e.g. a list of arrays) by ``len``.
        no_samples: Number of perturbations drawn per data point.

    Returns:
        A list holding one perturbation block per data point, as returned by
        ``sample_data_points``. For sparse ``data`` the blocks are stacked
        vertically into one sparse matrix instead.
    """
    # sparse.issparse is used instead of comparing type(data).__module__ with
    # 'scipy.sparse.csr': that module path is a private detail which differs
    # between SciPy versions, so the string comparison silently fails there.
    is_sparse = sparse.issparse(data)
    # len() of a sparse matrix raises, therefore prefer shape when available
    total = data.shape[0] if hasattr(data, 'shape') else len(data)
    print('Sampling data points...')
    perturbations = [sample_data_points(data_sample, no_samples)
                     for data_sample in tqdm(data, total=total)]
    # only sparse blocks are merged; dense blocks are returned as a list
    if is_sparse:
        perturbations = sparse.vstack(perturbations)
    return perturbations


def get_classification(model, samples, batch_size=500):
    """Classify a batch of perturbations and binarize the predicted labels.

    Row 0 of ``samples`` is taken as the reference: every sample that gets the
    same predicted class as row 0 is labelled 1, all others are labelled 0.

    Args:
        model: Object with a ``predict`` method whose output is reduced with
            ``np.argmax(..., axis=1)`` to obtain one class index per sample.
        samples: Either a list of arrays (possibly of different shapes), which
            are classified one by one with a leading batch axis of size 1, or
            a single batch that ``model.predict`` accepts directly.
        batch_size: Passed to ``model.predict`` when the whole batch is
            predicted at once; unused for list input.

    Returns:
        1-d ``np.uint8`` array with one 0/1 label per sample.
    """
    if isinstance(samples, list):
        # samples may differ in shape, so each one is predicted on its own
        predicted = [np.argmax(model.predict(sample.reshape((1,) + sample.shape)), axis=1)
                     for sample in samples]
        labels = np.array(predicted).reshape((len(samples),))
    else:
        labels = np.argmax(model.predict(samples, batch_size=batch_size), axis=1)
    # Compare first and cast afterwards: casting the raw class indices to uint8
    # before the comparison would make class ids >= 256 wrap around and possibly
    # collide with the reference class.
    return (labels == labels[0]).astype(np.uint8)


# transforms a tuple of non zero indices (like output of scipy.sparse.nonzero or np.nonzero) to suited representation
# for linear regression.
# Assumes original sample in the first row
def perturbation_block_to_regression_sample(nonzero_tuple):
    """Turn the non-zero indices of a perturbation block into a binary matrix.

    Args:
        nonzero_tuple: Tuple of index arrays like the output of ``np.nonzero``
            or ``scipy.sparse`` ``nonzero()``. Only the first two entries are
            used: entry 0 holds the perturbation (row) index, entry 1 the
            feature index. Row 0 is assumed to be the original sample.

    Returns:
        ``np.uint8`` array of shape (no_rows, no_features_of_row_0). Column j
        stands for the j-th smallest feature index that is non-zero in row 0;
        an entry is 1 if that feature is non-zero in the respective row.

    Raises:
        KeyError: If a row contains a feature index that is not set in row 0.
    """
    rows = np.asarray(nonzero_tuple[0])
    columns = np.asarray(nonzero_tuple[1])
    if rows.size == 0:
        return np.zeros(shape=(0, 0), dtype=np.uint8)
    # features of the original sample define the columns of the result
    original_features = np.unique(columns[rows == 0])
    orig_idx_2_reg_idx = {idx: pos for pos, idx in enumerate(original_features)}
    # Size the block by the highest row index rather than by the number of
    # distinct rows: a perturbation without any non-zero entry would otherwise
    # shrink the block and make the row assignment below index out of bounds.
    no_samples = int(rows.max()) + 1
    linreg_block = np.zeros(shape=(no_samples, len(original_features)), dtype=np.uint8)
    for row_no in np.unique(rows):
        row_features = np.unique(columns[rows == row_no])
        reg_indices = [orig_idx_2_reg_idx[idx] for idx in row_features]
        linreg_block[row_no, reg_indices] = 1
    return linreg_block


def perturbation_pipeline(data, model, no_perturbations_per_sample, save_path, save_perturbations, delete, seed=40):
    """Compute perturbations, their labels and their regression representation.

    For every sample in ``data`` perturbations are drawn with
    ``sample_data_points``, classified with ``get_classification`` and turned
    into a binary representation with
    ``perturbation_block_to_regression_sample``. Results are written to
    ``save_path``; the seed is part of every file name.

    Args:
        data: Samples as list, scipy sparse matrix or numpy array.
        model: Classifier handed to ``get_classification``.
        no_perturbations_per_sample: Number of perturbations per sample.
        save_path: Directory the result files are written to.
        save_perturbations: If true, the perturbations themselves are saved too
            (.pkl for list data, .npz for sparse data, .npy otherwise).
        delete: If true, all-zero vectors of a perturbation are removed before
            classification instead of being kept as zeros. The process exits
            with status 1 if a sample has fewer than three dimensions.
        seed: Seed for ``np.random``; defaults to the formerly fixed value 40.
    """
    np.random.seed(seed)
    data_is_list = isinstance(data, list)
    no_samples = len(data) if data_is_list else data.shape[0]
    all_labels, all_linregs, all_perturbations = [], [], []
    print('Computing perturbations for {} samples ...'.format(no_samples))
    for data_sample in tqdm(data, total=no_samples):
        if len(data_sample.shape) < 3 and delete:
            print('Error. Delete = 1 is only allowed for sequential data!')
            sys.exit(1)
        perturbations = sample_data_points(data_sample, no_perturbations_per_sample)
        if delete:
            perturbations_deleted = []
            for perturbation in perturbations:
                # keep only the vectors that contain at least one non-zero value
                keep = np.any(perturbation != 0, axis=tuple(range(1, perturbation.ndim)))
                perturbations_deleted.append(perturbation[keep])
            labels = get_classification(model, perturbations_deleted)
        else:
            labels = get_classification(model, perturbations)
        all_labels.append(labels)
        all_linregs.append(perturbation_block_to_regression_sample(perturbations.nonzero()))
        if save_perturbations:
            all_perturbations.append(perturbations)
    np.save(os.path.join(save_path, 'perturbation_labels_seed_{}.npy'.format(seed)), np.array(all_labels))
    # context managers make sure the pickle files are flushed and closed
    with open(os.path.join(save_path, 'linreg_representations_seed_{}.pkl'.format(seed)), 'wb') as f:
        pkl.dump(all_linregs, f)
    if save_perturbations:
        if data_is_list:
            with open(os.path.join(save_path, 'perturbation_data_seed_{}.pkl'.format(seed)), 'wb') as f:
                pkl.dump(all_perturbations, f)
        elif sparse.issparse(data):
            # issparse instead of a comparison with the private module name
            # 'scipy.sparse.csr', which is not stable across SciPy versions
            sparse.save_npz(os.path.join(save_path, 'perturbation_data_seed_{}.npz'.format(seed)),
                            sparse.vstack(all_perturbations))
        else:
            np.save(os.path.join(save_path, 'perturbation_data_seed_{}.npy'.format(seed)),
                    np.array(all_perturbations))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Perturbation sampling process for lemna algorithm.')
    parser.add_argument('data_path', type=str, help='Path to data structure containing data samples.')
    parser.add_argument('model_path', type=str, help='Path to a keras model (*.hdf5) that can be loaded with model.load().')
    parser.add_argument('save_path', type=str, help='Where to store the results.')
parser.add_argument('no_perturbations', type=int, help='How many perturbations of each sample will be created.') 162 | parser.add_argument('--save_perturbations', type=int, default=0, help='If 1 the real perturbations (not only the' 163 | 'binary representation of them) will be saved. This can be useful' 164 | 'for debugging but can use a lot of memory.') 165 | parser.add_argument('--delete', type=int, default=0, help='If 1, features that are not selected for a perturbation' 166 | 'will be deleted from the sample instead of setting them' 167 | 'to zero.') 168 | args = vars(parser.parse_args()) 169 | for k,v in args.items(): 170 | print('{} = {}'.format(k, v)) 171 | data = load_npy_npz_pkl(args['data_path']) 172 | model = load_model(args['model_path']) 173 | perturbation_pipeline(data, model, args['no_perturbations'], args['save_path'], bool(args['save_perturbations']), 174 | bool(args['delete'])) 175 | 176 | -------------------------------------------------------------------------------- /NetworkTraining/DAMD/DalvikOpcodes.txt: -------------------------------------------------------------------------------- 1 | nop 00 2 | move 01 3 | move/from16 02 4 | move/16 03 5 | move-wide 04 6 | move-wide/from16 05 7 | move-wide/16 06 8 | move-object 07 9 | move-object/from16 08 10 | move-object/16 09 11 | move-result 0a 12 | move-result-wide 0b 13 | move-result-object 0c 14 | move-exception 0d 15 | return-void 0e 16 | return 0f 17 | return-wide 10 18 | return-object 11 19 | const/4 12 20 | const/16 13 21 | const 14 22 | const/high16 15 23 | const-wide/16 16 24 | const-wide/32 17 25 | const-wide 18 26 | const-wide/high16 19 27 | const-string 1a 28 | const-string/jumbo 1b 29 | const-class 1c 30 | monitor-enter 1d 31 | monitor-exit 1e 32 | check-cast 1f 33 | instance-of 20 34 | array-length 21 35 | new-instance 22 36 | new-array 23 37 | filled-new-array 24 38 | filled-new-array/range 25 39 | fill-array-data 26 40 | throw 27 41 | goto 28 42 | goto/16 29 43 | goto/32 2a 44 | 
packed-switch 2b 45 | sparse-switch 2c 46 | cmpl-float 2d 47 | cmpg-float 2e 48 | cmpl-double 2f 49 | cmpg-double 30 50 | cmp-long 31 51 | if-eq 32 52 | if-ne 33 53 | if-lt 34 54 | if-ge 35 55 | if-gt 36 56 | if-le 37 57 | if-eqz 38 58 | if-nez 39 59 | if-ltz 3a 60 | if-gez 3b 61 | if-gtz 3c 62 | if-lez 3d 63 | aget 44 64 | aget-wide 45 65 | aget-object 46 66 | aget-boolean 47 67 | aget-byte 48 68 | aget-char 49 69 | aget-short 4a 70 | aput 4b 71 | aput-wide 4c 72 | aput-object 4d 73 | aput-boolean 4e 74 | aput-byte 4f 75 | aput-char 50 76 | aput-short 51 77 | iget 52 78 | iget-wide 53 79 | iget-object 54 80 | iget-boolean 55 81 | iget-byte 56 82 | iget-char 57 83 | iget-short 58 84 | iput 59 85 | iput-wide 5a 86 | iput-object 5b 87 | iput-boolean 5c 88 | iput-byte 5d 89 | iput-char 5e 90 | iput-short 5f 91 | sget 60 92 | sget-wide 61 93 | sget-object 62 94 | sget-boolean 63 95 | sget-byte 64 96 | sget-char 65 97 | sget-short 66 98 | sput 67 99 | sput-wide 68 100 | sput-object 69 101 | sput-boolean 6a 102 | sput-byte 6b 103 | sput-char 6c 104 | sput-short 6d 105 | invoke-virtual 6e 106 | invoke-super 6f 107 | invoke-direct 70 108 | invoke-static 71 109 | invoke-interface 72 110 | invoke-virtual/range 74 111 | invoke-super/range 75 112 | invoke-direct/range 76 113 | invoke-static/range 77 114 | invoke-interface/range 78 115 | neg-int 7b 116 | not-int 7c 117 | neg-long 7d 118 | not-long 7e 119 | neg-float 7f 120 | neg-double 80 121 | int-to-long 81 122 | int-to-float 82 123 | int-to-double 83 124 | long-to-int 84 125 | long-to-float 85 126 | long-to-double 86 127 | float-to-int 87 128 | float-to-long 88 129 | float-to-double 89 130 | double-to-int 8a 131 | double-to-long 8b 132 | double-to-float 8c 133 | int-to-byte 8d 134 | int-to-char 8e 135 | int-to-short 8f 136 | add-int 90 137 | sub-int 91 138 | mul-int 92 139 | div-int 93 140 | rem-int 94 141 | and-int 95 142 | or-int 96 143 | xor-int 97 144 | shl-int 98 145 | shr-int 99 146 | ushr-int 9a 147 | add-long 9b 148 
| sub-long 9c 149 | mul-long 9d 150 | div-long 9e 151 | rem-long 9f 152 | and-long a0 153 | or-long a1 154 | xor-long a2 155 | shl-long a3 156 | shr-long a4 157 | ushr-long a5 158 | add-float a6 159 | sub-float a7 160 | mul-float a8 161 | div-float a9 162 | rem-float aa 163 | add-double ab 164 | sub-double ac 165 | mul-double ad 166 | div-double ae 167 | rem-double af 168 | add-int/2addr b0 169 | sub-int/2addr b1 170 | mul-int/2addr b2 171 | div-int/2addr b3 172 | rem-int/2addr b4 173 | and-int/2addr b5 174 | or-int/2addr b6 175 | xor-int/2addr b7 176 | shl-int/2addr b8 177 | shr-int/2addr b9 178 | ushr-int/2addr ba 179 | add-long/2addr bb 180 | sub-long/2addr bc 181 | mul-long/2addr bd 182 | div-long/2addr be 183 | rem-long/2addr bf 184 | and-long/2addr c0 185 | or-long/2addr c1 186 | xor-long/2addr c2 187 | shl-long/2addr c3 188 | shr-long/2addr c4 189 | ushr-long/2addr c5 190 | add-float/2addr c6 191 | sub-float/2addr c7 192 | mul-float/2addr c8 193 | div-float/2addr c9 194 | rem-float/2addr ca 195 | add-double/2addr cb 196 | sub-double/2addr cc 197 | mul-double/2addr cd 198 | div-double/2addr ce 199 | rem-double/2addr cf 200 | add-int/lit16 d0 201 | rsub-int d1 202 | mul-int/lit16 d2 203 | div-int/lit16 d3 204 | rem-int/lit16 d4 205 | and-int/lit16 d5 206 | or-int/lit16 d6 207 | xor-int/lit16 d7 208 | add-int/lit8 d8 209 | rsub-int/lit8 d9 210 | mul-int/lit8 da 211 | div-int/lit8 db 212 | rem-int/lit8 dc 213 | and-int/lit8 dd 214 | or-int/lit8 de 215 | xor-int/lit8 df 216 | shl-int/lit8 e0 217 | shr-int/lit8 e1 218 | ushr-int/lit8 e2 219 | -------------------------------------------------------------------------------- /NetworkTraining/DAMD/DalvikOpcodesDescription.json: -------------------------------------------------------------------------------- 1 | { 2 | "00": [ 3 | "00 10x", 4 | "nop", 5 | "", 6 | "Waste cycles.\nNote:\nData-bearing pseudo-instructions are tagged with this opcode, in which\ncase the high-order byte of the opcode unit indicates the nature 
of\nthe data. See \"packed-switch-payload Format\",\n\"sparse-switch-payload Format\", and\n\"fill-array-data-payload Format\" below." 7 | ], 8 | "01": [ 9 | "01 12x", 10 | "move vA, vB", 11 | "A: destination register (4 bits)\nB: source register (4 bits)", 12 | "Move the contents of one non-object register to another." 13 | ], 14 | "02": [ 15 | "02 22x", 16 | "move/from16 vAA, vBBBB", 17 | "A: destination register (8 bits)\nB: source register (16 bits)", 18 | "Move the contents of one non-object register to another." 19 | ], 20 | "03": [ 21 | "03 32x", 22 | "move/16 vAAAA, vBBBB", 23 | "A: destination register (16 bits)\nB: source register (16 bits)", 24 | "Move the contents of one non-object register to another." 25 | ], 26 | "04": [ 27 | "04 12x", 28 | "move-wide vA, vB", 29 | "A: destination register pair (4 bits)\nB: source register pair (4 bits)", 30 | "Move the contents of one register-pair to another.\nNote:\nIt is legal to move from vN to either\nvN-1 or vN+1, so implementations\nmust arrange for both halves of a register pair to be read before\nanything is written." 31 | ], 32 | "05": [ 33 | "05 22x", 34 | "move-wide/from16 vAA, vBBBB", 35 | "A: destination register pair (8 bits)\nB: source register pair (16 bits)", 36 | "Move the contents of one register-pair to another.\nNote:\nImplementation considerations are the same as move-wide,\nabove." 37 | ], 38 | "06": [ 39 | "06 32x", 40 | "move-wide/16 vAAAA, vBBBB", 41 | "A: destination register pair (16 bits)\nB: source register pair (16 bits)", 42 | "Move the contents of one register-pair to another.\nNote:\nImplementation considerations are the same as move-wide,\nabove." 43 | ], 44 | "07": [ 45 | "07 12x", 46 | "move-object vA, vB", 47 | "A: destination register (4 bits)\nB: source register (4 bits)", 48 | "Move the contents of one object-bearing register to another." 
49 | ], 50 | "08": [ 51 | "08 22x", 52 | "move-object/from16 vAA, vBBBB", 53 | "A: destination register (8 bits)\nB: source register (16 bits)", 54 | "Move the contents of one object-bearing register to another." 55 | ], 56 | "09": [ 57 | "09 32x", 58 | "move-object/16 vAAAA, vBBBB", 59 | "A: destination register (16 bits)\nB: source register (16 bits)", 60 | "Move the contents of one object-bearing register to another." 61 | ], 62 | "0a": [ 63 | "0a 11x", 64 | "move-result vAA", 65 | "A: destination register (8 bits)", 66 | "Move the single-word non-object result of the most recent\ninvoke-kind into the indicated register.\nThis must be done as the instruction immediately after an\ninvoke-kind whose (single-word, non-object) result\nis not to be ignored; anywhere else is invalid." 67 | ], 68 | "0b": [ 69 | "0b 11x", 70 | "move-result-wide vAA", 71 | "A: destination register pair (8 bits)", 72 | "Move the double-word result of the most recent\ninvoke-kind into the indicated register pair.\nThis must be done as the instruction immediately after an\ninvoke-kind whose (double-word) result\nis not to be ignored; anywhere else is invalid." 73 | ], 74 | "0c": [ 75 | "0c 11x", 76 | "move-result-object vAA", 77 | "A: destination register (8 bits)", 78 | "Move the object result of the most recent invoke-kind\ninto the indicated register. This must be done as the instruction\nimmediately after an invoke-kind or\nfilled-new-array\nwhose (object) result is not to be ignored; anywhere else is invalid." 79 | ], 80 | "0d": [ 81 | "0d 11x", 82 | "move-exception vAA", 83 | "A: destination register (8 bits)", 84 | "Save a just-caught exception into the given register. This must\nbe the first instruction of any exception handler whose caught\nexception is not to be ignored, and this instruction must only\never occur as the first instruction of an exception handler; anywhere\nelse is invalid." 
85 | ], 86 | "0e": [ 87 | "0e 10x", 88 | "return-void", 89 | "", 90 | "Return from a void method." 91 | ], 92 | "0f": [ 93 | "0f 11x", 94 | "return vAA", 95 | "A: return value register (8 bits)", 96 | "Return from a single-width (32-bit) non-object value-returning\nmethod." 97 | ], 98 | "10": [ 99 | "10 11x", 100 | "return-wide vAA", 101 | "A: return value register-pair (8 bits)", 102 | "Return from a double-width (64-bit) value-returning method." 103 | ], 104 | "11": [ 105 | "11 11x", 106 | "return-object vAA", 107 | "A: return value register (8 bits)", 108 | "Return from an object-returning method." 109 | ], 110 | "12": [ 111 | "12 11n", 112 | "const/4 vA, #+B", 113 | "A: destination register (4 bits)\nB: signed int (4 bits)", 114 | "Move the given literal value (sign-extended to 32 bits) into\nthe specified register." 115 | ], 116 | "13": [ 117 | "13 21s", 118 | "const/16 vAA, #+BBBB", 119 | "A: destination register (8 bits)\nB: signed int (16 bits)", 120 | "Move the given literal value (sign-extended to 32 bits) into\nthe specified register." 121 | ], 122 | "14": [ 123 | "14 31i", 124 | "const vAA, #+BBBBBBBB", 125 | "A: destination register (8 bits)\nB: arbitrary 32-bit constant", 126 | "Move the given literal value into the specified register." 127 | ], 128 | "15": [ 129 | "15 21h", 130 | "const/high16 vAA, #+BBBB0000", 131 | "A: destination register (8 bits)\nB: signed int (16 bits)", 132 | "Move the given literal value (right-zero-extended to 32 bits) into\nthe specified register." 133 | ], 134 | "16": [ 135 | "16 21s", 136 | "const-wide/16 vAA, #+BBBB", 137 | "A: destination register (8 bits)\nB: signed int (16 bits)", 138 | "Move the given literal value (sign-extended to 64 bits) into\nthe specified register-pair." 139 | ], 140 | "17": [ 141 | "17 31i", 142 | "const-wide/32 vAA, #+BBBBBBBB", 143 | "A: destination register (8 bits)\nB: signed int (32 bits)", 144 | "Move the given literal value (sign-extended to 64 bits) into\nthe specified register-pair." 
145 | ], 146 | "18": [ 147 | "18 51l", 148 | "const-wide vAA, #+BBBBBBBBBBBBBBBB", 149 | "A: destination register (8 bits)\nB: arbitrary double-width (64-bit) constant", 150 | "Move the given literal value into\nthe specified register-pair." 151 | ], 152 | "19": [ 153 | "19 21h", 154 | "const-wide/high16 vAA, #+BBBB000000000000", 155 | "A: destination register (8 bits)\nB: signed int (16 bits)", 156 | "Move the given literal value (right-zero-extended to 64 bits) into\nthe specified register-pair." 157 | ], 158 | "1a": [ 159 | "1a 21c", 160 | "const-string vAA, string@BBBB", 161 | "A: destination register (8 bits)\nB: string index", 162 | "Move a reference to the string specified by the given index into the\nspecified register." 163 | ], 164 | "1b": [ 165 | "1b 31c", 166 | "const-string/jumbo vAA, string@BBBBBBBB", 167 | "A: destination register (8 bits)\nB: string index", 168 | "Move a reference to the string specified by the given index into the\nspecified register." 169 | ], 170 | "1c": [ 171 | "1c 21c", 172 | "const-class vAA, type@BBBB", 173 | "A: destination register (8 bits)\nB: type index", 174 | "Move a reference to the class specified by the given index into the\nspecified register. In the case where the indicated type is primitive,\nthis will store a reference to the primitive type's degenerate\nclass." 175 | ], 176 | "1d": [ 177 | "1d 11x", 178 | "monitor-enter vAA", 179 | "A: reference-bearing register (8 bits)", 180 | "Acquire the monitor for the indicated object." 181 | ], 182 | "1e": [ 183 | "1e 11x", 184 | "monitor-exit vAA", 185 | "A: reference-bearing register (8 bits)", 186 | "Release the monitor for the indicated object.\nNote:\nIf this instruction needs to throw an exception, it must do\nso as if the pc has already advanced past the instruction.\nIt may be useful to think of this as the instruction successfully\nexecuting (in a sense), and the exception getting thrown after\nthe instruction but before the next one gets a chance to\nrun. 
This definition makes it possible for a method to use\na monitor cleanup catch-all (e.g., finally) block as\nthe monitor cleanup for that block itself, as a way to handle the\narbitrary exceptions that might get thrown due to the historical\nimplementation of Thread.stop(), while still managing\nto have proper monitor hygiene." 187 | ], 188 | "1f": [ 189 | "1f 21c", 190 | "check-cast vAA, type@BBBB", 191 | "A: reference-bearing register (8 bits)\nB: type index (16 bits)", 192 | "Throw a ClassCastException if the reference in the\ngiven register cannot be cast to the indicated type.\nNote: Since A must always be a reference\n(and not a primitive value), this will necessarily fail at runtime\n(that is, it will throw an exception) if B refers to a\nprimitive type." 193 | ], 194 | "20": [ 195 | "20 22c", 196 | "instance-of vA, vB, type@CCCC", 197 | "A: destination register (4 bits)\nB: reference-bearing register (4 bits)\nC: type index (16 bits)", 198 | "Store in the given destination register 1\nif the indicated reference is an instance of the given type,\nor 0 if not.\nNote: Since B must always be a reference\n(and not a primitive value), this will always result\nin 0 being stored if C refers to a primitive\ntype." 199 | ], 200 | "21": [ 201 | "21 12x", 202 | "array-length vA, vB", 203 | "A: destination register (4 bits)\nB: array reference-bearing register (4 bits)", 204 | "Store in the given destination register the length of the indicated\narray, in entries" 205 | ], 206 | "22": [ 207 | "22 21c", 208 | "new-instance vAA, type@BBBB", 209 | "A: destination register (8 bits)\nB: type index", 210 | "Construct a new instance of the indicated type, storing a\nreference to it in the destination. The type must refer to a\nnon-array class." 211 | ], 212 | "23": [ 213 | "23 22c", 214 | "new-array vA, vB, type@CCCC", 215 | "A: destination register (4 bits)\nB: size register\nC: type index", 216 | "Construct a new array of the indicated type and size. 
The type\nmust be an array type." 217 | ], 218 | "24": [ 219 | "24 35c", 220 | "filled-new-array {vC, vD, vE, vF, vG}, type@BBBB", 221 | "A: array size and argument word count (4 bits)\nB: type index (16 bits)\nC..G: argument registers (4 bits each)", 222 | "Construct an array of the given type and size, filling it with the\nsupplied contents. The type must be an array type. The array's\ncontents must be single-word (that is,\nno arrays of long or double, but reference\ntypes are acceptable). The constructed\ninstance is stored as a \"result\" in the same way that the method invocation\ninstructions store their results, so the constructed instance must\nbe moved to a register with an immediately subsequent\nmove-result-object instruction (if it is to be used)." 223 | ], 224 | "25": [ 225 | "25 3rc", 226 | "filled-new-array/range {vCCCC .. vNNNN}, type@BBBB", 227 | "A: array size and argument word count (8 bits)\nB: type index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1", 228 | "Construct an array of the given type and size, filling it with\nthe supplied contents. Clarifications and restrictions are the same\nas filled-new-array, described above." 229 | ], 230 | "26": [ 231 | "26 31t", 232 | "fill-array-data vAA, +BBBBBBBB (with supplemental data as specified\nbelow in \"fill-array-data-payload Format\")", 233 | "A: array reference (8 bits)\nB: signed \"branch\" offset to table data pseudo-instruction\n(32 bits)", 234 | "Fill the given array with the indicated data. The reference must be\nto an array of primitives, and the data table must match it in type and\nmust contain no more elements than will fit in the array. That is,\nthe array may be larger than the table, and if so, only the initial\nelements of the array are set, leaving the remainder alone." 235 | ], 236 | "27": [ 237 | "27 11x", 238 | "throw vAA", 239 | "A: exception-bearing register (8 bits)", 240 | "Throw the indicated exception." 
241 | ], 242 | "28": [ 243 | "28 10t", 244 | "goto +AA", 245 | "A: signed branch offset (8 bits)", 246 | "Unconditionally jump to the indicated instruction.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either with goto/32 or\nby including a nop as a target before the branch.)" 247 | ], 248 | "29": [ 249 | "29 20t", 250 | "goto/16 +AAAA", 251 | "A: signed branch offset (16 bits)", 252 | "Unconditionally jump to the indicated instruction.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either with goto/32 or\nby including a nop as a target before the branch.)" 253 | ], 254 | "2a": [ 255 | "2a 30t", 256 | "goto/32 +AAAAAAAA", 257 | "A: signed branch offset (32 bits)", 258 | "Unconditionally jump to the indicated instruction." 259 | ], 260 | "2b": [ 261 | "2b 31t", 262 | "packed-switch vAA, +BBBBBBBB (with supplemental data as\nspecified below in \"packed-switch-payload Format\")", 263 | "A: register to test\nB: signed \"branch\" offset to table data pseudo-instruction\n(32 bits)", 264 | "Jump to a new instruction based on the value in the\ngiven register, using a table of offsets corresponding to each value\nin a particular integral range, or fall through to the next\ninstruction if there is no match." 265 | ], 266 | "2c": [ 267 | "2c 31t", 268 | "sparse-switch vAA, +BBBBBBBB (with supplemental data as\nspecified below in \"sparse-switch-payload Format\")", 269 | "A: register to test\nB: signed \"branch\" offset to table data pseudo-instruction\n(32 bits)", 270 | "Jump to a new instruction based on the value in the given\nregister, using an ordered table of value-offset pairs, or fall\nthrough to the next instruction if there is no match." 
271 | ], 272 | "2d": [ 273 | "2d..31 23x", 274 | "cmpl-float (lt bias) vAA, vBB, vCC", 275 | "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair", 276 | "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN." 277 | ], 278 | "2e": [ 279 | "2d..31 23x", 280 | "cmpg-float (gt bias) vAA, vBB, vCC", 281 | "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair", 282 | "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN." 
283 | ], 284 | "2f": [ 285 | "2d..31 23x", 286 | "cmpl-double (lt bias) vAA, vBB, vCC", 287 | "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair", 288 | "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN." 289 | ], 290 | "30": [ 291 | "2d..31 23x", 292 | "cmpg-double (gt bias) vAA, vBB, vCC", 293 | "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair", 294 | "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN." 
295 | ], 296 | "31": [ 297 | "2d..31 23x", 298 | "cmp-long vAA, vBB, vCC", 299 | "A: destination register (8 bits)\nB: first source register or pair\nC: second source register or pair", 300 | "Perform the indicated floating point or long comparison,\nsetting a to 0 if b == c,\n1 if b > c,\nor -1 if b < c.\nThe \"bias\" listed for the floating point operations\nindicates how NaN comparisons are treated: \"gt bias\"\ninstructions return 1 for NaN comparisons,\nand \"lt bias\" instructions return -1.\nFor example, to check to see if floating point\nx < y it is advisable to use\ncmpg-float; a result of -1 indicates that\nthe test was true, and the other values indicate it was false either\ndue to a valid comparison or because one of the values was\nNaN." 301 | ], 302 | "32": [ 303 | "32..37 22t", 304 | "if-eq vA, vB, +CCCC", 305 | "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)", 306 | "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 307 | ], 308 | "33": [ 309 | "32..37 22t", 310 | "if-ne vA, vB, +CCCC", 311 | "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)", 312 | "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. 
(A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 313 | ], 314 | "34": [ 315 | "32..37 22t", 316 | "if-lt vA, vB, +CCCC", 317 | "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)", 318 | "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 319 | ], 320 | "35": [ 321 | "32..37 22t", 322 | "if-ge vA, vB, +CCCC", 323 | "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)", 324 | "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 325 | ], 326 | "36": [ 327 | "32..37 22t", 328 | "if-gt vA, vB, +CCCC", 329 | "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)", 330 | "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 331 | ], 332 | "37": [ 333 | "32..37 22t", 334 | "if-le vA, vB, +CCCC", 335 | "A: first register to test (4 bits)\nB: second register to test (4 bits)\nC: signed branch offset (16 bits)", 336 | "Branch to the given destination if the given two registers' values\ncompare as specified.\nNote:\nThe branch offset must not be 0. 
(A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 337 | ], 338 | "38": [ 339 | "38..3d 21t", 340 | "if-eqz vAA, +BBBB", 341 | "A: register to test (8 bits)\nB: signed branch offset (16 bits)", 342 | "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 343 | ], 344 | "39": [ 345 | "38..3d 21t", 346 | "if-nez vAA, +BBBB", 347 | "A: register to test (8 bits)\nB: signed branch offset (16 bits)", 348 | "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 349 | ], 350 | "3a": [ 351 | "38..3d 21t", 352 | "if-ltz vAA, +BBBB", 353 | "A: register to test (8 bits)\nB: signed branch offset (16 bits)", 354 | "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 355 | ], 356 | "3b": [ 357 | "38..3d 21t", 358 | "if-gez vAA, +BBBB", 359 | "A: register to test (8 bits)\nB: signed branch offset (16 bits)", 360 | "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. 
(A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 361 | ], 362 | "3c": [ 363 | "38..3d 21t", 364 | "if-gtz vAA, +BBBB", 365 | "A: register to test (8 bits)\nB: signed branch offset (16 bits)", 366 | "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 367 | ], 368 | "3d": [ 369 | "38..3d 21t", 370 | "if-lez vAA, +BBBB", 371 | "A: register to test (8 bits)\nB: signed branch offset (16 bits)", 372 | "Branch to the given destination if the given register's value compares\nwith 0 as specified.\nNote:\nThe branch offset must not be 0. (A spin\nloop may be legally constructed either by branching around a\nbackward goto or by including a nop as\na target before the branch.)" 373 | ], 374 | "3e": [ 375 | "3e..43 10x", 376 | "(unused)", 377 | "", 378 | "(unused)" 379 | ], 380 | "3f": [ 381 | "3e..43 10x", 382 | "(unused)", 383 | "", 384 | "(unused)" 385 | ], 386 | "40": [ 387 | "3e..43 10x", 388 | "(unused)", 389 | "", 390 | "(unused)" 391 | ], 392 | "41": [ 393 | "3e..43 10x", 394 | "(unused)", 395 | "", 396 | "(unused)" 397 | ], 398 | "42": [ 399 | "3e..43 10x", 400 | "(unused)", 401 | "", 402 | "(unused)" 403 | ], 404 | "43": [ 405 | "3e..43 10x", 406 | "(unused)", 407 | "", 408 | "(unused)" 409 | ], 410 | "44": [ 411 | "44..51 23x", 412 | "aget vAA, vBB, vCC", 413 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 414 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 
415 | ], 416 | "45": [ 417 | "44..51 23x", 418 | "aget-wide vAA, vBB, vCC", 419 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 420 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 421 | ], 422 | "46": [ 423 | "44..51 23x", 424 | "aget-object vAA, vBB, vCC", 425 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 426 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 427 | ], 428 | "47": [ 429 | "44..51 23x", 430 | "aget-boolean vAA, vBB, vCC", 431 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 432 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 433 | ], 434 | "48": [ 435 | "44..51 23x", 436 | "aget-byte vAA, vBB, vCC", 437 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 438 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 439 | ], 440 | "49": [ 441 | "44..51 23x", 442 | "aget-char vAA, vBB, vCC", 443 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 444 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 
445 | ], 446 | "4a": [ 447 | "44..51 23x", 448 | "aget-short vAA, vBB, vCC", 449 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 450 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 451 | ], 452 | "4b": [ 453 | "44..51 23x", 454 | "aput vAA, vBB, vCC", 455 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 456 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 457 | ], 458 | "4c": [ 459 | "44..51 23x", 460 | "aput-wide vAA, vBB, vCC", 461 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 462 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 463 | ], 464 | "4d": [ 465 | "44..51 23x", 466 | "aput-object vAA, vBB, vCC", 467 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 468 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 469 | ], 470 | "4e": [ 471 | "44..51 23x", 472 | "aput-boolean vAA, vBB, vCC", 473 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 474 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 
475 | ], 476 | "4f": [ 477 | "44..51 23x", 478 | "aput-byte vAA, vBB, vCC", 479 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 480 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 481 | ], 482 | "50": [ 483 | "44..51 23x", 484 | "aput-char vAA, vBB, vCC", 485 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 486 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 487 | ], 488 | "51": [ 489 | "44..51 23x", 490 | "aput-short vAA, vBB, vCC", 491 | "A: value register or pair; may be source or dest\n(8 bits)\nB: array register (8 bits)\nC: index register (8 bits)", 492 | "Perform the identified array operation at the identified index of\nthe given array, loading or storing into the value register." 493 | ], 494 | "52": [ 495 | "52..5f 22c", 496 | "iget vA, vB, field@CCCC", 497 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 498 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 499 | ], 500 | "53": [ 501 | "52..5f 22c", 502 | "iget-wide vA, vB, field@CCCC", 503 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 504 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
505 | ], 506 | "54": [ 507 | "52..5f 22c", 508 | "iget-object vA, vB, field@CCCC", 509 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 510 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 511 | ], 512 | "55": [ 513 | "52..5f 22c", 514 | "iget-boolean vA, vB, field@CCCC", 515 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 516 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 517 | ], 518 | "56": [ 519 | "52..5f 22c", 520 | "iget-byte vA, vB, field@CCCC", 521 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 522 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 523 | ], 524 | "57": [ 525 | "52..5f 22c", 526 | "iget-char vA, vB, field@CCCC", 527 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 528 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
529 | ], 530 | "58": [ 531 | "52..5f 22c", 532 | "iget-short vA, vB, field@CCCC", 533 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 534 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 535 | ], 536 | "59": [ 537 | "52..5f 22c", 538 | "iput vA, vB, field@CCCC", 539 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 540 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 541 | ], 542 | "5a": [ 543 | "52..5f 22c", 544 | "iput-wide vA, vB, field@CCCC", 545 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 546 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 547 | ], 548 | "5b": [ 549 | "52..5f 22c", 550 | "iput-object vA, vB, field@CCCC", 551 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 552 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
553 | ], 554 | "5c": [ 555 | "52..5f 22c", 556 | "iput-boolean vA, vB, field@CCCC", 557 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 558 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 559 | ], 560 | "5d": [ 561 | "52..5f 22c", 562 | "iput-byte vA, vB, field@CCCC", 563 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 564 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 565 | ], 566 | "5e": [ 567 | "52..5f 22c", 568 | "iput-char vA, vB, field@CCCC", 569 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 570 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 571 | ], 572 | "5f": [ 573 | "52..5f 22c", 574 | "iput-short vA, vB, field@CCCC", 575 | "A: value register or pair; may be source or dest\n(4 bits)\nB: object register (4 bits)\nC: instance field reference index (16 bits)", 576 | "Perform the identified object instance field operation with\nthe identified field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
577 | ], 578 | "60": [ 579 | "60..6d 21c", 580 | "sget vAA, field@BBBB", 581 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 582 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 583 | ], 584 | "61": [ 585 | "60..6d 21c", 586 | "sget-wide vAA, field@BBBB", 587 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 588 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 589 | ], 590 | "62": [ 591 | "60..6d 21c", 592 | "sget-object vAA, field@BBBB", 593 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 594 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 595 | ], 596 | "63": [ 597 | "60..6d 21c", 598 | "sget-boolean vAA, field@BBBB", 599 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 600 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
601 | ], 602 | "64": [ 603 | "60..6d 21c", 604 | "sget-byte vAA, field@BBBB", 605 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 606 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 607 | ], 608 | "65": [ 609 | "60..6d 21c", 610 | "sget-char vAA, field@BBBB", 611 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 612 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 613 | ], 614 | "66": [ 615 | "60..6d 21c", 616 | "sget-short vAA, field@BBBB", 617 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 618 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 619 | ], 620 | "67": [ 621 | "60..6d 21c", 622 | "sput vAA, field@BBBB", 623 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 624 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
625 | ], 626 | "68": [ 627 | "60..6d 21c", 628 | "sput-wide vAA, field@BBBB", 629 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 630 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 631 | ], 632 | "69": [ 633 | "60..6d 21c", 634 | "sput-object vAA, field@BBBB", 635 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 636 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 637 | ], 638 | "6a": [ 639 | "60..6d 21c", 640 | "sput-boolean vAA, field@BBBB", 641 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 642 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 643 | ], 644 | "6b": [ 645 | "60..6d 21c", 646 | "sput-byte vAA, field@BBBB", 647 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 648 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 
649 | ], 650 | "6c": [ 651 | "60..6d 21c", 652 | "sput-char vAA, field@BBBB", 653 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 654 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 655 | ], 656 | "6d": [ 657 | "60..6d 21c", 658 | "sput-short vAA, field@BBBB", 659 | "A: value register or pair; may be source or dest\n(8 bits)\nB: static field reference index (16 bits)", 660 | "Perform the identified object static field operation with the identified\nstatic field, loading or storing into the value register.\nNote: These opcodes are reasonable candidates for static linking,\naltering the field argument to be a more direct offset." 661 | ], 662 | "6e": [ 663 | "6e..72 35c", 664 | "invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB", 665 | "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)", 666 | "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface. The same\nmethod restrictions hold as for invoke-virtual. 
In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)." 667 | ], 668 | "6f": [ 669 | "6e..72 35c", 670 | "invoke-super {vC, vD, vE, vF, vG}, meth@BBBB", 671 | "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)", 672 | "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface. The same\nmethod restrictions hold as for invoke-virtual. 
In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)." 673 | ], 674 | "70": [ 675 | "6e..72 35c", 676 | "invoke-direct {vC, vD, vE, vF, vG}, meth@BBBB", 677 | "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)", 678 | "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface. The same\nmethod restrictions hold as for invoke-virtual. 
In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)." 679 | ], 680 | "71": [ 681 | "6e..72 35c", 682 | "invoke-static {vC, vD, vE, vF, vG}, meth@BBBB", 683 | "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)", 684 | "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface. The same\nmethod restrictions hold as for invoke-virtual. 
In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)." 685 | ], 686 | "72": [ 687 | "6e..72 35c", 688 | "invoke-interface {vC, vD, vE, vF, vG}, meth@BBBB", 689 | "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC..G: argument registers (4 bits each)", 690 | "Call the indicated method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\ninvoke-virtual is used to invoke a normal virtual\nmethod (a method that is not private, static,\nor final, and is also not a constructor).\nWhen the method_id references a method of a non-interface\nclass, invoke-super is used to invoke the closest superclass's\nvirtual method (as opposed to the one with the same method_id\nin the calling class). The same method restrictions hold as for\ninvoke-virtual.\nIn Dex files version 037 or later, if the\nmethod_id refers to an interface method,\ninvoke-super is used to invoke the most specific,\nnon-overridden version of that method defined on that interface. The same\nmethod restrictions hold as for invoke-virtual. 
In Dex files\nprior to version 037, having an interface\nmethod_id is illegal and undefined.\ninvoke-direct is used to invoke a non-static\ndirect method (that is, an instance method that is by its nature\nnon-overridable, namely either a private instance method or a\nconstructor).\ninvoke-static is used to invoke a static\nmethod (which is always considered a direct method).\ninvoke-interface is used to invoke an\ninterface method, that is, on an object whose concrete\nclass isn't known, using a method_id that refers to\nan interface.\nNote: These opcodes are reasonable candidates for static linking,\naltering the method argument to be a more direct offset\n(or pair thereof)." 691 | ], 692 | "73": [ 693 | "73 10x", 694 | "(unused)", 695 | "", 696 | "(unused)" 697 | ], 698 | "74": [ 699 | "74..78 3rc", 700 | "invoke-virtual/range {vCCCC .. vNNNN}, meth@BBBB", 701 | "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1", 702 | "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions." 703 | ], 704 | "75": [ 705 | "74..78 3rc", 706 | "invoke-super/range {vCCCC .. vNNNN}, meth@BBBB", 707 | "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1", 708 | "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions." 709 | ], 710 | "76": [ 711 | "74..78 3rc", 712 | "invoke-direct/range {vCCCC .. vNNNN}, meth@BBBB", 713 | "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1", 714 | "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions." 715 | ], 716 | "77": [ 717 | "74..78 3rc", 718 | "invoke-static/range {vCCCC .. 
vNNNN}, meth@BBBB", 719 | "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1", 720 | "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions." 721 | ], 722 | "78": [ 723 | "74..78 3rc", 724 | "invoke-interface/range {vCCCC .. vNNNN}, meth@BBBB", 725 | "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: first argument register (16 bits)\nN = A + C - 1", 726 | "Call the indicated method. See first invoke-kind\ndescription above for details, caveats, and suggestions." 727 | ], 728 | "79": [ 729 | "79..7a 10x", 730 | "(unused)", 731 | "", 732 | "(unused)" 733 | ], 734 | "7a": [ 735 | "79..7a 10x", 736 | "(unused)", 737 | "", 738 | "(unused)" 739 | ], 740 | "7b": [ 741 | "7b..8f 12x", 742 | "neg-int vA, vB", 743 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 744 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 745 | ], 746 | "7c": [ 747 | "7b..8f 12x", 748 | "not-int vA, vB", 749 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 750 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 751 | ], 752 | "7d": [ 753 | "7b..8f 12x", 754 | "neg-long vA, vB", 755 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 756 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 757 | ], 758 | "7e": [ 759 | "7b..8f 12x", 760 | "not-long vA, vB", 761 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 762 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 
763 | ], 764 | "7f": [ 765 | "7b..8f 12x", 766 | "neg-float vA, vB", 767 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 768 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 769 | ], 770 | "80": [ 771 | "7b..8f 12x", 772 | "neg-double vA, vB", 773 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 774 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 775 | ], 776 | "81": [ 777 | "7b..8f 12x", 778 | "int-to-long vA, vB", 779 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 780 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 781 | ], 782 | "82": [ 783 | "7b..8f 12x", 784 | "int-to-float vA, vB", 785 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 786 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 787 | ], 788 | "83": [ 789 | "7b..8f 12x", 790 | "int-to-double vA, vB", 791 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 792 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 793 | ], 794 | "84": [ 795 | "7b..8f 12x", 796 | "long-to-int vA, vB", 797 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 798 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 799 | ], 800 | "85": [ 801 | "7b..8f 12x", 802 | "long-to-float vA, vB", 803 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 804 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 
805 | ], 806 | "86": [ 807 | "7b..8f 12x", 808 | "long-to-double vA, vB", 809 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 810 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 811 | ], 812 | "87": [ 813 | "7b..8f 12x", 814 | "float-to-int vA, vB", 815 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 816 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 817 | ], 818 | "88": [ 819 | "7b..8f 12x", 820 | "float-to-long vA, vB", 821 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 822 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 823 | ], 824 | "89": [ 825 | "7b..8f 12x", 826 | "float-to-double vA, vB", 827 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 828 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 829 | ], 830 | "8a": [ 831 | "7b..8f 12x", 832 | "double-to-int vA, vB", 833 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 834 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 835 | ], 836 | "8b": [ 837 | "7b..8f 12x", 838 | "double-to-long vA, vB", 839 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 840 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 841 | ], 842 | "8c": [ 843 | "7b..8f 12x", 844 | "double-to-float vA, vB", 845 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 846 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 
847 | ], 848 | "8d": [ 849 | "7b..8f 12x", 850 | "int-to-byte vA, vB", 851 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 852 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 853 | ], 854 | "8e": [ 855 | "7b..8f 12x", 856 | "int-to-char vA, vB", 857 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 858 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 859 | ], 860 | "8f": [ 861 | "7b..8f 12x", 862 | "int-to-short vA, vB", 863 | "A: destination register or pair (4 bits)\nB: source register or pair (4 bits)", 864 | "Perform the identified unary operation on the source register,\nstoring the result in the destination register." 865 | ], 866 | "90": [ 867 | "90..af 23x", 868 | "add-int vAA, vBB, vCC", 869 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 870 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
871 | ], 872 | "91": [ 873 | "90..af 23x", 874 | "sub-int vAA, vBB, vCC", 875 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 876 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 877 | ], 878 | "92": [ 879 | "90..af 23x", 880 | "mul-int vAA, vBB, vCC", 881 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 882 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 883 | ], 884 | "93": [ 885 | "90..af 23x", 886 | "div-int vAA, vBB, vCC", 887 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 888 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
889 | ], 890 | "94": [ 891 | "90..af 23x", 892 | "rem-int vAA, vBB, vCC", 893 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 894 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 895 | ], 896 | "95": [ 897 | "90..af 23x", 898 | "and-int vAA, vBB, vCC", 899 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 900 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 901 | ], 902 | "96": [ 903 | "90..af 23x", 904 | "or-int vAA, vBB, vCC", 905 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 906 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
907 | ], 908 | "97": [ 909 | "90..af 23x", 910 | "xor-int vAA, vBB, vCC", 911 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 912 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 913 | ], 914 | "98": [ 915 | "90..af 23x", 916 | "shl-int vAA, vBB, vCC", 917 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 918 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 919 | ], 920 | "99": [ 921 | "90..af 23x", 922 | "shr-int vAA, vBB, vCC", 923 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 924 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
925 | ], 926 | "9a": [ 927 | "90..af 23x", 928 | "ushr-int vAA, vBB, vCC", 929 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 930 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 931 | ], 932 | "9b": [ 933 | "90..af 23x", 934 | "add-long vAA, vBB, vCC", 935 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 936 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 937 | ], 938 | "9c": [ 939 | "90..af 23x", 940 | "sub-long vAA, vBB, vCC", 941 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 942 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
943 | ], 944 | "9d": [ 945 | "90..af 23x", 946 | "mul-long vAA, vBB, vCC", 947 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 948 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 949 | ], 950 | "9e": [ 951 | "90..af 23x", 952 | "div-long vAA, vBB, vCC", 953 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 954 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 955 | ], 956 | "9f": [ 957 | "90..af 23x", 958 | "rem-long vAA, vBB, vCC", 959 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 960 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
961 | ], 962 | "a0": [ 963 | "90..af 23x", 964 | "and-long vAA, vBB, vCC", 965 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 966 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 967 | ], 968 | "a1": [ 969 | "90..af 23x", 970 | "or-long vAA, vBB, vCC", 971 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 972 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 973 | ], 974 | "a2": [ 975 | "90..af 23x", 976 | "xor-long vAA, vBB, vCC", 977 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 978 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
979 | ], 980 | "a3": [ 981 | "90..af 23x", 982 | "shl-long vAA, vBB, vCC", 983 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 984 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 985 | ], 986 | "a4": [ 987 | "90..af 23x", 988 | "shr-long vAA, vBB, vCC", 989 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 990 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 991 | ], 992 | "a5": [ 993 | "90..af 23x", 994 | "ushr-long vAA, vBB, vCC", 995 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 996 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
997 | ], 998 | "a6": [ 999 | "90..af 23x", 1000 | "add-float vAA, vBB, vCC", 1001 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1002 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1003 | ], 1004 | "a7": [ 1005 | "90..af 23x", 1006 | "sub-float vAA, vBB, vCC", 1007 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1008 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1009 | ], 1010 | "a8": [ 1011 | "90..af 23x", 1012 | "mul-float vAA, vBB, vCC", 1013 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1014 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
1015 | ], 1016 | "a9": [ 1017 | "90..af 23x", 1018 | "div-float vAA, vBB, vCC", 1019 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1020 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1021 | ], 1022 | "aa": [ 1023 | "90..af 23x", 1024 | "rem-float vAA, vBB, vCC", 1025 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1026 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1027 | ], 1028 | "ab": [ 1029 | "90..af 23x", 1030 | "add-double vAA, vBB, vCC", 1031 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1032 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
1033 | ], 1034 | "ac": [ 1035 | "90..af 23x", 1036 | "sub-double vAA, vBB, vCC", 1037 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1038 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1039 | ], 1040 | "ad": [ 1041 | "90..af 23x", 1042 | "mul-double vAA, vBB, vCC", 1043 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1044 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1045 | ], 1046 | "ae": [ 1047 | "90..af 23x", 1048 | "div-double vAA, vBB, vCC", 1049 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1050 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 
1051 | ], 1052 | "af": [ 1053 | "90..af 23x", 1054 | "rem-double vAA, vBB, vCC", 1055 | "A: destination register or pair (8 bits)\nB: first source register or pair (8 bits)\nC: second source register or pair (8 bits)", 1056 | "Perform the identified binary operation on the two source registers,\nstoring the result in the destination register.\nNote:\nContrary to other -long mathematical operations (which\ntake register pairs for both their first and their second source),\nshl-long, shr-long, and ushr-long\ntake a register pair for their first source (the value to be shifted),\nbut a single register for their second source (the shifting distance)." 1057 | ], 1058 | "b0": [ 1059 | "b0..cf 12x", 1060 | "add-int/2addr vA, vB", 1061 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1062 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1063 | ], 1064 | "b1": [ 1065 | "b0..cf 12x", 1066 | "sub-int/2addr vA, vB", 1067 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1068 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1069 | ], 1070 | "b2": [ 1071 | "b0..cf 12x", 1072 | "mul-int/2addr vA, vB", 1073 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1074 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1075 | ], 1076 | "b3": [ 1077 | "b0..cf 12x", 1078 | "div-int/2addr vA, vB", 1079 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1080 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1081 | ], 1082 | "b4": [ 1083 | "b0..cf 12x", 1084 | "rem-int/2addr vA, vB", 1085 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1086 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1087 | ], 1088 | "b5": [ 1089 | "b0..cf 12x", 1090 | "and-int/2addr vA, vB", 1091 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1092 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1093 | ], 1094 | "b6": [ 1095 | "b0..cf 12x", 1096 | "or-int/2addr vA, vB", 1097 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1098 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1099 | ], 1100 | "b7": [ 1101 | "b0..cf 12x", 1102 | "xor-int/2addr vA, vB", 1103 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1104 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1105 | ], 1106 | "b8": [ 1107 | "b0..cf 12x", 1108 | "shl-int/2addr vA, vB", 1109 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1110 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1111 | ], 1112 | "b9": [ 1113 | "b0..cf 12x", 1114 | "shr-int/2addr vA, vB", 1115 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1116 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1117 | ], 1118 | "ba": [ 1119 | "b0..cf 12x", 1120 | "ushr-int/2addr vA, vB", 1121 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1122 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1123 | ], 1124 | "bb": [ 1125 | "b0..cf 12x", 1126 | "add-long/2addr vA, vB", 1127 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1128 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1129 | ], 1130 | "bc": [ 1131 | "b0..cf 12x", 1132 | "sub-long/2addr vA, vB", 1133 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1134 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1135 | ], 1136 | "bd": [ 1137 | "b0..cf 12x", 1138 | "mul-long/2addr vA, vB", 1139 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1140 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1141 | ], 1142 | "be": [ 1143 | "b0..cf 12x", 1144 | "div-long/2addr vA, vB", 1145 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1146 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1147 | ], 1148 | "bf": [ 1149 | "b0..cf 12x", 1150 | "rem-long/2addr vA, vB", 1151 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1152 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1153 | ], 1154 | "c0": [ 1155 | "b0..cf 12x", 1156 | "and-long/2addr vA, vB", 1157 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1158 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1159 | ], 1160 | "c1": [ 1161 | "b0..cf 12x", 1162 | "or-long/2addr vA, vB", 1163 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1164 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1165 | ], 1166 | "c2": [ 1167 | "b0..cf 12x", 1168 | "xor-long/2addr vA, vB", 1169 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1170 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1171 | ], 1172 | "c3": [ 1173 | "b0..cf 12x", 1174 | "shl-long/2addr vA, vB", 1175 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1176 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1177 | ], 1178 | "c4": [ 1179 | "b0..cf 12x", 1180 | "shr-long/2addr vA, vB", 1181 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1182 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1183 | ], 1184 | "c5": [ 1185 | "b0..cf 12x", 1186 | "ushr-long/2addr vA, vB", 1187 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1188 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1189 | ], 1190 | "c6": [ 1191 | "b0..cf 12x", 1192 | "add-float/2addr vA, vB", 1193 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1194 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1195 | ], 1196 | "c7": [ 1197 | "b0..cf 12x", 1198 | "sub-float/2addr vA, vB", 1199 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1200 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1201 | ], 1202 | "c8": [ 1203 | "b0..cf 12x", 1204 | "mul-float/2addr vA, vB", 1205 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1206 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1207 | ], 1208 | "c9": [ 1209 | "b0..cf 12x", 1210 | "div-float/2addr vA, vB", 1211 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1212 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1213 | ], 1214 | "ca": [ 1215 | "b0..cf 12x", 1216 | "rem-float/2addr vA, vB", 1217 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1218 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1219 | ], 1220 | "cb": [ 1221 | "b0..cf 12x", 1222 | "add-double/2addr vA, vB", 1223 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1224 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1225 | ], 1226 | "cc": [ 1227 | "b0..cf 12x", 1228 | "sub-double/2addr vA, vB", 1229 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1230 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1231 | ], 1232 | "cd": [ 1233 | "b0..cf 12x", 1234 | "mul-double/2addr vA, vB", 1235 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1236 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1237 | ], 1238 | "ce": [ 1239 | "b0..cf 12x", 1240 | "div-double/2addr vA, vB", 1241 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1242 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 
1243 | ], 1244 | "cf": [ 1245 | "b0..cf 12x", 1246 | "rem-double/2addr vA, vB", 1247 | "A: destination and first source register or pair\n(4 bits)\nB: second source register or pair (4 bits)", 1248 | "Perform the identified binary operation on the two source registers,\nstoring the result in the first source register.\nNote:\nContrary to other -long/2addr mathematical operations\n(which take register pairs for both their destination/first source and\ntheir second source), shl-long/2addr,\nshr-long/2addr, and ushr-long/2addr take a\nregister pair for their destination/first source (the value to be\nshifted), but a single register for their second source (the shifting\ndistance)." 1249 | ], 1250 | "d0": [ 1251 | "d0..d7 22s", 1252 | "add-int/lit16 vA, vB, #+CCCC", 1253 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1254 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 1255 | ], 1256 | "d1": [ 1257 | "d0..d7 22s", 1258 | "rsub-int (reverse subtract) vA, vB, #+CCCC", 1259 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1260 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 
1261 | ], 1262 | "d2": [ 1263 | "d0..d7 22s", 1264 | "mul-int/lit16 vA, vB, #+CCCC", 1265 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1266 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 1267 | ], 1268 | "d3": [ 1269 | "d0..d7 22s", 1270 | "div-int/lit16 vA, vB, #+CCCC", 1271 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1272 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 1273 | ], 1274 | "d4": [ 1275 | "d0..d7 22s", 1276 | "rem-int/lit16 vA, vB, #+CCCC", 1277 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1278 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 1279 | ], 1280 | "d5": [ 1281 | "d0..d7 22s", 1282 | "and-int/lit16 vA, vB, #+CCCC", 1283 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1284 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. 
Also, see below for details on its semantics." 1285 | ], 1286 | "d6": [ 1287 | "d0..d7 22s", 1288 | "or-int/lit16 vA, vB, #+CCCC", 1289 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1290 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 1291 | ], 1292 | "d7": [ 1293 | "d0..d7 22s", 1294 | "xor-int/lit16 vA, vB, #+CCCC", 1295 | "A: destination register (4 bits)\nB: source register (4 bits)\nC: signed int constant (16 bits)", 1296 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result in\nthe destination register.\nNote:\nrsub-int does not have a suffix since this version is the\nmain opcode of its family. Also, see below for details on its semantics." 1297 | ], 1298 | "d8": [ 1299 | "d8..e2 22b", 1300 | "add-int/lit8 vAA, vBB, #+CC", 1301 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1302 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1303 | ], 1304 | "d9": [ 1305 | "d8..e2 22b", 1306 | "rsub-int/lit8 vAA, vBB, #+CC", 1307 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1308 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 
1309 | ], 1310 | "da": [ 1311 | "d8..e2 22b", 1312 | "mul-int/lit8 vAA, vBB, #+CC", 1313 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1314 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1315 | ], 1316 | "db": [ 1317 | "d8..e2 22b", 1318 | "div-int/lit8 vAA, vBB, #+CC", 1319 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1320 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1321 | ], 1322 | "dc": [ 1323 | "d8..e2 22b", 1324 | "rem-int/lit8 vAA, vBB, #+CC", 1325 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1326 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1327 | ], 1328 | "dd": [ 1329 | "d8..e2 22b", 1330 | "and-int/lit8 vAA, vBB, #+CC", 1331 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1332 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 
1333 | ], 1334 | "de": [ 1335 | "d8..e2 22b", 1336 | "or-int/lit8 vAA, vBB, #+CC", 1337 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1338 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1339 | ], 1340 | "df": [ 1341 | "d8..e2 22b", 1342 | "xor-int/lit8 vAA, vBB, #+CC", 1343 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1344 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1345 | ], 1346 | "e0": [ 1347 | "d8..e2 22b", 1348 | "shl-int/lit8 vAA, vBB, #+CC", 1349 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1350 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1351 | ], 1352 | "e1": [ 1353 | "d8..e2 22b", 1354 | "shr-int/lit8 vAA, vBB, #+CC", 1355 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1356 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 
1357 | ], 1358 | "e2": [ 1359 | "d8..e2 22b", 1360 | "ushr-int/lit8 vAA, vBB, #+CC", 1361 | "A: destination register (8 bits)\nB: source register (8 bits)\nC: signed int constant (8 bits)", 1362 | "Perform the indicated binary op on the indicated register (first\nargument) and literal value (second argument), storing the result\nin the destination register.\nNote: See below for details on the semantics of\nrsub-int." 1363 | ], 1364 | "e3": [ 1365 | "e3..f9 10x", 1366 | "(unused)", 1367 | "", 1368 | "(unused)" 1369 | ], 1370 | "e4": [ 1371 | "e3..f9 10x", 1372 | "(unused)", 1373 | "", 1374 | "(unused)" 1375 | ], 1376 | "e5": [ 1377 | "e3..f9 10x", 1378 | "(unused)", 1379 | "", 1380 | "(unused)" 1381 | ], 1382 | "e6": [ 1383 | "e3..f9 10x", 1384 | "(unused)", 1385 | "", 1386 | "(unused)" 1387 | ], 1388 | "e7": [ 1389 | "e3..f9 10x", 1390 | "(unused)", 1391 | "", 1392 | "(unused)" 1393 | ], 1394 | "e8": [ 1395 | "e3..f9 10x", 1396 | "(unused)", 1397 | "", 1398 | "(unused)" 1399 | ], 1400 | "e9": [ 1401 | "e3..f9 10x", 1402 | "(unused)", 1403 | "", 1404 | "(unused)" 1405 | ], 1406 | "ea": [ 1407 | "e3..f9 10x", 1408 | "(unused)", 1409 | "", 1410 | "(unused)" 1411 | ], 1412 | "eb": [ 1413 | "e3..f9 10x", 1414 | "(unused)", 1415 | "", 1416 | "(unused)" 1417 | ], 1418 | "ec": [ 1419 | "e3..f9 10x", 1420 | "(unused)", 1421 | "", 1422 | "(unused)" 1423 | ], 1424 | "ed": [ 1425 | "e3..f9 10x", 1426 | "(unused)", 1427 | "", 1428 | "(unused)" 1429 | ], 1430 | "ee": [ 1431 | "e3..f9 10x", 1432 | "(unused)", 1433 | "", 1434 | "(unused)" 1435 | ], 1436 | "ef": [ 1437 | "e3..f9 10x", 1438 | "(unused)", 1439 | "", 1440 | "(unused)" 1441 | ], 1442 | "f0": [ 1443 | "e3..f9 10x", 1444 | "(unused)", 1445 | "", 1446 | "(unused)" 1447 | ], 1448 | "f1": [ 1449 | "e3..f9 10x", 1450 | "(unused)", 1451 | "", 1452 | "(unused)" 1453 | ], 1454 | "f2": [ 1455 | "e3..f9 10x", 1456 | "(unused)", 1457 | "", 1458 | "(unused)" 1459 | ], 1460 | "f3": [ 1461 | "e3..f9 10x", 1462 | "(unused)", 1463 | 
"", 1464 | "(unused)" 1465 | ], 1466 | "f4": [ 1467 | "e3..f9 10x", 1468 | "(unused)", 1469 | "", 1470 | "(unused)" 1471 | ], 1472 | "f5": [ 1473 | "e3..f9 10x", 1474 | "(unused)", 1475 | "", 1476 | "(unused)" 1477 | ], 1478 | "f6": [ 1479 | "e3..f9 10x", 1480 | "(unused)", 1481 | "", 1482 | "(unused)" 1483 | ], 1484 | "f7": [ 1485 | "e3..f9 10x", 1486 | "(unused)", 1487 | "", 1488 | "(unused)" 1489 | ], 1490 | "f8": [ 1491 | "e3..f9 10x", 1492 | "(unused)", 1493 | "", 1494 | "(unused)" 1495 | ], 1496 | "f9": [ 1497 | "e3..f9 10x", 1498 | "(unused)", 1499 | "", 1500 | "(unused)" 1501 | ], 1502 | "fa": [ 1503 | "fa 45cc", 1504 | "invoke-polymorphic {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH", 1505 | "A: argument word count (4 bits)\nB: method reference index (16 bits)\nC: receiver (16 bits)\nD..G: argument registers (4 bits each)\nH: prototype reference index (16 bits)", 1506 | "Invoke the indicated signature polymorphic method. The result (if any) may be stored\nwith an appropriate move-result* variant as the immediately\nsubsequent instruction.\nThe method reference must be to a signature polymorphic method, such as\njava.lang.invoke.MethodHandle.invoke or\njava.lang.invoke.MethodHandle.invokeExact.\nThe receiver must be an object supporting the signature polymorphic\nmethod being invoked.\nThe prototype reference describes the argument types provided\nand the expected return type.\nThe invoke-polymorphic bytecode may raise exceptions when it\nexecutes. The exceptions are described in the API documentation\nfor the signature polymorphic method being invoked.\nPresent in Dex files from version 038 onwards." 1507 | ], 1508 | "fb": [ 1509 | "fb 4rcc", 1510 | "invoke-polymorphic/range {vCCCC .. vNNNN}, meth@BBBB, proto@HHHH", 1511 | "A: argument word count (8 bits)\nB: method reference index (16 bits)\nC: receiver (16 bits)\nH: prototype reference index (16 bits)\nN = A + C - 1", 1512 | "Invoke the indicated method handle. 
See the invoke-polymorphic\ndescription above for details.\nPresent in Dex files from version 038 onwards." 1513 | ], 1514 | "fc": [ 1515 | "fc 35c", 1516 | "invoke-custom {vC, vD, vE, vF, vG}, call_site@BBBB", 1517 | "A: argument word count (4 bits)\nB: call site reference index (16 bits)\nC..G: argument registers (4 bits each)", 1518 | "Resolves and invokes the indicated call site.\nThe result from the invocation (if any) may be stored with an\nappropriate move-result* variant as the immediately\nsubsequent instruction.\n\nThis instruction executes in two phases: call site\nresolution and call site invocation.\n\nCall site resolution checks whether the indicated\ncall site has an associated java.lang.invoke.CallSite instance.\nIf not, the bootstrap linker method for the indicated call site is\ninvoked using arguments present in the DEX file\n(see call_site_item). The\nbootstrap linker method returns\na java.lang.invoke.CallSite instance that will then\nbe associated with the indicated call site if no association\nexists. Another thread may have already made the association first,\nand if so execution of the instruction continues with the\nfirst associated java.lang.invoke.CallSite instance.\n\nCall site invocation is made on the\njava.lang.invoke.MethodHandle target of the resolved\njava.lang.invoke.CallSite instance. The target is invoked as\nif executing invoke-polymorphic (described above) using the\nmethod handle and arguments to the invoke-custom instruction\nas the arguments to an exact method handle invocation.\n\nExceptions raised by the bootstrap linker method are wrapped\nin a java.lang.BootstrapMethodError. A\nBootstrapMethodError is also raised if:\n\nthe bootstrap linker method fails to return a\njava.lang.invoke.CallSite instance.\nthe returned java.lang.invoke.CallSite has a\nnull method handle target.\nthe method handle target is not of the requested type.\n\nPresent in Dex files from version 038 onwards." 
1519 | ], 1520 | "fd": [ 1521 | "fd 3rc", 1522 | "invoke-custom/range {vCCCC .. vNNNN}, call_site@BBBB", 1523 | "A: argument word count (8 bits)\nB: call site reference index (16 bits)\nC: first argument register (16-bits)\nN = A + C - 1", 1524 | "Resolve and invoke a call site. See the invoke-custom\ndescription above for details.\nPresent in Dex files from version 038 onwards." 1525 | ], 1526 | "fe": [ 1527 | "fe 21c", 1528 | "const-method-handle vAA, method_handle@BBBB", 1529 | "A: destination register (8 bits)\nB: method handle index (16 bits)", 1530 | "Move a reference to the method handle specified by the given index into the\nspecified register.\nPresent in Dex files from version 039 onwards." 1531 | ], 1532 | "ff": [ 1533 | "ff 21c", 1534 | "const-method-type vAA, proto@BBBB", 1535 | "A: destination register (8 bits)\nB: method prototype reference (16 bits)", 1536 | "Move a reference to the method prototype specified by the given index into the\nspecified register.\nPresent in Dex files from version 039 onwards." 
1537 | ] 1538 | } -------------------------------------------------------------------------------- /NetworkTraining/DAMD/Opcodes_all.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/explain-mlsec/a82530e1cb95f829a3147f1f5a0d3cf2b3e68975/NetworkTraining/DAMD/Opcodes_all.zip -------------------------------------------------------------------------------- /NetworkTraining/DAMD/config.py: -------------------------------------------------------------------------------- 1 | token_path = 'Converted' 2 | no_tokens = 218 3 | batch_size = 32 4 | epochs = 50 5 | testset_size = 0.1 6 | vec_output = True # if True output dimension is 2 (with softmax) else 1 with simple sigmoid -------------------------------------------------------------------------------- /NetworkTraining/DAMD/config_preprocessing.py: -------------------------------------------------------------------------------- 1 | opcode_path = 'Opcodes_all' 2 | dalvik_opcode_path = 'DalvikOpcodes.txt' 3 | save_path = 'Converted' -------------------------------------------------------------------------------- /NetworkTraining/DAMD/damd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | from tqdm import tqdm 5 | import pickle as pkl 6 | import numpy as np 7 | from keras import Sequential 8 | from keras.layers import Dense, Conv1D, Embedding, GlobalMaxPooling1D 9 | from keras.preprocessing.sequence import pad_sequences 10 | from sklearn.model_selection import train_test_split 11 | from keras.callbacks import ModelCheckpoint 12 | from config import * 13 | 14 | sys.path.append('../../utils/') 15 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric 16 | 17 | 18 | # sorts a list of filenames in uprising order with respect to the number of tokens in the files 19 | def get_sorted_list_of_filenames(data_path): 20 | fname_list = 
def get_sorted_list_of_filenames(data_path):
    """Return the names of the files in `data_path`, ordered by ascending token count.

    The token count of a file is the number of comma-separated fields it holds.
    """
    fname_to_len = {}
    print('Sorting input by length ...')
    for fname in tqdm(os.listdir(data_path)):
        # Close every file right away instead of leaving the handle to the garbage collector.
        with open(os.path.join(data_path, fname), 'r') as f:
            fname_to_len[fname] = len(f.read().split(','))
    # sorted() is stable, so files with the same token count keep their listing order.
    return sorted(fname_to_len, key=fname_to_len.get)


def filename_list_to_numpy_arrays(filenames, root_path):
    """Load token-index files and derive one-hot labels from the file names.

    Every file holds comma-separated integer token indices. A name whose last
    dot-separated part is '0' is labelled [1, 0] (benign); any other ending is
    labelled [0, 1] (malicious).

    Returns a tuple (indices, labels):
      * indices - a 2-D uint8 array when all files have the same number of tokens,
        otherwise a 1-D object array holding one uint8 array per file.
      * labels  - array of shape (len(filenames), 2).
    """
    sequences = []
    # labels are either [1,0] or [0,1]
    labels = np.zeros(shape=(len(filenames), 2))
    for i, filename in enumerate(filenames):
        with open(os.path.join(root_path, filename), 'r') as f:
            # int() tolerates the trailing newline written after the last field.
            sequences.append(np.array([int(tok) for tok in f.read().split(',')], dtype=np.uint8))
        labels[i, :] = [1, 0] if filename.split('.')[-1] == '0' else [0, 1]
    if len({len(seq) for seq in sequences}) <= 1:
        return np.array(sequences), labels
    # Sequences of different lengths: np.array() on a ragged list raises on recent NumPy
    # versions, so the object array is filled element by element.
    ragged = np.empty(len(sequences), dtype=object)
    for i, seq in enumerate(sequences):
        ragged[i] = seq
    return ragged, labels


def get_damd_cnn(no_tokens, final_nonlinearity='softmax'):
    """Build the (uncompiled) DAMD network.

    Layers: embedding (no_tokens + 1 rows, 8 dimensions) -> 1-D convolution
    (64 filters, kernel size 8, ReLU) -> global max pooling -> dense (16, ReLU)
    -> dense (2, `final_nonlinearity`).
    """
    embedding_dimensions = 8
    no_convolutional_filters = 64
    number_of_dense_units = 16
    kernel_size = 8
    no_labels = 2
    model = Sequential()
    model.add(Embedding(input_dim=no_tokens+1, output_dim=embedding_dimensions))
    model.add(Conv1D(filters=no_convolutional_filters, kernel_size=kernel_size, padding='valid', activation='relu'))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(number_of_dense_units, activation='relu'))
    model.add(Dense(no_labels, activation=final_nonlinearity))
    # summary() prints by itself; wrapping it in print() only added a stray "None" line.
    model.summary()
    return model


def _pad_batch(sequences):
    """Pad all sequences of one batch to the length of the longest sequence in that batch."""
    return pad_sequences(sequences, max(len(seq) for seq in sequences), dtype='uint8')


def train_network_batchwise(data_path, network, no_epochs, batch_size, testset_size, random_state=42):
    """Compile `network` and train it batch by batch on the token files in `data_path`.

    After every epoch the mean train/test accuracy is printed and the network is
    saved to 'models/damd_model_<epoch index>'.

    `vec_output` is not a parameter; it is read from module scope (config.py defines it).
    """
    os.makedirs('models', exist_ok=True)
    network.compile(optimizer='adam', loss='categorical_crossentropy',
                    metrics=['accuracy',
                             custom_true_positive_metric(vec_output),
                             custom_false_positive_metric(vec_output)])
    filenames_sorted = get_sorted_list_of_filenames(data_path)
    # NOTE(review): train_test_split shuffles by default, so the length ordering produced above
    # is lost here. Batches are therefore padded to their own maximum length below; padding to
    # len(x[-1]) silently truncated every sequence longer than the last one of the batch.
    names_train, names_test = train_test_split(filenames_sorted, test_size=testset_size, random_state=random_state)
    for j in range(no_epochs):
        acc_train, acc_test = [], []
        print('Training epoch {}'.format(j+1))
        for i in tqdm(range(0, len(names_train), batch_size)):
            x, y = filename_list_to_numpy_arrays(names_train[i:i+batch_size], data_path)
            res = network.train_on_batch(_pad_batch(x), y)
            acc_train.append(res[1])  # res[0] is the loss, res[1] the first metric (accuracy)
        print('Train accuracy after {} epochs: {}:'.format(j+1, np.mean(acc_train)))
        for k in range(0, len(names_test), batch_size):
            x, y = filename_list_to_numpy_arrays(names_test[k:k+batch_size], data_path)
            res = network.test_on_batch(_pad_batch(x), y)
            acc_test.append(res[1])
        print('Test accuracy after {} epochs: {}:'.format(j+1, np.mean(acc_test)))
        network.save('models/damd_model_%d' % j)


if __name__ == '__main__':
    damd_model = get_damd_cnn(no_tokens)
    train_network_batchwise(token_path, damd_model, epochs, batch_size, testset_size)
def doc_preprocessing(s, n=2):
    """Split a Dalvik byte-sequence document into consecutive n-character tokens.

    All lines of `s` are concatenated first; the result is then cut into chunks of
    `n` characters (2 by default). The last chunk is shorter if the length is not a
    multiple of `n`.
    """
    joined = ''.join(s.splitlines())
    chunks = [joined[i:i + n] for i in range(0, len(joined), n)]
    # The join/split round trip is kept on purpose: it drops whitespace that ends up inside a chunk.
    return ' '.join(chunks).split()


def get_dalvik_token_to_index(dalvik_opcode_path, save=False):
    """Map every opcode token of the opcode table to a 1-based index.

    The token is the second whitespace-separated field of each line in
    `dalvik_opcode_path`. Blank lines are skipped. If `save` is true the mapping is
    also written to the file 'token2idx_damd' as 'token:index' lines.
    """
    with open(dalvik_opcode_path) as f:
        tokens = [line.split()[1] for line in f if line.strip()]
    # index starts at 1 to have 0 for padding
    token_to_idx = dict(zip(tokens, range(1, len(tokens) + 1)))
    if save:
        with open('token2idx_damd', 'w') as f:
            for k, v in token_to_idx.items():
                print('{}:{}'.format(k, v), file=f)
    return token_to_idx


# converts files of dalvik opcode sequences into files of sequences of token indices where each token represents one
# dalvik opcode. Assumes that the files in the document_path folder have ending .1 for malicious and .0 for benign.
# requires a file containing all dalvik opcodes for conversion
def convert_docs_to_idx(document_path, dalvik_opcode_path, saving_path):
    """Write, for every file in `document_path`, a file of the same name to `saving_path`
    that holds the comma-separated token indices of the document.

    Raises KeyError if a document contains a token that is missing from the opcode table.
    """
    token_to_idx = get_dalvik_token_to_index(dalvik_opcode_path, save=True)
    doc_names = os.listdir(document_path)  # list once; reused for the count and the loop
    print('Converting {} files to tokenized representation...'.format(len(doc_names)))
    os.makedirs(saving_path, exist_ok=True)
    for fn in tqdm(doc_names):
        with open(os.path.join(document_path, fn), 'r') as src:
            tokens = doc_preprocessing(src.read())
        with open(os.path.join(saving_path, fn), 'w') as dst:
            print(','.join(str(token_to_idx[tok]) for tok in tokens), file=dst)


if __name__ == '__main__':
    convert_docs_to_idx(opcode_path, dalvik_opcode_path, save_path)
class DrebinDataGenerator(keras.utils.Sequence):
    """Batch generator over Drebin feature files.

    Takes a list of file names, vectorizes the corresponding files once with `vec`
    and yields (feature batch, label batch) pairs. Labels are looked up in
    `label_dict` by file name.
    """

    def __init__(self, vec, data_list, feature_path, label_dict, batch_size, shuffle=True, vec_labels=True):
        self.data_names = data_list
        self.data_paths = [os.path.join(feature_path, item) for item in data_list]
        self.label_dict = label_dict
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.vec = vec
        # row i of self.data belongs to self.data_paths[i]
        self.data = vec.transform(self.data_paths)
        self.vec_labels = vec_labels
        self.on_epoch_end()

    def on_epoch_end(self):
        """Shuffle the samples after an epoch so that every iteration sees different batches.

        The data matrix is permuted together with the path list, so that row i still
        belongs to path i.
        """
        if self.shuffle:
            # Shuffling an index list draws exactly the same permutation random.shuffle() would
            # apply to the path list itself, and permuting the rows of the existing matrix avoids
            # re-reading and re-vectorizing every file each epoch.
            # NOTE(review): assumes vec.transform() builds each row from its own file only and
            # returns a matrix that supports row indexing with a list (e.g. scipy CSR) - confirm.
            order = list(range(len(self.data_paths)))
            random.shuffle(order)
            self.data_paths = [self.data_paths[i] for i in order]
            self.data = self.data[order]

    def __len__(self):
        # number of full batches; a trailing partial batch is dropped
        return len(self.data_names) // self.batch_size

    # returns label representation for binary labels.
    def label_to_representation(self, label):
        """Return [1, 0] for label 0 and [0, 1] otherwise if vector labels are enabled,
        else the label itself."""
        if not self.vec_labels:
            return label
        return [1, 0] if label == 0 else [0, 1]

    def __getitem__(self, idx):
        """Return the idx-th batch as (dense feature array, uint8 label array)."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        data_batch = self.data[start:stop, :].toarray()
        # the label dict is keyed by the bare file name, not by the full path
        labels = [self.label_dict[os.path.split(path)[1]] for path in self.data_paths[start:stop]]
        label_batch = np.array([self.label_to_representation(label) for label in labels], dtype='uint8')
        return data_batch, label_batch
def get_network(no_features, final_nonlinearity, vec_output):
    """Build the network used by Grosse et al. in 'Adversarial Examples for Malware Detection'.

    Two dense layers with 200 ReLU units, each followed by dropout (rate 0.5), and an
    output layer with `final_nonlinearity` that has 2 units if `vec_output` is true
    and 1 unit otherwise. The model is returned uncompiled.
    """
    model = Sequential()
    model.add(Dense(units=200, activation='relu', input_shape=(no_features, )))
    model.add(Dropout(rate=0.5))
    model.add(Dense(units=200, activation='relu'))
    model.add(Dropout(rate=0.5))
    model.add(Dense(units=2 if vec_output else 1, activation=final_nonlinearity))
    return model


# train the model
def train_model(model, training_gen, test_gen, loss, epochs, vec_output, save_period=1):
    """Compile `model` with SGD and `loss`, then fit it on `training_gen`.

    `test_gen` is used as validation data. Two checkpoint callbacks store the model
    below 'models/' whenever the validation true-positive metric reaches a new
    maximum or the validation false-positive metric a new minimum.
    """
    # summary() prints by itself; wrapping it in print() only added a stray "None" line.
    model.summary()
    os.makedirs('models', exist_ok=True)
    model.compile(optimizer=SGD(), loss=loss, metrics=['accuracy', custom_true_positive_metric(vec_output),
                                                       custom_false_positive_metric(vec_output)])
    fname = 'models/model.{epoch:03d}--ACC_{val_accuracy:.4f}--FP_{val_false_positive_metric:.4f}--' \
            'TP_{val_true_positive_metric:.4f}.hdf5'
    # Monitor the validation metric: the file name and the false-positive callback are both based
    # on validation values, whereas 'true_positive_metric' tracked the training batches.
    model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max',
                                          period=save_period)
    model_checkpoint_fp = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min',
                                          period=save_period)
    model.fit_generator(generator=training_gen, epochs=epochs, class_weight={0:1, 1:6.5}, validation_data=test_gen,
                        max_queue_size=10, callbacks=[model_checkpoint_tp, model_checkpoint_fp])


# returns two lists of filenames, one for training and one for testing. The lists are specified by a doc path (to a file
# containing the feature vectors) and a split path containing files with filenames for training, testing, validation.
# since there are several splits for the dataset, the index specifies which split to choose
def get_train_test_data_names(split_path, index, label_dict):
    """Return (train_names, test_names) of split number `index`.

    Only names that have an entry in `label_dict` are kept.
    """
    split = get_train_test_valid_names(split_path)[index]
    # split[0] holds the training names, split[1] the test names
    train_names = [name for name in split[0] if name in label_dict]
    test_names = [name for name in split[1] if name in label_dict]
    return train_names, test_names


def load_or_build_pickle(cache_path, build, message):
    """Return the object pickled at `cache_path`; if the file does not exist, print
    `message`, create the object with `build()` and pickle it to `cache_path` first."""
    if os.path.isfile(cache_path):
        # NOTE: unpickling can execute arbitrary code - only load cache files created by this script.
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    print(message)
    obj = build()
    # the with-block guarantees the cache is flushed and closed before it is used again
    with open(cache_path, 'wb') as f:
        pickle.dump(obj, f)
    return obj


if __name__ == '__main__':
    label_dict = load_or_build_pickle('train_label_dict.pkl',
                                      lambda: virustotal_json_to_labels(json_path, threshold),
                                      'Calculating label dict ...')
    vec = load_or_build_pickle('train_vec.pkl',
                               lambda: get_count_vectorizer(doc_path, label_dict),
                               'Calculating count vectorizer ...')
    no_tokens = len(vec.vocabulary_)
    train_data_names, test_data_names = get_train_test_data_names(split_path, split_index, label_dict)
    train_data_gen = DrebinDataGenerator(vec, train_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    test_data_gen = DrebinDataGenerator(vec, test_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    model = get_network(no_tokens, final_nonlinearity=nonlinearity, vec_output=vec_output)
    print('training with %d tokens' % no_tokens)
    train_model(model, train_data_gen, test_data_gen, loss, epochs=epochs, vec_output=vec_output)
insert path to feature vectors folder from drebin dataset 3 | json_path = '' # insert path to virustotal.json from drebin dataset 4 | split_path = '' # insert path to datasplits/all from drebin dataset 5 | 6 | # parameters for learning 7 | threshold = 10 # at least 10 scanners have to exist and classify the sample as malicious 8 | batch_size = 64 # batch_size during training 9 | split_index = 0 # which of the data splits from drebin dataset to use (0,..,10) 10 | vec_output = True # whether to output vectors [1,0], [0,1] or float at the end of the network 11 | nonlinearity = 'softmax' # nonlinearity in the final layer 12 | loss = 'binary_crossentropy' # loss for training 13 | epochs = 50 # number of training epochs 14 | -------------------------------------------------------------------------------- /NetworkTraining/Drebin/drebin.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | import sys 4 | from keras.models import Sequential 5 | from keras.layers import Dense, Dropout 6 | from keras.optimizers import SGD 7 | from keras.callbacks import ModelCheckpoint 8 | from DrebinDataGenerator import DrebinDataGenerator 9 | from drebin_datapipeline import virustotal_json_to_labels, get_train_test_valid_names, get_count_vectorizer 10 | from config import * 11 | 12 | sys.path.append('../../utils/') 13 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric 14 | 15 | # the network used by grosse et. 
# train the model
def train_model(model, training_gen, test_gen, loss, epochs, vec_output, save_period=1):
    """Compile ``model`` with SGD and train it from generators, checkpointing on validation TPR and FPR.

    :param model: uncompiled keras model (e.g. the result of ``get_network``).
    :param training_gen: generator passed to ``fit_generator`` as training data.
    :param test_gen: generator passed to ``fit_generator`` as validation data.
    :param loss: loss identifier forwarded to ``model.compile``.
    :param epochs: number of training epochs.
    :param vec_output: forwarded to the custom TPR/FPR metric factories.
    :param save_period: forwarded as ``period`` to both checkpoint callbacks.
    """
    # summary() prints by itself and returns None, so it must not be wrapped in print()
    model.summary()
    model.compile(optimizer=SGD(), loss=loss, metrics=['accuracy', custom_true_positive_metric(vec_output),
                                                       custom_false_positive_metric(vec_output)])
    # the checkpoints are written to ../models, make sure the directory exists before the first save
    os.makedirs('../models', exist_ok=True)
    # NOTE(review): the 'val_acc' key depends on the installed keras version (mimicus.py uses 'val_accuracy')
    # - confirm against the version this script is run with
    fname = '../models/model.{epoch:03d}--ACC_{val_acc:.4f}--FP_{val_false_positive_metric:.4f}--' \
            'TP_{val_true_positive_metric:.4f}.hdf5'
    # both checkpoints monitor validation metrics; the TP checkpoint previously watched the training metric
    # 'true_positive_metric' although the file name and the FP checkpoint refer to the validation set
    model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max',
                                          period=save_period)
    model_checkpoint_fp = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min',
                                          period=save_period)
    # samples of class 1 are weighted 6.5 times as much as samples of class 0
    model.fit_generator(generator=training_gen, epochs=epochs, class_weight={0: 1, 1: 6.5}, validation_data=test_gen,
                        max_queue_size=10, callbacks=[model_checkpoint_tp, model_checkpoint_fp])
if __name__ == '__main__':
    # NOTE: the two .pkl files are local caches written by this very script. pickle must never be used to load
    # files from an untrusted source.
    # label dict: computed once from the virustotal json, afterwards re-used from the cache file
    if not os.path.isfile('train_label_dict.pkl'):
        print('Calculating label dict ...')
        label_dict = virustotal_json_to_labels(json_path, threshold)
        with open('train_label_dict.pkl', 'wb') as cache_file:
            pickle.dump(label_dict, cache_file)
    else:
        with open('train_label_dict.pkl', 'rb') as cache_file:
            label_dict = pickle.load(cache_file)
    # count vectorizer: fitted once on all labelled documents, afterwards re-used from the cache file
    if not os.path.isfile('train_vec.pkl'):
        print('Calculating count vectorizer ...')
        vec = get_count_vectorizer(doc_path, label_dict)
        with open('train_vec.pkl', 'wb') as cache_file:
            pickle.dump(vec, cache_file)
    else:
        with open('train_vec.pkl', 'rb') as cache_file:
            vec = pickle.load(cache_file)
    # the size of the vocabulary is the input dimension of the network
    no_tokens = len(vec.vocabulary_)
    train_data_names, test_data_names = get_train_test_data_names(split_path, split_index, label_dict)
    train_data_gen = DrebinDataGenerator(vec, train_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    test_data_gen = DrebinDataGenerator(vec, test_data_names, doc_path, label_dict, batch_size, vec_labels=vec_output)
    model = get_network(no_tokens, final_nonlinearity=nonlinearity, vec_output=vec_output)
    print('training with %d tokens' % no_tokens)
    train_model(model, train_data_gen, test_data_gen, loss, epochs=epochs, vec_output=vec_output)
# converts list of filepaths into sklearn count vectorizer
def get_count_vectorizer(documents_path, label_dict):
    """Fit a ``CountVectorizer`` on every file in ``documents_path`` whose name is contained in ``label_dict``.

    :param documents_path: directory whose entries are listed with ``os.listdir``.
    :param label_dict: container of sample names; files with other names are ignored.
    :return: the fitted vectorizer (the document-term matrix of the fit is discarded).
    """
    labelled_entries = [entry for entry in os.listdir(documents_path) if entry in label_dict]
    all_paths = [os.path.join(documents_path, entry) for entry in labelled_entries]
    # input='filename' lets the vectorizer read the files itself; case is preserved
    # NOTE(review): '.+' does not match newlines, so presumably every line of a file becomes one token - confirm
    vectorizer = CountVectorizer(input='filename', token_pattern='.+', lowercase=False)
    vectorizer.fit_transform(all_paths)
    return vectorizer
# returns a list with tuples of (train_names, test_names, valid_names) for each split in the drebin dataset
def get_train_test_valid_names(path_to_split):
    """Collect the sample names of every split found below ``path_to_split``.

    A directory counts as a split if it contains the three files ``train_cs``, ``test_cs`` and ``validate_cs``.
    Each file is read as one sample name per line.

    :param path_to_split: root directory that is searched recursively with ``os.walk``.
    :return: list with one tuple ``(train_names, test_names, val_names)`` per split directory, in the order in
             which ``os.walk`` visits the directories.
    """
    def _read_names(file_path):
        # one name per line, line endings removed
        with open(file_path, 'r') as f:
            return f.read().splitlines()

    split_names = []
    for root, _dirs, files in os.walk(path_to_split):
        if 'test_cs' in files and 'validate_cs' in files and 'train_cs' in files:
            # every list is read from the file of the same name (train and test used to be swapped here)
            train_names = _read_names(os.path.join(root, 'train_cs'))
            test_names = _read_names(os.path.join(root, 'test_cs'))
            val_names = _read_names(os.path.join(root, 'validate_cs'))
            split_names.append((train_names, test_names, val_names))
    return split_names
def get_train_data_test_data(random_seed, binary_encoding=True):
    """Load the csv at ``path_to_csv`` and split it into training and test data.

    Column 0 of the csv is used as label and column 1 (the filename) is excluded from the features. Labels are
    one-hot encoded as ``[1, 0]`` for label 0 and ``[0, 1]`` for every other label.

    :param random_seed: ``random_state`` of both ``train_test_split`` calls.
    :param binary_encoding: if True every non-zero feature is set to 1, otherwise the features are scaled with
                            ``normalize(data, 'max', axis=0)``.
    :return: tuple ``(x_train, x_test, y_train, y_test, filenames_test)``.
    """
    non_relevant_columns = [1]  # filename
    label_column = 0
    # first pass: read everything as strings to obtain the filenames and the number of columns
    arr = np.genfromtxt(path_to_csv, dtype=str, delimiter=',', skip_header=0)
    filenames = arr[1:, 1]
    no_features = arr.shape[1]
    columns_to_use = [i for i in range(no_features) if i not in non_relevant_columns]
    # feature_names = np.genfromtxt(path_to_csv, dtype=str, delimiter=',', skip_footer=9999, usecols=columns_to_use)[1:]
    # idx_to_token = dict(zip(range(len(feature_names)), feature_names))
    # pkl.dump(idx_to_token, open('data_mimicus/idx_to_token.pkl', 'wb'))
    # second pass: numeric columns only. The builtin float replaces the alias np.float, which was removed from
    # numpy and is the same type
    arr = np.genfromtxt(path_to_csv, dtype=float, delimiter=',', skip_header=1, usecols=columns_to_use)
    labels = arr[:, label_column]
    labels = np.array([[1, 0] if l == 0 else [0, 1] for l in labels])
    data = np.delete(arr, label_column, axis=1)
    if binary_encoding:
        data[data != 0] = 1
    else:
        data = normalize(data, 'max', axis=0)
    x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.25, random_state=random_seed)
    # same test_size and random_state as above, so the filenames are split like the data rows
    _, filenames_test = train_test_split(filenames, test_size=0.25, random_state=random_seed)
    return x_train, x_test, y_train, y_test, filenames_test
# prints accuracy, precision, recall, fpr and f1 score for given model and test set with labels
def get_statistics(model, x_test, y_test):
    """Evaluate ``model`` on the test set and print accuracy, precision, recall, FPR and F1 score.

    :param model: object with a ``predict`` method returning one score per class for every sample.
    :param x_test: test samples passed to ``model.predict``.
    :param y_test: one-hot encoded ground truth; the class is taken as the argmax along axis 1.
    """
    y_pred = np.argmax(model.predict(x_test), axis=1)
    y_test = np.argmax(y_test, axis=1)
    assert len(y_pred) == len(y_test)
    # builtin float instead of the alias np.float, which was removed from numpy
    acc = np.sum(y_pred == y_test) / float(len(y_pred))
    # fix the label set so that the matrix is 2x2 even if only one class occurs in labels and predictions
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    # rows are true classes, columns are predicted classes
    TN, FN, TP, FP = cm[0, 0], cm[1, 0], cm[1, 1], cm[0, 1]
    TPR = TP/(TP+FN)
    FPR = FP/(FP+TN)
    precision = TP/(TP+FP)
    F1 = 2*TP/(2*TP+FP+FN)
    print('The model achieved: Accuracy:{}, Precision:{}, Recall:{}, FPR:{}, F1 score:{} on the test set.'.format(
        acc, precision, TPR, FPR, F1))
# simple data generator yielding batches of data from a numpy array
class VuldeeDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves consecutive slices of ``data`` and ``labels`` as batches."""

    def __init__(self, data, labels, batch_size):
        self.data, self.labels, self.batch_size = data, labels, batch_size

    def __len__(self):
        # number of batches, counting a trailing batch that is not completely filled
        full_batches, leftover = divmod(len(self.data), self.batch_size)
        full_batches = int(full_batches)
        return full_batches + 1 if leftover else full_batches

    def __getitem__(self, idx):
        # the same window is cut out of data and labels so that both stay aligned
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.data[window], self.labels[window]
'binary_crossentropy' 11 | sampling_random_seed = 42 12 | testset_size = 0.2 13 | vec_output = True 14 | -------------------------------------------------------------------------------- /NetworkTraining/VulDeePecker/config_word_to_vec.py: -------------------------------------------------------------------------------- 1 | data_paths = ['source-CWE-119-full.json'] 2 | output_name = 'w2v_model.bin' 3 | w2v_vocab_name = None # specify if you want to save vocab 4 | embedding_dim = 200 5 | iterations = 100 6 | workers = 2 # parallel training 7 | nice = 20 # the niceness value for this process 8 | -------------------------------------------------------------------------------- /NetworkTraining/VulDeePecker/source-CWE-119-full.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alewarne/explain-mlsec/a82530e1cb95f829a3147f1f5a0d3cf2b3e68975/NetworkTraining/VulDeePecker/source-CWE-119-full.zip -------------------------------------------------------------------------------- /NetworkTraining/VulDeePecker/vuldeepecker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright: 2018 Tim Dengel 3 | # License: GPLv3+ 4 | 5 | import numpy as np 6 | from config_training import * 7 | from gensim.models.word2vec import Word2Vec 8 | from keras import Sequential 9 | from keras.layers import Dense, Dropout, LSTM, Bidirectional 10 | from keras.callbacks import ModelCheckpoint 11 | import json 12 | import os 13 | from sklearn.model_selection import train_test_split 14 | import sys 15 | from VuldeeDataGenerator import VuldeeDataGenerator 16 | 17 | sys.path.append('../../utils/') 18 | from custom_metrics import custom_true_positive_metric, custom_false_positive_metric 19 | 20 | 21 | def load_data(gadgets, w2v): 22 | x = [[w2v[word] for word in gadget["tokens"]] for gadget in gadgets] 23 | y = [[1,0] if gadget["label"] == 0 else [0,1] for gadget in 
def train_model(model, training_generator, test_generator):
    """Train ``model`` from generators and checkpoint the best models into the local ``models`` directory.

    Two checkpoints are used: one keeps the model with the highest validation true positive metric, the other
    the model with the lowest validation false positive metric. The number of epochs is the module level
    ``epochs`` setting.

    :param model: compiled keras model (e.g. the result of ``get_model``).
    :param training_generator: generator passed to ``fit_generator`` as training data.
    :param test_generator: generator passed to ``fit_generator`` as validation data.
    """
    os.makedirs('models', exist_ok=True)
    # relative path: the checkpoints belong into the directory created above, not into '/models' in the
    # file system root
    # NOTE(review): the 'val_acc' key depends on the installed keras version (mimicus.py uses 'val_accuracy')
    # - confirm against the version this script is run with
    fname = 'models/model.{epoch:03d}--ACC_{val_acc:.4f}--FP_{val_false_positive_metric:.4f}--' \
            'TP_{val_true_positive_metric:.4f}.hdf5'
    model_checkpoint_tp = ModelCheckpoint(fname, monitor='val_true_positive_metric', save_best_only=True, mode='max')
    model_checkpoint_fp = ModelCheckpoint(fname, monitor='val_false_positive_metric', save_best_only=True, mode='min')
    model.fit_generator(generator=training_generator, epochs=epochs, validation_data=test_generator, max_queue_size=10,
                        callbacks=[model_checkpoint_tp, model_checkpoint_fp])
if __name__ == "__main__":
    # open all datasets at once; the json documents are parsed while the files are still open and the
    # resulting gadget lists are chained into one iterable
    with contextlib.ExitStack() as stack:
        handles = [stack.enter_context(open(dataset)) for dataset in data_paths]
        parsed_datasets = [json.load(handle) for handle in handles]
        gadgets = itertools.chain.from_iterable(parsed_datasets)

    print("Training Word2Vec embedding...")
    w2v = train_word2vec(gadgets, embedding_dim, iterations, workers)

    print("Trained Word2Vec embedding with weights of shape:", w2v.wv.syn0.shape)
    # optionally dump the mapping token -> index of the trained vocabulary as json
    if w2v_vocab_name:
        with open(w2v_vocab_name, 'w') as vocab_file:
            vocab = {token: entry.index for token, entry in w2v.wv.vocab.items()}
            vocab_file.write(json.dumps(vocab, indent=4, sort_keys=True))
    w2v.save(output_name)
    print("Written model to: {}".format(output_name))
# builds the true positive rate metric with the keras backend. predictions y_hat are rounded before they are
# compared to the labels. the vec_output parameter specifies whether the output/labels are one-hot encoded or
# one dimensional (which is only possible in a binary classification problem)
def custom_true_positive_metric(vec_output):

    # the name of the inner function must stay 'true_positive_metric', the training scripts refer to the
    # metric by this name (e.g. 'val_true_positive_metric')
    def true_positive_metric(y, y_hat):
        rounded_prediction = K.round(y_hat)
        if vec_output:
            # one-hot encoding: the class is the position of the largest entry
            truth, prediction = K.argmax(y), K.argmax(rounded_prediction)
        else:
            truth, prediction = y, rounded_prediction
        truth = K.cast(truth, dtype='float32')
        prediction = K.cast(prediction, dtype='float32')
        # indicator vectors: 1 where the label / the prediction equals one, 0 elsewhere
        truth_is_one = K.cast(K.equal(K.ones(K.shape(truth)), truth), dtype='float32')
        prediction_is_one = K.cast(K.equal(K.ones(K.shape(prediction)), prediction), dtype='float32')
        # the product is 1 exactly where both indicators are 1, its sum counts the true positives
        true_positives = K.sum(truth_is_one * prediction_is_one)
        positives = K.sum(truth_is_one)
        # there is no guard against a zero denominator (no sample with label one)
        return true_positives / positives

    return true_positive_metric
# given relevance array (either numpy or csr_sparse) normalizes it with respect to given method
# method can be one of [mean, max, abs_max].
# if macro is False, each sample will be normalized for itself, else the whole data is normalized
def normalize_relevances(relevances, method, macro=False):
    """Return a normalized deep copy of ``relevances``; the input itself is not modified.

    :param relevances: scipy CSR matrix, numpy array or list with one entry (row) per sample.
    :param method: one of 'mean', 'max', 'abs_max'; forwarded to ``normalize_array``.
    :param macro: if True the data is first merged with ``to_macro`` and normalized as a whole.
    :return: the normalized copy. For CSR input only the stored entries of each row are normalized.

    Prints a message and exits the process with status 1 on an invalid method or an unsupported data type.
    """
    if method not in ['mean', 'max', 'abs_max']:
        print('Invalid method name! Choose one of {}'.format(['mean', 'max', 'abs_max']))
        sys.exit(1)
    normed_relevances = copy.deepcopy(relevances)
    if macro:
        normed_relevances = to_macro(normed_relevances)
    # detect CSR via the public scipy API; comparing the module name of the class breaks on scipy versions
    # that define the class in a private module
    if sparse.issparse(normed_relevances) and normed_relevances.format == 'csr':
        print('Calculating normalization for {} samples ...'.format(normed_relevances.shape[0]))
        # the stored entries of row i are data[indptr[i]:indptr[i+1]], so every row can be addressed directly
        indptr = normed_relevances.indptr
        for row in tqdm(range(normed_relevances.shape[0])):
            start, stop = indptr[row], indptr[row + 1]
            if start == stop:
                # row without stored entries, nothing to normalize
                continue
            normed_relevances.data[start:stop] = normalize_array(normed_relevances.data[start:stop], method)
    elif isinstance(normed_relevances, (np.ndarray, list)):
        print('Calculating normalization for {} samples ...'.format(len(normed_relevances)))
        for i in range(len(normed_relevances)):
            if type(normed_relevances[i]) is list:
                # plain python lists are normalized as arrays and converted back afterwards
                normed_row = normalize_array(np.array(normed_relevances[i]), method)
                normed_relevances[i] = list(normed_row)
            else:
                normed_row = normalize_array(normed_relevances[i], method)
                normed_relevances[i] = normed_row
    else:
        print('Datatype not understood!')
        sys.exit(1)
    return normed_relevances
def to_macro(normed_relevances):
    """Merge all relevance values into a single row so that they can be normalized as a whole.

    :param normed_relevances: scipy CSR matrix (only the stored entries are used), numpy array or list of
                              iterables.
    :return: numpy array of shape (1, number of values).

    Prints a message and exits the process with status 1 if the data type is not supported.
    """
    # detect CSR via the public scipy API instead of comparing the module name of the class
    if sparse.issparse(normed_relevances) and normed_relevances.format == 'csr':
        macro = np.array([normed_relevances.data])
    elif isinstance(normed_relevances, np.ndarray):
        # flatten has to be called; wrapping the bound method itself yields an object array
        macro = np.array([normed_relevances.flatten()])
    elif isinstance(normed_relevances, list):
        # wrapped in an outer list like the other branches, so that the result is one single row
        macro = np.array([[x for l in normed_relevances for x in l]])
    else:
        print('Datatype not understood!')
        sys.exit(1)
    return macro