├── common ├── gpu_utills.py ├── point.py └── tensorflow_utils.py ├── computer.py ├── data ├── circles_type_8 │ ├── circles_type_8.tfrecords │ └── inference_circles_type_8.tfrecords ├── rings_9 │ └── rings_9.tfrecords └── spheres_9 │ └── spheres_9.tfrecords ├── estimators ├── __init__.py ├── data_loader.py ├── multi_gpu_utils.py ├── net_helper_funcs.py ├── nn_simple.py └── nn_trainer.py ├── julia_include ├── include_eirene_run.jl ├── julia_aux2.jl └── testing_topology_package.jl ├── pics ├── geometry_change.png └── topology_change.png ├── pipelines ├── __init__.py ├── excel_2_betti.py ├── make_anime.py ├── train_2_excel_2D.py ├── train_2_excel_3D.py ├── visualize_excel.py ├── visualize_nn_prediction.py ├── visualize_nn_simple_prediction.py └── visualize_tfrecords_dataset.py ├── readme.md ├── simulators └── simulator.py ├── transformers ├── __init__.py ├── betti_calc.py ├── betti_calc_parallel.py ├── circles_9.png ├── constant_feeder.py ├── excel_reader.py ├── excel_tensor_values_saver.py ├── excel_to_betti_feeder.py ├── excel_to_betti_feeder_parallel.py ├── excel_to_plot_feeder.py ├── grid_generator.py ├── pickle_reader.py ├── pickle_tensors_values_saver.py ├── raw_circles_generator.py ├── raw_mnist_generator.py ├── raw_rings_generator.py ├── raw_spheres_generation.py ├── rings_9.png ├── spheres_9.png ├── tensors_values_plotter.py ├── tf_reader.py ├── tf_saver.py ├── visualizer_2d.py └── visualizer_2d_loops.py └── videos ├── folding1.mp4 ├── folding2.mp4 ├── folding3.mp4 ├── folding4.mp4 ├── output_6_by_3_classes.avi └── output_6_by_3_grid.avi /common/gpu_utills.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from subprocess import Popen, PIPE 3 | 4 | 5 | def get_num_gpu(): 6 | try: 7 | p = Popen(["nvidia-smi", 8 | "--query-gpu=index", 9 | "--format=csv,noheader,nounits"], stdout=PIPE) 10 | 11 | stdout, _ = p.communicate() 12 | 13 | num_gpus = len(stdout.split(sep=b'\n')) - 1 # Num of GPU's is the number of lines produced at stdout 14 | except: 15 | warnings.warn('Cannot find the number of available GPUs. 
Using 0.') 16 | num_gpus = 0 17 | 18 | return num_gpus 19 | -------------------------------------------------------------------------------- /common/point.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | 4 | class Point2D(namedtuple('Point2D', ('x', 'y'))): 5 | def __add__(self, other): 6 | return Point2D(x=self.x + other.x, y=self.y + other.y) 7 | 8 | def __sub__(self, other): 9 | return Point2D(x=self.x - other.x, y=self.y - other.y) 10 | 11 | def __pow__(self, power, modulo=None): 12 | return Point2D(x=self.x ** power, y=self.y ** power) 13 | 14 | 15 | Point3D = namedtuple('Point3D', ('x', 'y', 'z')) 16 | -------------------------------------------------------------------------------- /common/tensorflow_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def print_tensor(sess, tensor_name): 4 | """prints value of a given tensor""" 5 | 6 | var = [v for v in tf.trainable_variables() if v.name == "fully_connected/weights:0"][0] 7 | print(sess.run(var)) 8 | 9 | def print_tensors(sess): 10 | """ prints values of all tensors.""" 11 | 12 | vars = [(v.name, sess.run(v)) for v in tf.trainable_variables()] 13 | print(vars) -------------------------------------------------------------------------------- /computer.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import time 3 | import pipelines 4 | from simulators.simulator import Simulator 5 | import glob 6 | import tensorflow as tf 7 | import datetime 8 | 9 | 10 | class ComputerSimulator(Simulator): 11 | """ 12 | Class for running various pipelines 13 | """ 14 | 15 | def __init__(self, **kwargs): 16 | Simulator.__init__(self, **kwargs) 17 | self.input_folder = kwargs.get('input_folder', None) 18 | self.output_folder = kwargs.get('output_folder', None) 19 | self.source = kwargs.get('source', 0) 20 | self.pipe_action = kwargs.get('pipe_action', 'predict') 21 | 22 | 23 | def run(self, **fit_params): 24 | 25 | content = None 26 | if self.output_folder is not None and not os.path.exists(self.output_folder): 27 | os.makedirs(self.output_folder) 28 | 29 | if self.pipe_action == 'predict': 30 | content = self.pipeline.predict(None) 31 | 32 | if self.pipe_action == 'fit': 33 | self.pipeline.fit(X=None, training_labels=None, **fit_params) 34 | 35 | if self.pipe_action == 'fit_transform': 36 | self.pipeline.fit_transform(None, None, **fit_params) 37 | 38 | if self.pipe_action == 'transform': 39 | content = self.pipeline.transform(X=None) 40 | 41 | if self.output_folder is not None: 42 | pass 43 | return content 44 | 45 | if __name__ == "__main__": 46 | import argparse 47 | from importlib import import_module 48 | 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument('--verbose', action='store_true', help="") 51 | parser.add_argument('--plot', action='store_true', help="") 52 | parser.add_argument('--cat1', action='store_true', help="") 53 | parser.add_argument('--cat2', action='store_true', help="") 54 | parser.add_argument('--input-tf-folder', default='./data/default', 55 | help="Input folder with tfrecords dataset records") 56 | parser.add_argument('--input-tf-dataset', default='default.tfrecords', help="tfrecords data set name") 57 | 58 | parser.add_argument('--output-tf-dataset', default='./data/default/', help="Generated tfrecords dataset name") 59 | parser.add_argument('--output-tf-folder', default='./data/default/', 60 | 
help="Output folder with tfrecords dataset records") 61 | 62 | parser.add_argument('--input-pickle-folder', default=None, help="Input folder with model predictins") 63 | parser.add_argument('--input-pickle-predictions', default=None, help="pickle file name with model predictions") 64 | 65 | parser.add_argument('--output-pickle-predictions', default='simple', 66 | help="Generated pickle file name with model predictions") 67 | parser.add_argument('--output-pickle-folder', default='./data/simple/', help="Output folder with model predictions") 68 | 69 | parser.add_argument('--output-plots-folder', default='./data/simple/', 70 | help="Folder name to store visualization results") 71 | 72 | parser.add_argument('--pipe-action', default='transform', 73 | choices=('fit', 'predict', 'fit_transform', 'fit_predict', 'transform'), 74 | help="Which method of final pipeline stage is to apply.") 75 | 76 | parser.add_argument('--pipeline_name', default='visualize_simple_nn_prediction', choices=pipelines.__all__, 77 | help="Pipeline to load") 78 | parser.add_argument('--output', 79 | help='Output folder where model is saved') 80 | parser.add_argument('--save_freq', 81 | default=1, 82 | type=int, 83 | help='frequency of epochs to save model snapshot.') 84 | parser.add_argument('--divisor', 85 | default=3, 86 | type=int, 87 | help='when computing betti numbers reduces the number of smaples by divisor') 88 | parser.add_argument('--betti_max', 89 | default=3, 90 | type=int, 91 | help='when computing betti numbers reduces the number of smaples by divisor') 92 | parser.add_argument('--trials', 93 | default=1, 94 | type=int, 95 | help='how many time run simulation') 96 | parser.add_argument('--trial', 97 | default=0, 98 | type=int, 99 | help='initialize simulation with trial having tis value.') 100 | parser.add_argument('--id', 101 | default=0, 102 | type=int, 103 | help='id for cross referencing') 104 | parser.add_argument('--neighbors', 105 | default=14, 106 | type=int, 107 | help='number of nearest neighbors for betti numbers calculation') 108 | parser.add_argument('--validation_freq', 109 | default=1, 110 | type=int, 111 | help='frequency of validation of the model in number of iterations.') 112 | parser.add_argument('--summary_freq', 113 | type=int, 114 | default=50, 115 | help='frequency of validation of the model in number of iterations.') 116 | parser.add_argument('--learning_rate', type=float, default=0.00001) 117 | parser.add_argument('--num_gpus', type=int, default=1, help='Number of GPUs to use.') 118 | parser.add_argument('--training_epochs', type=int, default=1000, help='Number of GPUs to use.') 119 | parser.add_argument('--use_cpu', action='store_true', help='Use cpu') 120 | parser.add_argument('--activation_type', default='LeakyRelu', choices=['Relu', 'LeakyRelu', 'Tanh', 'Custom'], 121 | help='Specify actiavation function') 122 | parser.add_argument('--read_excel_from', default='list', choices=['list', 'Relu', 'LeakyRelu', 'Tanh', 'Custom'], 123 | help='Specify where to take excel files from') 124 | parser.add_argument('--gpu', default=-1, type=int, help='gpu id (-1 to disable GPU)'), 125 | parser.add_argument('--pretrained-model', 126 | default=None, 127 | help='Set pretrained model path if exists. 
Otherwises start from random') 128 | parser.add_argument('--model', 129 | default='base', 130 | choices=['base', 'arch_type_1', '4_by_50', '10_by_5', '10_by_6', '10_by_10', '10_by_15', '8_by_15', '2_by_15', 131 | '3_by_15', 132 | '4_by_15', '10_by_25','10_by_25', '10_by_50','5_by_15','6_by_15','7_by_15', 133 | '3_by_10_2_by_1_3_by_10', 134 | '3_by_10_3_by_1_3_by_10', 135 | '3_by_25_2_by_1_3_by_25', 136 | '3_by_25_3_by_1_3_by_25', 'nn_simple', 'nn_9_by_6_simple', 'nn_hourglass_simple', 137 | 'nn_5_by_50_simple', 'nn_9_by_30_simple', 'nn_9_by_10_simple', 'nn_9_by_15_simple', 138 | 'nn_9_by_20_simple', 'nn_9_by_50_simple', 'nn_9_by_3_simple', 'nn_12_by_3_simple', 139 | 'nn_16_by_3_simple'], 140 | help='Specifies which model to use.') 141 | parser.add_argument('--save2pkl', action='store_true'), 142 | parser.add_argument('--pretrained', default='', help='pretrained model to load') 143 | parser.add_argument('--restore_meta_graph', action='store_true', help='') 144 | parser.add_argument('--freeze_pretrained', action='store_true', help='') 145 | 146 | args = parser.parse_args() 147 | j = '' 148 | initial_path = args.output 149 | total_good_results = 0 150 | 151 | def main(trial=0): 152 | # analyze simulation results 153 | if args.pipeline_name in ['excel_2_betti_3D_parallel', 'excel_2_betti']: 154 | base = os.path.join(initial_path, args.model) 155 | # read 100% accuracy results. 156 | 157 | with open(os.path.join(base, args.read_excel_from, 'good_results.txt')) as f: 158 | content = f.readlines() 159 | 160 | # get rid of empty lines and empty characters 161 | content = [item.rstrip() for item in content if item.strip()] 162 | if len(content) < trial - 1: 163 | return -1 164 | else: 165 | try: 166 | args.pretrained = os.path.join(base, os.path.basename(content[trial])) 167 | except: 168 | print("content is shorter than trial", trial, len(content)) 169 | return -1 170 | print(args.pretrained) 171 | if args.pipeline_name in ['train_2_betti', 'excel_2_betti', 'train_2_excel_2D', 'train_2_excel_3D', 'excel_2_betti_3D_parallel']: 172 | 173 | 174 | all_subdirs = [d for d in glob.glob(args.output + '/*') if os.path.isdir(d)] 175 | 176 | if args.pretrained: 177 | args.output = os.path.join(initial_path, args.model, os.path.basename(args.pretrained)) 178 | else: 179 | if args.pretrained_model and not len(all_subdirs) == 0: 180 | 181 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 182 | 183 | 184 | args.output = os.path.join(initial_path, args.model, 185 | timestr + "-pretrained-on-" + os.path.basename(args.pretrained_model)) 186 | args.output = os.path.join(initial_path, args.model, os.path.basename(args.pretrained_model)) 187 | else: 188 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 189 | args.output = os.path.join(initial_path, args.model, timestr) 190 | print('Created new output directory: ' + args.output) 191 | 192 | fit_params = vars(args) 193 | fit_params.update({'julia': j}) 194 | args.pipeline = import_module('pipelines.{}'.format(args.pipeline_name)).load(args) 195 | simulator = ComputerSimulator(**vars(args)) 196 | good_result = 0 197 | try: 198 | # used in conjecture with "visualize_nn_prediction.py" pipeline 199 | # and pipe action predict. 
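# (note) ComputerSimulator.run() dispatches on --pipe-action:
# 'predict' -> pipeline.predict(None), 'transform' -> pipeline.transform(X=None),
# while 'fit' / 'fit_transform' call the pipeline without capturing a return value,
# so only 'predict' and 'transform' can yield non-empty content below.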
200 | content = simulator.run(**fit_params) 201 | 202 | if content == '': 203 | good_result = 0 204 | else: 205 | good_result = 1 206 | 207 | except (KeyboardInterrupt, FileNotFoundError): 208 | print('Stopping simulation') 209 | 210 | del simulator 211 | return good_result 212 | 213 | GOOD_RESULTS_LIMIT = 15 214 | GOOD_RESULTS_LIMIT = 3 215 | 216 | now = datetime.datetime.now() 217 | for trial in range(args.trial, args.trials): 218 | print("\n" * 4) 219 | print("*" * 100) 220 | #"Stating trial:", trial, "out of total:", args.trials, "| pipeline", args.pipeline_name, "| model", args.model, args.activation_type, "| dataset", os.path.basename(args.input_tf_dataset), "| sofar good results:", total_good_results) 221 | trial_str = str(args.id) + " | Stating trial: " + str(trial) + " out of total: " + str(args.trials) + " | pipeline " + args.pipeline_name + " | model " + args.model + ', ' + args.activation_type + " | dataset " + os.path.basename(args.input_tf_dataset) + " | sofar good results: " + str(total_good_results) 222 | print(trial_str) 223 | 224 | # record of all simulation from the big bang until now. 225 | with open('./logs/simulation_runs.txt', 'a') as f: 226 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 227 | 228 | f.write(timestr + ' -- ') 229 | f.write(trial_str) 230 | f.write("\n") 231 | 232 | # look at the name of this file to see what was the last simulation 233 | flag_file = os.path.join('./logs/', str(args.id) + '-' + args.model + '-' + str(trial) + '-' + args.activation_type + '-' + os.path.basename(args.input_tf_dataset)) 234 | with open(flag_file, 'w') as f: 235 | f.write('look at my name') 236 | 237 | print("*" * 100) 238 | print(datetime.datetime.now()) 239 | 240 | res = main(trial) 241 | 242 | # -1 is returned when no more excel files are avaliable to clculate betti numbers 243 | if res == -1: 244 | print("reached end of good_results.txt") 245 | break 246 | 247 | total_good_results += res 248 | tf.reset_default_graph() 249 | if total_good_results == GOOD_RESULTS_LIMIT: 250 | print("Maximum number of good results reached") 251 | break 252 | print("trial", trial, "is over", ":" * 90) 253 | print("\n" * 4) 254 | os.remove(flag_file) 255 | 256 | print("Done running configuration, took", datetime.datetime.now() - now) 257 | print("!" 
* 100) 258 | -------------------------------------------------------------------------------- /data/circles_type_8/circles_type_8.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/data/circles_type_8/circles_type_8.tfrecords -------------------------------------------------------------------------------- /data/circles_type_8/inference_circles_type_8.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/data/circles_type_8/inference_circles_type_8.tfrecords -------------------------------------------------------------------------------- /data/rings_9/rings_9.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/data/rings_9/rings_9.tfrecords -------------------------------------------------------------------------------- /data/spheres_9/spheres_9.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/data/spheres_9/spheres_9.tfrecords -------------------------------------------------------------------------------- /estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Estimator(ABC): 5 | def fit(self, x, y=None, **fit_params): 6 | return self 7 | 8 | @abstractmethod 9 | def predict(self, x): 10 | print ("predicting" * 100) 11 | raise NotImplementedError 12 | 13 | def transform(self, X): 14 | """Implements identity transform: the output equals to the input. 15 | 16 | Motivation: all members in the pipeline, up to this estimator, are expected to 17 | support transform operation. Consequently, if we add identity transform in here, we are able to run: 18 | pipeline.transform(pipeline_input) and smoothly retrieve the transformed input, which is fed into 19 | the estimator on running 'predict'. 
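Illustrative sketch (step and class names here are hypothetical): for
Pipeline(steps=[('reader', TFReader(...)), ('net', SomeEstimator())]),
pipeline.transform(x) runs x through 'reader' and then through this identity
transform, returning exactly the intermediate result that SomeEstimator.predict
would receive from pipeline.predict(x).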
20 | """ 21 | 22 | return X 23 | -------------------------------------------------------------------------------- /estimators/data_loader.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | vec_size = 2 7 | 8 | feature = { 9 | 'vector/point': tf.FixedLenFeature(shape=[vec_size], dtype=tf.float32), 10 | 'vector/ground_truth': tf.FixedLenFeature(shape=[1], dtype=tf.float32), 11 | } 12 | 13 | class DataLoader(object): 14 | def __init__(self, 15 | shard_path, 16 | batch_size=100, 17 | num_parallel_calls = 1, 18 | prefetch_buffer_size = 500): 19 | 20 | dataset_files = self._parse_input(shard_path) 21 | self._check_filenames(dataset_files) 22 | 23 | if len(dataset_files) == 0: 24 | raise IOError('Error: dataset files do not exist.') 25 | _, ext = os.path.splitext(dataset_files[0]) 26 | if ext == '.tfrecords': 27 | self._dataset = tf.data.TFRecordDataset(dataset_files, 28 | num_parallel_reads=num_parallel_calls) 29 | 30 | self._dataset = self._dataset.map(parse, 31 | num_parallel_calls=num_parallel_calls) 32 | 33 | self._dataset = self._dataset.batch(batch_size=batch_size) 34 | self._dataset = self._dataset.prefetch(buffer_size=prefetch_buffer_size) 35 | self._iterator = self._dataset.make_initializable_iterator() 36 | self._next_batch = self._iterator.get_next() 37 | 38 | def initialize(self, sess): 39 | sess.run(self._iterator.initializer) 40 | 41 | def _parse_input(self, shard_path): 42 | if isinstance(shard_path, str): 43 | if os.path.isdir(shard_path): 44 | shard_files = glob.glob(os.path.join(shard_path, '*.tfrecords')) 45 | else: 46 | shard_files = glob.glob(shard_path) 47 | elif isinstance(shard_path, list): 48 | shard_files = [] 49 | for f in shard_path: 50 | if os.path.isdir(f): 51 | new_path = os.path.join(f, '*.tfrecords') 52 | shard_files += self._parse_input(new_path) 53 | else: 54 | shard_files.append(f) 55 | else: 56 | raise FileNotFoundError(shard_path) 57 | return sorted(shard_files) 58 | 59 | @staticmethod 60 | def _check_filenames(filenames): 61 | for f in filenames: 62 | if not os.path.exists(f): 63 | raise FileNotFoundError(f) 64 | 65 | def load_batch(self, sess): 66 | batch = sess.run(self._next_batch) 67 | return batch 68 | 69 | 70 | def parse(example): 71 | parsed_example = tf.parse_single_example(example, features=feature) 72 | return parsed_example 73 | 74 | 75 | if __name__ == '__main__': 76 | import time 77 | shards = '/home/topology_of_dl/' 78 | batch_size = 16 79 | num_parallel_calls = 5 80 | prefetch_buffer_size = 100 81 | loopruns = 5 82 | data_loader = DataLoader(shards, 83 | batch_size=batch_size, 84 | num_parallel_calls=num_parallel_calls) 85 | 86 | with tf.Session() as sess: 87 | data_loader.initialize(sess) 88 | total_time = 0 89 | for i in range(loopruns): 90 | start_time = time.time() 91 | batch = data_loader.load_batch(sess) 92 | total_time += (time.time() - start_time) 93 | 94 | print("FPS: {0:.2f}".format(batch_size*loopruns/total_time)) -------------------------------------------------------------------------------- /estimators/multi_gpu_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | PS_OPS = ['Variable', 'VariableV2', 'AutoReloadVariable'] 4 | 5 | def assign_to_device(device, ps_device='/cpu:0'): 6 | def _assign(op): 7 | node_def = op if isinstance(op, tf.NodeDef) else op.node_def 8 | if node_def.op in PS_OPS: 9 | return "/" + ps_device 10 | else: 11 | 
return device 12 | 13 | return _assign 14 | 15 | 16 | def average_gradients(tower_grads): 17 | average_grads = [] 18 | for grad_and_vars in zip(*tower_grads): 19 | # Note that each grad_and_vars looks like the following: 20 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 21 | grads = [] 22 | for g, _ in grad_and_vars: 23 | # Add 0 dimension to the gradients to represent the tower. 24 | expanded_g = tf.expand_dims(g, 0) 25 | 26 | # Append on a 'tower' dimension which we will average over below. 27 | grads.append(expanded_g) 28 | 29 | # Average over the 'tower' dimension. 30 | grad = tf.concat(grads, 0) 31 | grad = tf.reduce_mean(grad, 0) 32 | 33 | # Keep in mind that the Variables are redundant because they are shared 34 | # across towers. So .. we will just return the first tower's pointer to 35 | # the Variable. 36 | v = grad_and_vars[0][1] 37 | grad_and_var = (grad, v) 38 | average_grads.append(grad_and_var) 39 | return average_grads -------------------------------------------------------------------------------- /estimators/net_helper_funcs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sys 4 | import tarfile 5 | import urllib 6 | from io import BytesIO 7 | from typing import List 8 | 9 | import cv2 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | 14 | def num_of_corrects(y_true, y_pred): 15 | actual = tf.argmax(y_true, axis=1) 16 | pred = tf.argmax(y_pred, axis=1) 17 | equality = tf.equal(pred, actual) 18 | return tf.reduce_sum(tf.cast(equality, tf.int64)) 19 | 20 | 21 | 22 | 23 | 24 | def download_and_uncompress_tarball(tarball_url, dataset_dir): 25 | """Downloads the `tarball_url` and uncompresses it locally. 26 | Args: 27 | tarball_url: The URL of a tarball file. 28 | dataset_dir: The directory where the temporary files are stored. 
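Example (illustrative values only):
download_and_uncompress_tarball('http://example.com/model.tar.gz', '/tmp/ckpt')
fetches model.tar.gz into /tmp/ckpt, printing download progress, and then
extracts the archive into the same directory.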
29 | """ 30 | filename = tarball_url.split('/')[-1] 31 | filepath = os.path.join(dataset_dir, filename) 32 | 33 | def _progress(count, block_size, total_size): 34 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 35 | filename, float(count * block_size) / float(total_size) * 100.0)) 36 | sys.stdout.flush() 37 | 38 | filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) 39 | print() 40 | statinfo = os.stat(filepath) 41 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 42 | tarfile.open(filepath, 'r:gz').extractall(dataset_dir) 43 | 44 | 45 | def download_and_extract_model(url, ckpt_path): 46 | checkpoint_dir = os.path.dirname(ckpt_path) 47 | if not tf.gfile.Exists(checkpoint_dir): 48 | tf.gfile.MakeDirs(checkpoint_dir) 49 | if not tf.gfile.Exists(ckpt_path): 50 | download_and_uncompress_tarball(url, checkpoint_dir) 51 | -------------------------------------------------------------------------------- /estimators/nn_simple.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from abc import ABC, abstractmethod 4 | import tensorflow as tf 5 | import os 6 | import time 7 | import glob 8 | import argparse 9 | from transformers.tf_reader import TFReader 10 | from transformers.grid_generator import GridGenerator 11 | from transformers.visualizer_2d import Visualizer2D 12 | from importlib import import_module 13 | import pipelines 14 | from transformers.pickle_tensors_values_saver import PickleTensorValuesSaver 15 | from transformers.excel_tensor_values_saver import ExcelTensorValuesSaver 16 | from transformers.visualizer_2d import Visualizer2D 17 | from transformers.pickle_reader import PickleReader 18 | from transformers.tf_reader import TFReader 19 | from tensorflow.python.tools import inspect_checkpoint as chkp 20 | from tensorflow.python import pywrap_tensorflow 21 | import traceback 22 | 23 | import numbers 24 | 25 | import numpy as np 26 | 27 | from tensorflow.python.eager import context 28 | from tensorflow.python.framework import dtypes 29 | from tensorflow.python.framework import graph_util 30 | from tensorflow.python.framework import ops 31 | from tensorflow.python.framework import tensor_shape 32 | from tensorflow.python.framework import tensor_util 33 | from tensorflow.python.ops import array_ops 34 | from tensorflow.python.ops import check_ops 35 | from tensorflow.python.ops import gen_nn_ops 36 | from tensorflow.python.ops import math_ops 37 | from tensorflow.python.ops import random_ops 38 | 39 | # go/tf-wildcard-import 40 | # pylint: disable=wildcard-import 41 | from tensorflow.python.ops.gen_nn_ops import * 42 | # pylint: enable=wildcard-import 43 | 44 | from tensorflow.python.util import deprecation 45 | from tensorflow.python.util.tf_export import tf_export 46 | 47 | # Aliases for some automatically-generated names. 48 | local_response_normalization = gen_nn_ops.lrn 49 | 50 | 51 | def custom(features, alpha=0.2, name=None): 52 | """Compute the Leaky ReLU activation function. 53 | 54 | "Rectifier Nonlinearities Improve Neural Network Acoustic Models" 55 | AL Maas, AY Hannun, AY Ng - Proc. ICML, 2013 56 | https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf 57 | 58 | Args: 59 | features: A `Tensor` representing preactivation values. Must be one of 60 | the following types: `float16`, `float32`, `float64`, `int32`, `int64`. 61 | alpha: Slope of the activation function at x < 0. 
62 | name: A name for the operation (optional). 63 | 64 | Returns: 65 | The activation value. 66 | """ 67 | with ops.name_scope(name, "Custom", [features, alpha]) as name: 68 | features = ops.convert_to_tensor(features, name="features") 69 | if features.dtype.is_integer: 70 | features = math_ops.to_float(features) 71 | alpha = ops.convert_to_tensor(0, dtype=features.dtype, name="alpha") 72 | return tf.identity(math_ops.minimum(math_ops.maximum(features, 0, name='CustomAmax'), 1), name=name) 73 | 74 | 75 | 76 | class NeuralNetworkSimple(ABC): 77 | 78 | @staticmethod 79 | def one_hot_encode(x, n_classes): 80 | """ 81 | One hot encode a list of sample labels. Return a one-hot encoded vector for each label. 82 | : x: List of sample Labels 83 | : return: Numpy array of one-hot encoded labels 84 | """ 85 | return np.eye(n_classes)[x] 86 | 87 | 88 | def neural_net(self, x, model_type, activation_type): 89 | 90 | ACTIVATION_NAMES = ['LeakyRelu', 'Relu', 'Tanh', 'Custom'] 91 | ACTIVATIONS = [tf.nn.leaky_relu, tf.nn.relu, tf.nn.tanh, custom] 92 | 93 | 94 | MODELS = [ '4_by_50', 95 | '2_by_15', 96 | '3_by_15', 97 | '10_by_5', 98 | '10_by_6', 99 | '10_by_10', 100 | '10_by_15', 101 | '8_by_15', 102 | '4_by_15', 103 | '10_by_25', 104 | '10_by_25', 105 | '10_by_50', 106 | '3_by_10_2_by_1_3_by_10', 107 | '3_by_10_3_by_1_3_by_10', 108 | '3_by_25_2_by_1_3_by_25', 109 | '3_by_25_3_by_1_3_by_25', 110 | '5_by_15', 111 | '6_by_15', 112 | '7_by_15'] 113 | 114 | MODEL_PARAMS = [(4, 50), 115 | (2, 15), 116 | (3, 15), 117 | (10, 5), 118 | (10, 6), 119 | (10, 10), 120 | (10, 15), 121 | (8,15), 122 | (4,15), 123 | (10, 25), 124 | (10, 25), 125 | (10, 50), 126 | (3,10, 2), 127 | (3, 10, 3), 128 | (3, 25, 2), 129 | (3, 25, 3), 130 | (5, 15), 131 | (6, 15), 132 | (7, 15) 133 | ] 134 | 135 | 136 | i = ACTIVATION_NAMES.index(activation_type) 137 | activation = ACTIVATIONS[i] 138 | 139 | j = MODELS.index(model_type) 140 | model_params = MODEL_PARAMS[j] 141 | 142 | width = model_params[1] 143 | length = model_params[0] 144 | num_classes = 2 145 | 146 | layers = [] 147 | layers.append(activation(tf.layers.dense(x, width))) 148 | for i in range(0, 20): 149 | layers.append(activation(tf.layers.dense(layers[i], width))) 150 | 151 | # identity to make the name look nice 152 | out_layer = tf.identity(tf.layers.dense(layers[length-2], num_classes)) 153 | 154 | print("Names of the layer in a network") 155 | print("input:", x) 156 | for j in range(length-1): 157 | print("hidden layer:", layers[j]) 158 | print("outer most layer before soft max:", out_layer) 159 | 160 | return out_layer 161 | 162 | def __init__(self, 163 | model_path: str = 'models_folder', 164 | num_input = 2, 165 | **fit_params): 166 | 167 | self.model_type = fit_params.get('model', '10_by_5') 168 | self.activation_type = fit_params.get('activation_type', 'LeakyRelu') 169 | self.activation_name = self.activation_type 170 | 171 | self._model_path = model_path 172 | self.learning_rate = fit_params.get('learning_rate', 0.001) 173 | self.restore_meta_graph = fit_params.get('restore_meta_graph', False) 174 | self.freeze_pretrained = fit_params.get('freeze_pretrained', False) 175 | self.verbose = fit_params.get('verbose', False) 176 | output_folder = fit_params.get('output', './output') 177 | 178 | # directory where we are going to save training logs 179 | logs_path = os.path.join(output_folder, 'logs') 180 | os.makedirs(logs_path, exist_ok=True) 181 | self.logs_path = logs_path 182 | 183 | # prefix where we are going to snapshots of our save trained models 184 | 
os.makedirs(os.path.join(output_folder, 'snapshots'), exist_ok=True) 185 | model_snapshot_save_path = os.path.join(output_folder, 'snapshots','snapshot') 186 | self.model_snapshot_save_path = model_snapshot_save_path 187 | 188 | num_input = num_input 189 | num_classes = 2 190 | self.X = tf.placeholder("float", [None, num_input]) 191 | self.Y = tf.placeholder("float", [None, num_classes]) 192 | 193 | 194 | self.logits = self.neural_net(self.X, self.model_type, self.activation_type) 195 | self.prediction = tf.nn.softmax(self.logits) 196 | 197 | # Define loss and optimizer 198 | self.loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y)) 199 | 200 | self.step = tf.Variable(0, trainable=False) 201 | rate = tf.train.exponential_decay(self.learning_rate, self.step, 2500, 0.5) 202 | # rate = tf.train.exponential_decay(self.learning_rate, self.step, 4000, 0.5) 203 | self.optimizer = tf.train.AdamOptimizer(learning_rate=rate) 204 | 205 | self.train_op = self.optimizer.minimize(self.loss_op, global_step=self.step) 206 | 207 | # Evaluate model 208 | self.correct_pred = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(self.Y, 1)) 209 | self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32)) 210 | 211 | # Initialize the variables (i.e. assign their default value) 212 | self.init = tf.global_variables_initializer() 213 | 214 | # Create a summary to monitor cost tensor 215 | tf.summary.scalar("loss", self.loss_op) 216 | # Create a summary to monitor accuracy tensor 217 | tf.summary.scalar("accuracy", self.accuracy) 218 | # Merge all summaries into a single op 219 | self.merged_summary_op = tf.summary.merge_all() 220 | self._pretrained_model = fit_params.get('pretrained_model') 221 | 222 | self._saver = tf.train.Saver(save_relative_paths=True, max_to_keep=1) 223 | 224 | config = tf.ConfigProto(allow_soft_placement=True) 225 | config.gpu_options.allow_growth = True 226 | self._sess = tf.Session(config=config) 227 | self._model_loaded = False 228 | 229 | def _load_model_decorator(func): 230 | """ 231 | Decorator function for loading the TF model. Used before making predictions. 
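On the first call the wrapper invokes self._load_model(self._model_path) and sets
self._model_loaded, so subsequent calls skip the load and go straight to the wrapped method.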
232 | """ 233 | 234 | def wrapper(self, *arg, **kw): 235 | if not self._model_loaded: 236 | self._load_model(os.path.join(self._model_path)) 237 | 238 | self._model_loaded = True 239 | 240 | return func(self, *arg, **kw) 241 | 242 | return wrapper 243 | 244 | def _predict_one(self, input_vec): 245 | feed_dict = {self._in: input_vec} 246 | try: 247 | out_all = self._sess.run(self._pred, feed_dict) 248 | except ValueError as e: 249 | print(str(e)) 250 | raise ValueError( 251 | "Something went wrong") 252 | out = out_all 253 | return out 254 | 255 | @_load_model_decorator 256 | def transform(self, input_vec, model_path = None): 257 | """ Return prediction and data""" 258 | 259 | return_dic = False 260 | if isinstance(input_vec, dict): 261 | contents = input_vec 262 | input_vec = input_vec['test_dataset']['samples'] 263 | return_dic = True 264 | 265 | if model_path: 266 | self._model_path = model_path 267 | 268 | if type(input_vec) is np.ndarray and input_vec.shape[0] == 1: 269 | if return_dic: 270 | contents.update({'nn_predictions_on_test_dataset':{'labels': self._predict_one(input_vec), 271 | 'samples': input_vec, 272 | 'labels_names': [item.name.replace(':', '-') for item in self._pred]}}) 273 | return contents 274 | 275 | return {'labels': self._predict_one(input_vec), 276 | 'samples': input_vec, 277 | 'labels_names': [item.name.replace(':', '-') for item in self._pred]} 278 | 279 | elif type(input_vec) is np.ndarray: 280 | 281 | if return_dic: 282 | contents.update({'nn_predictions_on_test_dataset':{'labels': [self._predict_one(input_vec[np.newaxis, i, :]) for i in range(input_vec.shape[0])], 283 | 'samples': [input_vec[i, :] for i in range(input_vec.shape[0])], 284 | 'labels_names': [item.name.replace(':', '-') for item in self._pred]}}) 285 | return contents 286 | 287 | return {'labels': [self._predict_one(input_vec[np.newaxis, i, :]) for i in range(input_vec.shape[0])], 288 | 'samples': [input_vec[i, :] for i in range(input_vec.shape[0])], 289 | 'labels_names': [item.name.replace(':', '-') for item in self._pred]} 290 | 291 | elif type(input_vec) is list: 292 | 293 | if return_dic: 294 | contents.update({'nn_predictions_on_test_dataset':{'tensors_values': [self._predict_one(vec) for vec in input_vec], 295 | 'samples': [vec for vec in input_vec], 296 | 'labels_names': [item.name.replace(':', '-') for item in self._pred]}}) 297 | return contents 298 | 299 | return {'tensors_values': [self._predict_one(vec) for vec in input_vec], 300 | 'samples': [vec for vec in input_vec], 301 | 'labels_names': [item.name.replace(':', '-') for item in self._pred]} 302 | else: 303 | raise ValueError("Wrong type of input argument: {}".format(input_vec)) 304 | 305 | def save(self, step: int): 306 | """ 307 | Save current model and variables into the given output folder. 308 | It will give an error if the folder already exists. 
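Checkpoints are written with tf.train.Saver under self.model_snapshot_save_path
(i.e. <output>/snapshots/snapshot-<step>); the saver was created with max_to_keep=1,
so only the most recent snapshot is retained.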
309 | """ 310 | self._saver.save(self._sess, self.model_snapshot_save_path, global_step=step) 311 | 312 | print("Saved model to: {}".format(self.model_snapshot_save_path)) 313 | 314 | def fit(self, training_samples, 315 | training_labels, 316 | validation_samples, 317 | validation_labels, 318 | args, **fit_params): 319 | 320 | prev_acc = 0 321 | 322 | labels = self.one_hot_encode(training_labels.astype(int), 2) 323 | save_nn_predictions = PickleTensorValuesSaver(content_name='nn_predictions_on_validation_dataset') 324 | save_nn_predictions2excel = ExcelTensorValuesSaver(content_name='nn_predictions_on_validation_dataset') 325 | save_nn_predictions._output_filename = os.path.join(args.output, 'pickles', 326 | (os.path.basename(args.input_tf_dataset)).split('.')[0] + '-' + "last_prediction" + ".pkl") 327 | 328 | save_nn_predictions2excel._output_filename = os.path.join(args.output, 'excels') 329 | 330 | def plot_grid(step): 331 | 332 | visualizer_grid_and_data_base_on_which_nn_was_trained = Visualizer2D( 333 | output_filename=os.path.join(args.output, 'plots', 334 | (os.path.basename(args.input_tf_dataset)).split('.')[0] + '-' + str(step) + ".html")) 335 | 336 | x = self.transform(validation_samples) 337 | x = save_nn_predictions.transform({'nn_predictions_on_validation_dataset': x}) 338 | x = save_nn_predictions2excel.transform(x) 339 | dic = {'training_dataset': {'samples': training_samples, 'labels': training_labels}} 340 | x.update(dic) 341 | x = visualizer_grid_and_data_base_on_which_nn_was_trained.transform(x) 342 | 343 | """ 344 | TODO: add ability to load pretrained model 345 | """ 346 | 347 | best_acc = 0 348 | 349 | num_steps = fit_params.get('training_epochs', 1000) 350 | display_step = fit_params.get('summary_freq', 100) 351 | 352 | 353 | # Run the initializer 354 | self._sess.run(self.init) 355 | 356 | loss, acc, summary = self._sess.run([self.loss_op, self.accuracy, self.merged_summary_op], 357 | feed_dict={self.X: training_samples, self.Y: labels}) 358 | print(acc, "acc", loss, "loss") 359 | 360 | # op to write logs to Tensorboard 361 | summary_writer = tf.summary.FileWriter(self.logs_path, graph=tf.get_default_graph()) 362 | if self._pretrained_model: 363 | print("-- loading pretrained model" * 100) 364 | self._load_model(self._pretrained_model) 365 | self._model_loaded = True 366 | loss, acc, summary = self._sess.run([self.loss_op, self.accuracy, self.merged_summary_op], 367 | feed_dict={self.X: training_samples, self.Y: labels}) 368 | print(acc, "acc", loss, "loss") 369 | 370 | 371 | # TODO: uncomment if you want to show classification by pretrained model 372 | 373 | for step in range(1, num_steps + 1): 374 | 375 | _ = self._sess.run(self.train_op, feed_dict={self.X: training_samples, self.Y: labels}) 376 | 377 | if step % display_step == 0 or step == 1: 378 | # Calculate batch loss and accuracy 379 | loss, acc, summary = self._sess.run([self.loss_op, self.accuracy, self.merged_summary_op], 380 | feed_dict={self.X: training_samples, self.Y: labels}) 381 | 382 | if acc > best_acc: 383 | best_acc = acc 384 | 385 | summary_writer.add_summary(summary, step) 386 | print("Step " + str(step) + ", Minibatch Loss= " + \ 387 | "{:.5f}".format(loss) + ", Training Accuracy= " + \ 388 | "{:.4f}".format(acc) + ', Learning rate= %f' % (self._sess.run(self.optimizer._lr)) + \ 389 | ", Best Accuracy= " + "{:.5f}".format(best_acc)) 390 | 391 | 392 | if prev_acc == best_acc : 393 | print("No progress, quit", step) 394 | break; 395 | 396 | if best_acc == 1.0: 397 | print("Best accuracy 
achieved", step) 398 | self.save(step=step) 399 | if not os.path.exists(os.path.join(os.path.dirname(os.path.dirname(self.logs_path)), self.activation_name)): 400 | os.makedirs(os.path.join(os.path.dirname(os.path.dirname(self.logs_path)), self.activation_name)) 401 | 402 | with open(os.path.join(os.path.dirname(os.path.dirname(self.logs_path)), self.activation_name,'good_results.txt'), 'a') as f: 403 | f.write(os.path.dirname(self.logs_path)) 404 | f.write('\n') 405 | print(self.logs_path) 406 | break; 407 | 408 | prev_acc = best_acc 409 | 410 | # draw decision boundaries 411 | # 412 | if self.verbose: 413 | if step % (1 * display_step) == 0: 414 | self._model_path = os.path.dirname(self.model_snapshot_save_path) 415 | plot_grid(step) 416 | 417 | print("Optimization Finished!", step) 418 | 419 | if not os.path.exists(os.path.join(os.path.dirname(os.path.dirname(self.logs_path)), self.activation_name)): 420 | os.makedirs(os.path.join(os.path.dirname(os.path.dirname(self.logs_path)), self.activation_name)) 421 | 422 | with open(os.path.join(os.path.dirname(os.path.dirname(self.logs_path)), self.activation_name,'all_results.txt'), 'a') as f: 423 | f.write(os.path.dirname(self.logs_path) + ' ' + str(acc)) 424 | f.write('\n') 425 | print(self.logs_path) 426 | 427 | print("Testing Accuracy:", \ 428 | self._sess.run(self.accuracy, feed_dict={self.X: training_samples, self.Y: labels})) 429 | 430 | return best_acc 431 | 432 | def __enter__(self): 433 | return self 434 | 435 | def __exit__(self, exc_type, exc_val, exc_tb): 436 | if self._sess is not None: 437 | self._sess.close() 438 | 439 | 440 | def _load_model(self, save_path, verbose=False): 441 | """ 442 | Loads a saved TF model from a file. 443 | Args: 444 | save_path: The save path of the saved session, returned by Saver.save(). 445 | Returns: 446 | None 447 | """ 448 | graph = tf.get_default_graph() 449 | metagraph_file = None 450 | for file in sorted(os.listdir(save_path), reverse=True): 451 | if file.endswith('.meta'): 452 | metagraph_file = os.path.join(save_path, file) 453 | break 454 | if metagraph_file is None: 455 | raise FileNotFoundError("Cant find metagraph file in: {}".format(save_path)) 456 | tvars = tf.trainable_variables() 457 | 458 | # TODO: fix. We initialize the model for training, but training should only happen when we run fit. 459 | # there is some weird things going on here. 460 | 461 | self._sess.run(self.init) 462 | tvars_vals = self._sess.run(tvars) 463 | 464 | if(self.restore_meta_graph): 465 | print("Loading metagraph from '%s'..." % metagraph_file) 466 | saver = tf.train.import_meta_graph(metagraph_file, clear_devices=True) 467 | self._saver = saver 468 | 469 | self._restore_weights(save_path) 470 | 471 | def _restore_weights(self, save_path, verbose=False): 472 | """ 473 | Loads a saved TF model weights from a file. 474 | Args: 475 | save_path: The save path for the saved model, returned by Saver.save(). 476 | Returns: 477 | None 478 | """ 479 | latest_checkpoint_file = tf.train.latest_checkpoint(save_path) 480 | if latest_checkpoint_file is None: 481 | raise FileNotFoundError("Cant find checkpoint in path: {}".format(save_path)) 482 | 483 | print("Loading model weights from: '%s'..." 
% latest_checkpoint_file) 484 | 485 | if(not(self.restore_meta_graph)): 486 | # read tensors one by one to dict 487 | loaded_tensors_dict= {} 488 | reader = pywrap_tensorflow.NewCheckpointReader(latest_checkpoint_file) 489 | var_to_shape_map = reader.get_variable_to_shape_map() 490 | for key in sorted(var_to_shape_map): 491 | loaded_tensors_dict.update({key+':0': reader.get_tensor(key)}) 492 | 493 | # tf.get_collection(tf.GraphKeys.VARIABLES) 494 | vars = {v.name: v for v in tf.trainable_variables()} 495 | updated_vars = vars.copy() 496 | for key, value in vars.items(): 497 | if key in loaded_tensors_dict.keys(): 498 | try : 499 | self._sess.run(tf.assign(ref=vars[key], value=loaded_tensors_dict[key])) 500 | except Exception: 501 | traceback.print_exc() 502 | else: 503 | if(self.freeze_pretrained): 504 | del(updated_vars[key]) 505 | self.trainable=updated_vars 506 | self.train_op = self.optimizer.minimize(self.loss_op, global_step=self.step, var_list=self.trainable) 507 | tvars = tf.trainable_variables() 508 | tvars_vals = self._sess.run(tvars) 509 | 510 | else: 511 | self._saver.restore(self._sess, save_path=latest_checkpoint_file) 512 | 513 | self.input_name = 'Placeholder:0' 514 | self.output_names = ['ArgMax:0', 'Placeholder:0'] 515 | self.output_names.append(self.activation_name + ':0') 516 | for k in range(1, 11): 517 | self.output_names.append(self.activation_name + '_' + str(k) +':0') 518 | self.output_names.append('Identity' + ':0') 519 | self.output_names.append('Softmax' + ':0') 520 | self._pred = [] 521 | for output_name in self.output_names: 522 | self._pred.append(tf.get_default_graph().get_tensor_by_name(output_name)) 523 | self._in = tf.get_default_graph().get_tensor_by_name(self.input_name) 524 | 525 | 526 | if __name__ == '__main__': 527 | """ 528 | Run this script to train a model 529 | """ 530 | 531 | parser = argparse.ArgumentParser() 532 | 533 | model_choices = ['base', 534 | 'test', 535 | 'arch_type_1', 536 | 'nn_simple', 537 | 'nn_9_by_3_simple', 538 | 'nn_10_by_3_simple', 539 | 'nn_16_by_3_simple', 540 | 'nn_18_by_3_simple'] 541 | 542 | parser.add_argument('--input-tf-dataset', default='simple.tfrecords', help="tfrecords data set name") 543 | parser.add_argument('--output', help='Output folder where model is saved') 544 | parser.add_argument('--model', default='base',choices=model_choices, help='Specifies which model to use for the image stream.') 545 | parser.add_argument('--save_freq', default=1, help='frequency of epochs to save model snapshot.') 546 | parser.add_argument('--validation_freq', default=1, help='frequency of validation of the model in number of iterations.') 547 | parser.add_argument('--summary_freq', type=int, default=50, help='frequency of validation of the model in number of iterations.') 548 | parser.add_argument('--pretrained_model', default=None, help='Set pretrained model path if exists. 
Otherwises start from random') 549 | parser.add_argument('--batch_size', type=int, default=500, help='Batch size in training.') 550 | parser.add_argument('--learning_rate', type=float, default=0.00001) 551 | parser.add_argument('--num_gpus', type=int, default=1, help='Number of GPUs to use.') 552 | parser.add_argument('--training_epochs', type=int, default=1000, help='Number of GPUs to use.') 553 | parser.add_argument('--use_cpu', action='store_true', help='Use cpu') 554 | parser.add_argument('--restore_meta_graph', action='store_true', help='') 555 | parser.add_argument('--freeze_pretrained', action='store_true', help='') 556 | parser.add_argument('--verbose', action='store_true', help='') 557 | 558 | args = parser.parse_args() 559 | 560 | initial_path = args.output 561 | 562 | 563 | all_subdirs = [d for d in glob.glob(args.output + '/*') if os.path.isdir(d)] 564 | if args.pretrained_model and not len(all_subdirs) == 0: # TODO now it is hacked make this as it supposed to be. 565 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 566 | args.output = os.path.join(initial_path, args.model, timestr + "-pretrained-on-" + os.path.basename(args.pretrained_model) ) 567 | model_path = None 568 | else: 569 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 570 | args.output = os.path.join(initial_path, args.model, timestr) 571 | model_path = None 572 | print('Created new output directory: ' + args.output) 573 | 574 | fit_params = vars(args) 575 | nn = NeuralNetworkSimple(model_path=model_path, **fit_params) 576 | nn.fit(args, **fit_params) -------------------------------------------------------------------------------- /estimators/nn_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from abc import ABC 3 | import os 4 | import time 5 | import glob 6 | import argparse 7 | import numpy as np 8 | from transformers.grid_generator import GridGenerator 9 | import shutil 10 | 11 | class NeuralNetworkTrainer(ABC): 12 | def __init__(self, 13 | nn, 14 | content_name, 15 | args, 16 | **fit_params): 17 | 18 | self.args = args 19 | self.fit_params = fit_params 20 | self.nn = nn 21 | self.content_name = content_name 22 | 23 | def transform(self, content): 24 | 25 | def _get_lables_and_data_tf(ds): 26 | """ parse tf records to points and labels. """ 27 | 28 | labels = np.array(ds['labels']) 29 | labels.shape = (labels.shape[0],) 30 | samples = np.array(ds['samples']) 31 | return samples, labels 32 | 33 | training_samples, training_labels = _get_lables_and_data_tf(content['training_dataset']) 34 | validation_samples, validation_labels = _get_lables_and_data_tf(content['validation_dataset']) 35 | 36 | best_acc = self.nn.fit(training_samples=training_samples, 37 | training_labels=training_labels, 38 | validation_samples=validation_samples, 39 | validation_labels=validation_labels, 40 | args=self.args, 41 | **self.fit_params) 42 | 43 | if best_acc == 1.0: 44 | # if True: 45 | dic = {self.content_name : self.nn.transform(content['test_dataset']['samples'])} 46 | content.update(dic) 47 | return content 48 | else: 49 | # remove simulation directory if not successful 50 | print("accuracy less than 100 percent, remove directory:", self.fit_params['output']) 51 | if os.path.basename(self.fit_params['output'])[0:4] == '2019': # protection to insure we delete diretories that start with 2019 52 | shutil.rmtree(self.fit_params['output']) 53 | else: 54 | print('WARNING! 
- attempted to delete and discovered output directory is weired') 55 | return '' 56 | 57 | 58 | def fit(self, content): 59 | # no fit just does transform 60 | return self.transform(content) 61 | 62 | if __name__ == '__main__': 63 | """ 64 | Run this script to train a model 65 | """ 66 | 67 | parser = argparse.ArgumentParser() 68 | 69 | model_choices = ['base', 70 | 'test', 71 | 'arch_type_1', 72 | 'nn_simple', 73 | 'nn_9_by_3_simple', 74 | 'nn_12_by_3_simple', 75 | 'nn_16_by_3_simple', 76 | 'nn_18_by_3_simple'] 77 | 78 | parser.add_argument('--input-tf-dataset', default='simple.tfrecords', help="tfrecords data set name") 79 | parser.add_argument('--output', help='Output folder where model is saved') 80 | parser.add_argument('--model', default='base',choices=model_choices, help='Specifies which model to use for the image stream.') 81 | parser.add_argument('--save_freq', default=1, help='frequency of epochs to save model snapshot.') 82 | parser.add_argument('--validation_freq', default=1, help='frequency of validation of the model in number of iterations.') 83 | parser.add_argument('--summary_freq', type=int, default=50, help='frequency of validation of the model in number of iterations.') 84 | parser.add_argument('--pretrained_model', default=None, help='Set pretrained model path if exists. Otherwises start from random') 85 | parser.add_argument('--batch_size', type=int, default=500, help='Batch size in training.') 86 | parser.add_argument('--learning_rate', type=float, default=0.00001) 87 | parser.add_argument('--num_gpus', type=int, default=1, help='Number of GPUs to use.') 88 | parser.add_argument('--training_epochs', type=int, default=1000, help='Number of GPUs to use.') 89 | parser.add_argument('--use_cpu', action='store_true', help='Use cpu') 90 | parser.add_argument('--restore_meta_graph', action='store_true', help='') 91 | parser.add_argument('--freeze_pretrained', action='store_true', help='') 92 | parser.add_argument('--verbose', action='store_true', help='') 93 | 94 | args = parser.parse_args() 95 | initial_path = args.output 96 | 97 | all_subdirs = [d for d in glob.glob(args.output + '/*') if os.path.isdir(d)] 98 | 99 | if args.pretrained_model and not len(all_subdirs) == 0: # TODO now it is hacked make this as it supposed to be. 
100 | 101 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 102 | args.output = os.path.join(initial_path, args.model, timestr + "-pretrained-on-" + os.path.basename(args.pretrained_model) ) 103 | 104 | else: 105 | timestr = time.strftime("%Y-%m-%d-%H-%M-%S") 106 | args.output = os.path.join(initial_path, args.model, timestr) 107 | print('Created new output directory: ' + args.output) 108 | 109 | fit_params = vars(args) 110 | nn_trainer = NeuralNetworkTrainer(args, **fit_params) 111 | nn_trainer.transform([]) 112 | -------------------------------------------------------------------------------- /julia_include/include_eirene_run.jl: -------------------------------------------------------------------------------- 1 | # update link to point to local Eirene library installation 2 | include("/julia_include/Eirene_jl/src/Eirene.jl") 3 | using NPZ 4 | 5 | fn2 = include("./julia_aux2.jl") 6 | 7 | neihbors_betti_file = ARGS[1] 8 | neihbors_curve_file = ARGS[2] 9 | file_repres_file = ARGS[3] 10 | maxdim = ARGS[4] 11 | mat_file = ARGS[5] 12 | 13 | calc_betti(neihbors_betti_file,neihbors_curve_file,file_repres_file, maxdim ,mat_file) 14 | exit() 15 | -------------------------------------------------------------------------------- /julia_include/julia_aux2.jl: -------------------------------------------------------------------------------- 1 | function calc_betti(filepath_betti, file_path_curve, file_repres, maxdim, mat) 2 | print("\nsymmetric mat") 3 | print("\nstarted calculating betti numbers for mat") 4 | 5 | if isa(mat, Array) 6 | print("\nmat is a matrix") 7 | C = Eirene.eirene(mat, maxdim=maxdim, minrad=1, maxrad=2, numrad=1, record = "cyclerep") 8 | else 9 | print("\nmat is a file") 10 | mat = npzread(mat) 11 | maxdim = parse(Int64, maxdim) 12 | C = Eirene.eirene(mat, maxdim=maxdim, minrad=1, maxrad=2, numrad=1, record = "cyclerep") 13 | end 14 | 15 | print("\nDone calculating betti numbers\n") 16 | 17 | dim = maxdim 18 | while dim >=0 19 | t = Eirene.barcode(C, dim = dim) 20 | 21 | print("\nBetti neighbors filtration calc:\n") 22 | print(t) 23 | 24 | # save results to csv file. 
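# (note: each row of the barcode matrix t is one persistence interval,
#  column 1 = birth radius, column 2 = death radius)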
25 | f = open(filepath_betti * "_" * string(dim), "w")
26 | 
27 | print("\nstoring betti numbers in \n")
28 | print(filepath_betti)
29 | print("\n")
30 | 
31 | for i in 1:length(t[:,1])
32 |     write(f, @sprintf("%20.16f, %20.16f\n", t[i, 1], t[i, 2]))
33 | end
34 | close(f)
35 | 
36 | 
37 | print("\nDone storing betti numbers\n")
38 | 
39 | print("\nExtracting betti curve\n")
40 | B = Eirene.betticurve(C, dim=dim)
41 | print("\nbetti curve\n")
42 | print(B)
43 | 
44 | f = open(file_path_curve * "_" * string(dim), "w")
45 | print("\nstoring betti curve in \n")
46 | print(file_path_curve)
47 | print("\n")
48 | 
49 | for i in 1:length(B[:])
50 |     write(f, @sprintf("%20.16f, %20.16f\n", i, B[i]))
51 | end
52 | 
53 | close(f)
54 | print("\nDone storing betti curve\n")
55 | print("\nDone all mat calculations.\n")
56 | 
57 | print("\nExtracting representatives.\n")
58 | 
59 | f = open(file_repres * "_" * string(dim), "w")
60 | print("\nstoring representatives in \n")
61 | print(file_repres)
62 | print("\n")
63 | 
64 | for i in 1:length(t[:,1])
65 |     S = Eirene.classrep(C, class=i, dim=dim)
66 |     write(f, "$S\n")
67 | end
68 | 
69 | close(f)
70 | print("\nDone storing representatives for dimension " * string(dim) * ".\n\n\n\n\n\n")
71 | dim = dim - 1
72 | end
73 | end
--------------------------------------------------------------------------------
/julia_include/testing_topology_package.jl:
--------------------------------------------------------------------------------
1 | # load the Eirene library
2 | include("C:\\topology_prj\\framework_repo_f\\topology_of_dl\\julia\\Eirene.jl\\src\\Eirene.jl")
3 | 
4 | # Substitute the "Eirene.jl\src\examples\test.csv" csv file with
5 | # the desired point cloud data and use the built-in "eirenefilepath" method to load that csv
6 | # filepath = Eirene.eirenefilepath("test")
7 | filepath = joinpath(@__DIR__, "examples/test.csv")
8 | # read csv
9 | pointcloud = Eirene.readcsv(filepath)
10 | 
11 | # evaluate persistent homology in dimensions up to "maxdim", where persistence starts at
12 | # radius "minrad", ends at radius "maxrad", and is evaluated at "numrad" equally spaced points.
13 | C = Eirene.eirene(pointcloud, maxdim=1, minrad=0.15, maxrad=0.8, numrad=2, model="pc")
14 | 
15 | # display persistence intervals of dimension "dim"
16 | t = Eirene.barcode(C, dim = 0)
17 | 
18 | # save results to csv file.
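# (note: each row of t is one dimension-0 persistence interval, written out as "birth, death")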
19 | f = open("C:\\topology_prj\\framework_repo_f\\topology_of_dl\\julia\\bars.csv","w") 20 | for i in 1:length(t[:,1]) 21 | write(f, @sprintf("%20.16f, %20.16f\n", t[i, 1], t[i,2])) 22 | end 23 | close(f) -------------------------------------------------------------------------------- /pics/geometry_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/pics/geometry_change.png -------------------------------------------------------------------------------- /pics/topology_change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/pics/topology_change.png -------------------------------------------------------------------------------- /pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from inspect import signature 3 | from os.path import dirname, basename, isfile 4 | from typing import Any, Union, GenericMeta 5 | 6 | from sklearn.pipeline import Pipeline 7 | 8 | modules = glob.glob(dirname(__file__) + "/*.py") 9 | __all__ = [basename(f)[:-3] for f in modules if isfile(f) and not f.endswith('__init__.py')] 10 | 11 | 12 | class InvalidPipelineException(TypeError): 13 | pass 14 | 15 | 16 | def validate(pipeline: Pipeline): 17 | """ 18 | This is a very basic Pipeline validator. It checks whether the input/output types of steps are correct. 19 | 20 | At the moment it only supports simple types and Union. A full validator is much more complicated. 21 | There's a tool that can potentially be used for this purpose: http://www.mypy-lang.org 22 | """ 23 | output_type = None 24 | for step_name, step in pipeline.steps: 25 | try: 26 | sig = signature(step.transform) 27 | except AttributeError: 28 | sig = signature(step.predict) 29 | 30 | input_type = next(iter(sig.parameters.values())).annotation 31 | 32 | if output_type is None: # first element of pipeline 33 | output_type = input_type 34 | 35 | allowed_input_types = input_type.__args__ if type(input_type) == type(Union) else [input_type] 36 | allowed_output_types = output_type.__args__ if type(output_type) == type(Union) else [output_type] 37 | 38 | any_valid = False 39 | for x in allowed_output_types: 40 | if x is Any: 41 | any_valid = True 42 | break 43 | 44 | if x in allowed_input_types: 45 | any_valid = True 46 | break 47 | 48 | for parent in allowed_input_types: 49 | if type(parent) is GenericMeta: 50 | # TODO I couldn't find a better way of converting typing.List to list 51 | parent = list 52 | 53 | if issubclass(x, parent) or issubclass(parent, x): 54 | any_valid = True 55 | break 56 | 57 | if not any_valid: 58 | raise InvalidPipelineException( 59 | "Input type of `{}` step does not match output of previous step".format(step_name)) 60 | 61 | output_type = sig.return_annotation 62 | 63 | return True 64 | -------------------------------------------------------------------------------- /pipelines/excel_2_betti.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | import os 4 | from transformers.visualizer_2d_loops import Visualizer2DLoops 5 | from transformers.excel_to_betti_feeder_parallel import ExcelToBettiFeederParallel 6 | from transformers.constant_feeder import ConstantFeeder 7 | from 
transformers.grid_generator import GridGenerator 8 | from estimators.nn_simple import NeuralNetworkSimple 9 | from transformers.excel_tensor_values_saver import ExcelTensorValuesSaver 10 | 11 | def load(args: Namespace) -> Pipeline: 12 | log_path = args.output 13 | fit_params = vars(args) 14 | activation_name = args.activation_type 15 | 16 | if args.cat2: 17 | excels_cat_2_a = ['cat2_Placeholder-0.csv', 'cat2_%s-0.csv' % (activation_name)] 18 | excels_cat_2_b = ['cat2_%s_%s-0.csv' % (activation_name, i) for i in range(1, int((args.model.split('_')[0])) - 1)] 19 | excels_cat_2_c = ['cat2_Identity-0.csv'] 20 | 21 | excels_cat_2 = excels_cat_2_a + excels_cat_2_b + excels_cat_2_c 22 | else: 23 | excels_cat_2 = [] 24 | 25 | if args.cat1: 26 | excels_cat_1_a = ['cat1_Placeholder-0.csv', 'cat1_%s-0.csv' % (activation_name)] 27 | excels_cat_1_b = ['cat1_%s_%s-0.csv' % (activation_name, i) for i in 28 | range(1, int((args.model.split('_')[0])) - 1)] 29 | excels_cat_1_c = ['cat1_Identity-0.csv'] 30 | 31 | excels_cat_1 = excels_cat_1_a + excels_cat_1_b + excels_cat_1_c 32 | 33 | else: 34 | excels_cat_1 = [] 35 | 36 | pipeline_steps = [ ('tensor_names', ConstantFeeder(constant={})), 37 | ('excel_to_betti_feeder', ExcelToBettiFeederParallel(model_path=log_path, julia=fit_params['julia'], 38 | excel_names=excels_cat_2 + excels_cat_1, divisor = fit_params['divisor'], neighbors= fit_params['neighbors'], max_dim=fit_params['betti_max'])) 39 | ] 40 | 41 | return Pipeline(steps=pipeline_steps) 42 | -------------------------------------------------------------------------------- /pipelines/make_anime.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from estimators.nn_trainer import NeuralNetworkTrainer 4 | import os 5 | from transformers.pickle_tensors_values_saver import PickleTensorValuesSaver 6 | from transformers.visualizer_2d import Visualizer2D 7 | from transformers.pickle_reader import PickleReader 8 | from transformers.tf_reader import TFReader 9 | from transformers.excel_reader import ExcelReader 10 | from transformers.layer_output_plotter import LayerOutputPlotter 11 | # from transformers.betti_calc import BettiCalc 12 | 13 | def load(args: Namespace) -> Pipeline: 14 | fit_params = vars(args) 15 | pipeline_steps = [ 16 | # read predictions ad for each tensor store prediction results in excel: 17 | ('excel_reader', ExcelReader(input_filename=args.output)) 18 | 19 | # visualization part: 20 | #('read_data_base_on_which_nn_was_trained', TFReader(input_filename=args.input_tf_dataset)), 21 | #('read_nn_predictions', PickleReader(input_filename=os.path.join(args.output, 'pickles'))), 22 | #('visualizer_grid_and_data_base_on_which_nn_was_trained', 23 | #Visualizer2D(output_filename=os.path.join(args.output,'plots', 24 | # os.path.basename(args.input_tf_dataset).split('.')[0] + '-trainer' + ".html"))) 25 | #('betti_calculator', BettiCalc(args.output)) 26 | ] 27 | 28 | return Pipeline(steps=pipeline_steps) -------------------------------------------------------------------------------- /pipelines/train_2_excel_2D.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from estimators.nn_trainer import NeuralNetworkTrainer 4 | import os 5 | from transformers.pickle_tensors_values_saver import PickleTensorValuesSaver 6 | from transformers.visualizer_2d import Visualizer2D 7 | from 
transformers.visualizer_2d_loops import Visualizer2DLoops 8 | from transformers.tf_reader import TFReader 9 | from transformers.excel_tensor_values_saver import ExcelTensorValuesSaver 10 | from transformers.tensors_values_plotter import TensorsValuesPlotter 11 | from transformers.constant_feeder import ConstantFeeder 12 | from transformers.excel_to_betti_feeder import ExcelToBettiFeeder 13 | from estimators.nn_simple import NeuralNetworkSimple 14 | from transformers.grid_generator import GridGenerator 15 | 16 | def load(args: Namespace) -> Pipeline: 17 | fit_params = vars(args) 18 | 19 | # directory to save training results and various calculations 20 | log_path = args.output 21 | 22 | # neural network to be trained and evaluated 23 | nn = NeuralNetworkSimple(model_path=os.path.join(log_path, 'snapshots'), num_input = 2, **fit_params) 24 | 25 | pipeline_steps = [ 26 | 27 | # dummy dictionary to which various transformers will append their contents 28 | ('contents', ConstantFeeder({})), 29 | 30 | # generate training dataset used to train neural network 31 | ('training_dataset_circles', TFReader(input_filename=fit_params.get('input_tf_dataset'), 32 | content_name='training_dataset')), 33 | 34 | # when verbose nn will run on validation dataset to see how decision boundaries change 35 | ('validation_dataset_grid', GridGenerator(res=0.05, content_name='validation_dataset')), 36 | 37 | # once nn is finished training it will run on test set, this is the 38 | # set used for betti numbers calculation 39 | ('test_dataset_grid', GridGenerator(res=0.025, content_name='test_dataset')), 40 | # ('test_dataset_grid', TFReader(input_filename= os.path.join(os.path.dirname(fit_params.get('input_tf_dataset')), 'inference_' + os.path.basename(fit_params.get('input_tf_dataset'))), 41 | # content_name='test_dataset', mode='test')), 42 | # wrapper to: 43 | # -train nn on content['training_dataset'] 44 | # -when verbose display results on content['validation_dataset'] 45 | # -when finished fitting run on test on content['test_dataset'] 46 | ('nn_trainer', NeuralNetworkTrainer(nn=nn, 47 | content_name='nn_predictions_on_test_dataset', 48 | args=args, 49 | **fit_params)), 50 | 51 | # # dump tensor values to pickle file 52 | # ('save_content_to_pickle', PickleTensorValuesSaver(output_filename=os.path.join(log_path, 53 | # 'pickles', 54 | # 'nn_after_fit.pkl'), 55 | # content_name='nn_predictions_on_test_dataset')), 56 | # dump tensor values to excel file 57 | ('save_content_to_excel', ExcelTensorValuesSaver(output_filename=os.path.join(log_path, 'excels'), 58 | content_name='nn_predictions_on_test_dataset')), 59 | # 60 | # plot tensor values 61 | ('layer_output_plotter', TensorsValuesPlotter(content_name='nn_predictions_on_test_dataset', 62 | output_filename=os.path.join(log_path, 'plots', 'tensors_values'), 63 | enable=args.plot 64 | )), 65 | 66 | # # visualize training and test set 67 | ('training_and_test_dataset_visualizer', Visualizer2D(output_filename=os.path.join(log_path, 'plots', 68 | os.path.basename(args.input_tf_dataset).split('.')[0] + '-trainer' + ".html"), 69 | enable=args.plot 70 | )) 71 | ] 72 | 73 | return Pipeline(steps=pipeline_steps) -------------------------------------------------------------------------------- /pipelines/train_2_excel_3D.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from estimators.nn_trainer import NeuralNetworkTrainer 4 | import os 5 | from 
transformers.visualizer_2d import Visualizer2D 6 | from transformers.tf_reader import TFReader 7 | from transformers.excel_tensor_values_saver import ExcelTensorValuesSaver 8 | from transformers.tensors_values_plotter import TensorsValuesPlotter 9 | from transformers.constant_feeder import ConstantFeeder 10 | from estimators.nn_simple import NeuralNetworkSimple 11 | from transformers.grid_generator import GridGenerator 12 | 13 | 14 | def load(args: Namespace) -> Pipeline: 15 | fit_params = vars(args) 16 | 17 | # directory to save training results and various calculations 18 | log_path = args.output 19 | 20 | # neural network to be trained and evaluated 21 | nn = NeuralNetworkSimple(model_path=os.path.join(log_path, 'snapshots'), num_input = 3, **fit_params) 22 | 23 | pipeline_steps = [ 24 | 25 | # dummy dictionary to which various transformers will append their contents 26 | ('contents', ConstantFeeder({})), 27 | 28 | # generate training dataset used to train neural network 29 | ('training_dataset_circles', TFReader(input_filename=fit_params.get('input_tf_dataset'), 30 | content_name='training_dataset')), 31 | 32 | # when verbose nn will run on validation dataset to see how decision boundaries change 33 | ('validation_dataset_grid', GridGenerator(res=0.085, grid_min=-1.01, grid_max=1.01, content_name='validation_dataset', mode='3D')), 34 | 35 | # once nn is finished training it will run on test set, this is the 36 | # set used for betti numbers calculation 37 | # ('test_dataset_grid', GridGenerator(res=0.2, grid_min=-2.01, grid_max=2.01, content_name='test_dataset', mode='3D')), 38 | ('test_dataset_grid', TFReader(input_filename=fit_params.get('input_tf_dataset'), 39 | content_name='test_dataset', mode = 'test')), 40 | 41 | # wrapper to: 42 | # -train nn on content['training_dataset'] 43 | # -when verbose display results on content['validation_dataset'] 44 | # -when finished fitting run on test on content['test_dataset'] 45 | ('nn_trainer', NeuralNetworkTrainer(nn=nn, 46 | content_name='nn_predictions_on_test_dataset', 47 | args=args, 48 | **fit_params)), 49 | 50 | # dump tensor values to pickle file 51 | # ('save_content_to_pickle', PickleTensorValuesSaver(output_filename=os.path.join(log_path, 52 | # 'pickles', 53 | # 'nn_after_fit.pkl'), 54 | # content_name='nn_predictions_on_test_dataset')), 55 | # dump tensor values to excel file 56 | ('save_content_to_excel', ExcelTensorValuesSaver(output_filename=os.path.join(log_path, 'excels'), 57 | content_name='nn_predictions_on_test_dataset')), 58 | 59 | # plot tensor values 60 | ('layer_output_plotter', TensorsValuesPlotter(content_name='nn_predictions_on_test_dataset', 61 | output_filename=os.path.join(log_path, 'plots', 'tensors_values'), 62 | enable=args.plot)), 63 | 64 | # visualize training and test set 65 | ('training_and_test_dataset_visualizer', 66 | Visualizer2D(output_filename=os.path.join(log_path, 'plots', 67 | os.path.basename(args.input_tf_dataset).split('.')[0] + '-trainer' + ".html"), mode='3D', enable=args.plot)) 68 | ] 69 | 70 | return Pipeline(steps=pipeline_steps) -------------------------------------------------------------------------------- /pipelines/visualize_excel.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from transformers.excel_to_plot_feeder import ExcelToPlotFeeder 4 | 5 | def load(args: Namespace) -> Pipeline: 6 | log_path = args.output 7 | fit_params = vars(args) 8 | 9 | pipeline_steps = [ 
('excel_to_betti_feeder', ExcelToPlotFeeder(model_path=args.pretrained, excel_names=[ 10 | # 'cat1_LeakyRelu-0.csv', 11 | # 'cat1_LeakyRelu_1-0.csv', 12 | # 'cat1_LeakyRelu_2-0.csv', 13 | 'cat1_LeakyRelu_3-0.csv', 14 | # 'cat1_LeakyRelu_4-0.csv', 15 | # 'cat1_LeakyRelu_5-0.csv', 16 | # 'cat1_LeakyRelu_6-0.csv', 17 | # 'cat1_Relu-0.csv', 18 | # 'cat1_Relu_1-0.csv', 19 | # 'cat1_Relu_2-0.csv', 20 | # 'cat2_Relu_2-0.csv', 21 | # 'cat1_Relu_3-0.csv', 22 | # 'cat1_Relu_4-0.csv', 23 | # 'cat1_Relu_5-0.csv', 24 | # 'cat1_Relu_6-0.csv', 25 | # 'cat1_LeakyRelu_7-0.csv', 26 | # 'cat1_LeakyRelu_8-0.csv', 27 | # 'cat1_LeakyRelu_9-0.csv' 28 | ])) 29 | ] 30 | 31 | return Pipeline(steps=pipeline_steps) -------------------------------------------------------------------------------- /pipelines/visualize_nn_prediction.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from transformers.grid_generator import GridGenerator 4 | from transformers.tf_saver import TFSaver 5 | from estimators.nn import NeuralNetwork 6 | import os 7 | from transformers.pickle_tensors_values_saver import PickleTensorValuesSaver 8 | from transformers.visualizer_2d import Visualizer2D 9 | from transformers.pickle_reader import PickleReader 10 | from transformers.tf_reader import TFReader 11 | 12 | def load(args: Namespace) -> Pipeline: 13 | pipeline_steps = [ 14 | ('generate_grid', GridGenerator(res=0.05)), 15 | ('nn', NeuralNetwork(is_training=False, architecture=args.model, model_path=args.pretrained_model)), 16 | ('save_nn_predictions', PickleTensorValuesSaver(output_filename=os.path.join(args.output_pickle_folder, args.output_pickle_predictions))), 17 | ('read_data_base_on_which_nn_was_trained', TFReader(input_filename=os.path.join(args.input_tf_folder, args.input_tf_dataset))), 18 | ('read_nn_predictions', PickleReader(input_filename=os.path.join(args.output_pickle_folder, args.output_pickle_predictions))), 19 | ('visualizer_grid_and_data_base_on_which_nn_was_trained', Visualizer2D(output_filename=os.path.join(args.output_plots_folder, args.input_tf_dataset + ".html"))) 20 | ] 21 | 22 | return Pipeline(steps=pipeline_steps) -------------------------------------------------------------------------------- /pipelines/visualize_nn_simple_prediction.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from transformers.grid_generator import GridGenerator 4 | from transformers.tf_saver import TFSaver 5 | from estimators.nn_simple import NeuralNetworkSimple 6 | import os 7 | from transformers.pickle_tensors_values_saver import PickleTensorValuesSaver 8 | from transformers.visualizer_2d import Visualizer2D 9 | from transformers.pickle_reader import PickleReader 10 | from transformers.tf_reader import TFReader 11 | 12 | 13 | def load(args: Namespace) -> Pipeline: 14 | fit_params = vars(args) 15 | pipeline_steps = [ 16 | ('generate_grid', GridGenerator(res=0.05)), 17 | ('nn', NeuralNetworkSimple(model_path=args.pretrained_model, **fit_params)), 18 | ('save_nn_predictions', PickleTensorValuesSaver(output_filename=os.path.join(args.output_pickle_folder, args.model, args.output_pickle_predictions))), 19 | ('read_data_base_on_which_nn_was_trained', TFReader(input_filename=os.path.join(args.input_tf_folder, args.input_tf_dataset))), 20 | ('read_nn_predictions', 
PickleReader(input_filename=os.path.join(args.output_pickle_folder, args.model, args.output_pickle_predictions))), 21 | ('visualizer_grid_and_data_base_on_which_nn_was_trained', Visualizer2D(output_filename=os.path.join(args.output_plots_folder, args.model, args.input_tf_dataset + ".html"))) 22 | ] 23 | 24 | return Pipeline(steps=pipeline_steps) -------------------------------------------------------------------------------- /pipelines/visualize_tfrecords_dataset.py: -------------------------------------------------------------------------------- 1 | from argparse import Namespace 2 | from sklearn.pipeline import Pipeline 3 | from transformers.tf_reader import TFReader 4 | from transformers.visualizer_2d import Visualizer2D 5 | from transformers.constant_feeder import ConstantFeeder 6 | 7 | import os 8 | 9 | def load(args: Namespace) -> Pipeline: 10 | pipeline_steps = [ 11 | ('contents', ConstantFeeder({})), 12 | ('training_dataset_circles', TFReader(input_filename=args.input_tf_dataset, 13 | content_name='training_dataset')), 14 | ('visualizer', Visualizer2D(output_filename=os.path.join(args.output_plots_folder, "plot.html"))), 15 | ] 16 | 17 | return Pipeline(steps=pipeline_steps) 18 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Framework description and usage 2 | 3 | **The code was developed with the help of Andrey Zhitnikov and is under the responsibility of Gregory Naisat. Please send all questions, inquiries and bug reports to me at: gregn@uchicago.edu** 4 | 5 | ## ![#f03c15](https://placehold.it/15/f03c15/000000?text=+) ![#1589F0](https://placehold.it/15/1589F0/000000?text=+) ![#f03c15](https://placehold.it/15/f03c15/000000?text=+) Anonymized version ![#f03c15](https://placehold.it/15/f03c15/000000?text=+) ![#1589F0](https://placehold.it/15/1589F0/000000?text=+) ![#f03c15](https://placehold.it/15/f03c15/000000?text=+) 6 | 7 | -------------------------------------------- 8 | 9 | ![topology change](https://github.com/topnn/topnn_framework/blob/master/pics/topology_change.png) 10 | 11 | 12 | # Requirements 13 | Python 3.5, 14 | TensorFlow, 15 | Julia 0.6.4, 16 | [Eirene](https://github.com/Eetion/Eirene.jl) topological data analysis package 17 | 18 | (once Julia and Eirene are installed, update `./julia_include/include_eirene_run.jl` with the path to the local Eirene installation, and update `./transformers/excel_to_betti_feeder_parallel.py` with the path to the Julia executable) 19 | 20 | # Training a model 21 | * Use `computer.py` running the `train_2_excel_2D` pipeline for two-dimensional data sets and `train_2_excel_3D` for three-dimensional data sets. 22 | * Specify which data set to use (D-I `circles_type_8`, D-II `rings_9`, D-III `spheres_9`) and what architecture to train (i.e. the size of the network and the activation type, e.g. `10u_by_15`, `Relu`) 23 | * Set the number of training epochs and the learning rate 24 | * Set the overall number of trials and the frequency of log reports 25 | 26 | For each successful training attempt (perfect classification of the data set) a log file (`/data/<data set>/<architecture>/good_results.txt`) is updated, and the resulting model and the output of each layer are saved in a new folder whose name is the current date and time (`/data/<data set>/<architecture>/<activation>/<date-time>/`) and whose location reflects the selected data set and the selected architecture. The Betti numbers calculation pipeline uses the `good_results.txt` log file to access the stored models and run Betti number calculations on their outputs.
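The exact on-disk format of `good_results.txt` is not spelled out here; assuming it records one stored-model directory per line, a minimal sketch of how a downstream step could enumerate the successfully trained models might look like this (the helper name `list_good_models` is illustrative and not part of the framework):

```
import os

def list_good_models(log_file):
    """Yield stored-model directories recorded by successful training runs.

    Assumes good_results.txt holds one model directory path per line.
    """
    with open(log_file) as f:
        for line in f:
            path = line.strip()
            if path and os.path.isdir(path):
                yield path

# e.g. iterate over models trained on the rings_9 data set:
# for model_dir in list_good_models('./data/rings_9/good_results.txt'):
#     print(model_dir)
```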
27 | 28 | ## Example: 29 | Train on the D-II data set using a network of size 10 (layers) by 25 (neurons each) with `LeakyRelu` activation. Set a 0.02 learning rate and run 12000 training epochs, with 80 trials, reporting progress every 1000 epochs: 30 | 31 | `computer.py --pipeline_name train_2_excel_3D --output ./data/rings_9/ --input-tf-dataset ./data/rings_9/rings_9.tfrecords --model 10_by_25 --activation_type LeakyRelu --trials 80 --learning_rate 0.02 --training_epochs 12000 --summary_freq 1000` 32 | 33 | # Calculate Betti numbers 34 | * Once a few well-trained neural networks have been accumulated, use `computer.py` running the `excel_2_betti` pipeline to compute Betti numbers 35 | * Betti numbers are calculated for successfully trained neural networks with the given architecture and activation type. The calculation is split in parallel across 10 cores, each core limited to 10 GB of memory. 36 | 37 | * Run `computer.py` with appropriately set parameters: the number of neighbors for nearest-neighbor graph construction (the scale at which to build the Vietoris-Rips complex is fixed in the Eirene call at `./julia_include/julia_aux2`). 38 | 39 | ## Example: 40 | Calculate Betti numbers for networks trained on the D-II dataset of size 10 (layers) by 25 (neurons each) with Leaky Relu activation. The calculations proceed on a subsample of the data set, which is one fourth of the original data set size. Compute Betti numbers zero and one for class A (`cat2`): 41 | 42 | `computer.py --pipeline_name excel_2_betti --output ./data/rings_9/ --input-tf-dataset ./data/rings_9/rings_9.tfrecords --model 10_by_25 --activation_type LeakyRelu --trials 80 --cat2 --divisor 4 --neighbors 35 --betti_max 1 --read_excel_from LeakyRelu` 43 | 44 | # Visualize Data set 45 | * Run `computer.py` with the appropriate pipeline (currently there is one basic pipeline to run all the visualization, `visualize_tfrecords_dataset.py`; the pipeline draws a 2D projection of the dataset. Example of a call: 46 | ``` 47 | computer.py --pipeline 48 | visualize_tfrecords_dataset 49 | --input-tf-dataset 50 | ./data/spheres_9/spheres_9.tfrecords 51 | --output-plots-folder 52 | ./data/plots/ 53 | ``` 54 | ) 55 | * Check the generated `.html` file with the `plotly` data set plot. 56 | 57 | 58 | ### Trials (updated July 21, 2020): 59 | 60 | Simulation results are gradually uploaded to [Google Drive](https://drive.google.com/drive/folders/1NhDn5r6Jm4c5Q71ciaLXAEpCZ1-tQfeu?usp=sharing) 61 | -------------------------------------------------------------------------------- /simulators/simulator.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Simulator(ABC): 5 | """ 6 | Abstract class to capture the concept of a "simulator".
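    Concrete simulators hold a scikit-learn pipeline (passed in via the 'pipeline' keyword argument) and drive it through their run() method.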
7 | """ 8 | 9 | def __init__(self, **kwargs): 10 | self.pipeline = kwargs.get('pipeline') 11 | # validate(self.pipeline) 12 | 13 | @abstractmethod 14 | def run(self): 15 | raise NotImplementedError 16 | -------------------------------------------------------------------------------- /transformers/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from sklearn.base import TransformerMixin, BaseEstimator 4 | 5 | 6 | class Transformer(ABC, BaseEstimator, TransformerMixin): 7 | @abstractmethod 8 | def transform(self, x): 9 | raise NotImplementedError 10 | 11 | def fit(self, x, y=None, **fit_params): 12 | return self -------------------------------------------------------------------------------- /transformers/betti_calc.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | from os import listdir 3 | import os 4 | from os.path import isfile, join 5 | import julia 6 | import matplotlib 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D 9 | from sklearn.neighbors import NearestNeighbors 10 | import numpy as np 11 | import csv 12 | import pickle 13 | import time 14 | from sklearn.manifold import Isomap 15 | import sklearn.utils.graph_shortest_path as gp 16 | import networkx as nx 17 | 18 | def print_time(str, now): 19 | print(str, time.time() - now) 20 | now = time.time() 21 | return now 22 | 23 | class BettiCalc(Transformer): 24 | def __init__(self, model_path, julia, divisor, neighbors): 25 | self.betti_dir = os.path.join(model_path, 'betti') 26 | self.pickle_file = os.path.join(model_path, 'betti', "workspace_betti.pkl") 27 | if not os.path.exists(self.betti_dir): 28 | os.makedirs(self.betti_dir) 29 | 30 | self.fn = julia.include('./julia_include/julia_aux.jl') 31 | self.fn2 = julia.include('./julia_include/julia_aux2.jl') 32 | 33 | self.verbose = True 34 | self.number_of_neighbors = int(neighbors) 35 | self.divisor = int(divisor) 36 | 37 | def transform(self, content=None): 38 | 39 | csv_file = content 40 | betti_file = os.path.join(self.betti_dir, 'betti-' + os.path.basename(csv_file)) 41 | 42 | def read_csv(csv_file, divisor): 43 | with open(csv_file) as file: 44 | readCSV = csv.reader(file, delimiter=',') 45 | data = [] 46 | for row in readCSV: 47 | data.append([float(item) for item in row[::divisor]]) 48 | 49 | data = np.unique(np.transpose(np.array(data)), axis=0) 50 | return data 51 | 52 | def find_neighbors(X, n_neighbors): 53 | """ find n_neighbors nearest neighbors to each point in point cloud X.""" 54 | 55 | now = time.time() 56 | now = print_time('NearestNeighbors', now) 57 | nbrs = NearestNeighbors(n_neighbors=n_neighbors, 58 | algorithm='ball_tree').fit(X) 59 | now = print_time('NearestNeighbors - done', now) 60 | distances, indices = nbrs.kneighbors(X) 61 | now = print_time('kneighbors - done',now) 62 | A = nbrs.kneighbors_graph(X).toarray() 63 | now = print_time('kneighbors_graph - done', now) 64 | return indices, A 65 | 66 | def plot_data(data, fig): 67 | """plot first 3d of data""" 68 | 69 | if data.shape[1] > 3: 70 | print("Warning: data dimension is larger than 3, dim is %s" % (data.shape[1])) 71 | 72 | ax = fig.add_subplot(111, projection='3d') 73 | # ax.scatter(data[:, 0], data[:, 1], data[:, 2], marker='.', s=0.5) 74 | return ax 75 | 76 | def plot_neighbors(data, indices, ax): 77 | """connect neighboring nodes in data by line """ 78 | 79 | indices = [item.tolist() for item in 
indices] 80 | counter_removed = 0 81 | 82 | for j in range(data.shape[0]): 83 | def remove_repetitions(indices, j, counter_removed): 84 | for item in indices[j]: 85 | try: 86 | indices[item].remove(j) 87 | counter_removed = counter_removed + 1 88 | except ValueError: 89 | pass 90 | # print("removed", counter_removed) 91 | return counter_removed, indices 92 | 93 | counter_removed, indices = remove_repetitions(indices, j, counter_removed) 94 | origin = data[j, :] 95 | targets = [data[i, :] for i in indices[j]] 96 | 97 | for target in targets: 98 | x = [origin[0], target[0]] 99 | y = [origin[1], target[1]] 100 | z = [origin[2], target[2]] 101 | ax.plot(x, y, z, 'ro-', linewidth='1', markersize=1) 102 | 103 | 104 | def creat_graph_and_calc_dist_verb(A): 105 | """ 106 | creates graph from ajacency matrix and calculates shortest path 107 | """ 108 | return gp.graph_shortest_path(A, method='auto', directed=False) 109 | 110 | 111 | def distance_mat_verb(q, A, cut_off = 10): 112 | """ Use graph A to build graph distance matrix""" 113 | 114 | # maximum distance between node is 1000 115 | 116 | 117 | mat = np.ones(shape=A.shape) * 1000 118 | for node in range(len(q)): 119 | for neighbor in range(len(q)): 120 | 121 | # distance of node to itself is zero 122 | if node == neighbor: 123 | mat[node, neighbor] = 0 124 | continue 125 | 126 | # nodes that cannot be reached (cut_off distance away) are set to zero which 127 | # this problem is fixed below 128 | if q[node, neighbor] > cut_off or q[node, neighbor] == 0: 129 | mat[node, neighbor] = 1000 130 | else: 131 | mat[node, neighbor] = q[node, neighbor] 132 | return mat 133 | 134 | betti_file = os.path.join(self.betti_dir, 'betti-' + os.path.basename(csv_file)) 135 | betti_curve_file = os.path.join(self.betti_dir, 'betti-curve-' + os.path.basename(csv_file)) 136 | 137 | 138 | print("running calculation on neighbors filtration") 139 | data = read_csv(csv_file, self.divisor) 140 | 141 | now = time.time() 142 | now = print_time('Start: creat neighborhood graph and calculate graph distance (ISOMAP)', now) 143 | indices, A = find_neighbors(data, self.number_of_neighbors) 144 | mat = creat_graph_and_calc_dist_verb(A) 145 | mat = distance_mat_verb(mat, A) 146 | now = print_time('Done: creat neighborhood graph and calculate graph distance (ISOMAP)', now) 147 | 148 | if self.verbose: 149 | pass 150 | 151 | neihbors_betti_file = os.path.join(self.betti_dir, 'betti-nbr-' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file)) 152 | neihbors_curve_file = os.path.join(self.betti_dir, 'betti-nbr-curve-' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file)) 153 | file_repres_file = os.path.join(self.betti_dir, 'betti-nbr-repres-' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file).split('.')[0] + '.txt') 154 | mat_file = os.path.join(self.betti_dir, 'mat' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file).split('.')[0]) 155 | 156 | maxdim = 2 157 | np.save(mat_file, mat) 158 | print("fn2 call :", "neihbors_betti_file", neihbors_betti_file, "neihbors_curve_file", neihbors_curve_file, "file_repres_file", file_repres_file, 159 | "maxdim", maxdim, "mat_file", mat_file) 160 | 161 | print('"'+ neihbors_betti_file + '",' + '"' +neihbors_curve_file+ '",' + '"' +file_repres_file + '",'+ str(maxdim) + ',' '"'+ mat_file + '.npy"' ) 162 | 163 | self.fn2(neihbors_betti_file, neihbors_curve_file, file_repres_file, maxdim, 
mat_file + ".npy") 164 | 165 | return 'done' # uncomment to skip loops draw 166 | 167 | file_repres_file = file_repres_file + '_1' 168 | with open(file_repres_file) as f: 169 | lines = f.readlines() 170 | 171 | def draw_repres(data, origin, target, ax, k): 172 | """ Add connection from origin to targets""" 173 | 174 | colors = ['y', 'g', 'k', 'm', 'c', 'b'] 175 | for t in range(len(origin)): 176 | x = [data[origin[t] - 1, 0], data[target[t] - 1, 0]] 177 | y = [data[origin[t] - 1, 1], data[target[t] - 1, 1]] 178 | z = [data[origin[t] - 1, 2], data[target[t] - 1, 2]] 179 | ax.plot(x, y, z, colors[k % 5] + 'o-', linewidth='3', markersize=1) 180 | 181 | def get_origin_and_targets(line, divisor): 182 | t = line.split(" ") 183 | q = [item.replace("'", "").replace("[", "").replace("]", "").replace(";", "").replace("\n", "") for item in 184 | t] 185 | points = [(int(item) - 1)* divisor for item in q] 186 | n = int(len(points) / 2) 187 | origin = points[:n] 188 | target = points[-n:] 189 | return origin, target 190 | 191 | loops = {} 192 | for k, line in enumerate(lines): 193 | origin, target = get_origin_and_targets(line, self.divisor) 194 | loops.update({k: (origin, target)}) 195 | 196 | plt.show() 197 | 198 | with open(self.pickle_file, 'wb') as handle: 199 | pickle.dump((data, mat), handle, protocol=pickle.HIGHEST_PROTOCOL) 200 | 201 | return loops 202 | -------------------------------------------------------------------------------- /transformers/betti_calc_parallel.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | from os import listdir 3 | import os 4 | from os.path import isfile, join 5 | import matplotlib 6 | import matplotlib.pyplot as plt 7 | from mpl_toolkits.mplot3d import Axes3D 8 | from sklearn.neighbors import NearestNeighbors 9 | import numpy as np 10 | import csv 11 | import pickle 12 | import time 13 | from sklearn.manifold import Isomap 14 | import sklearn.utils.graph_shortest_path as gp 15 | import networkx as nx 16 | 17 | def print_time(str, now): 18 | print(str, time.time() - now) 19 | now = time.time() 20 | return now 21 | 22 | class BettiCalcParallel(Transformer): 23 | def __init__(self, model_path, julia, divisor, neighbors, max_dim): 24 | self.project_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 25 | self.include_eirene_run = os.path.join(self.project_path, 'julia_include') 26 | self.betti_dir = os.path.join(model_path, 'betti') 27 | self.pickle_file = os.path.join(model_path, 'betti', "workspace_betti.pkl") 28 | if not os.path.exists(self.betti_dir): 29 | os.makedirs(self.betti_dir) 30 | 31 | self.verbose = True 32 | self.number_of_neighbors = int(neighbors) 33 | self.divisor = int(divisor) 34 | self.max_dim = int(max_dim) 35 | 36 | def transform(self, content=None): 37 | 38 | csv_file = content 39 | betti_file = os.path.join(self.betti_dir, 'betti-' + os.path.basename(csv_file)) 40 | 41 | def read_csv(csv_file, divisor): 42 | with open(csv_file) as file: 43 | readCSV = csv.reader(file, delimiter=',') 44 | data = [] 45 | for row in readCSV: 46 | data.append([float(item) for item in row[::divisor]]) 47 | 48 | data = np.unique(np.transpose(np.array(data)), axis=0) 49 | return data 50 | 51 | def find_neighbors(X, n_neighbors): 52 | """ find n_neighbors nearest neighbors to each point in point cloud X.""" 53 | 54 | now = time.time() 55 | now = print_time('NearestNeighbors', now) 56 | 57 | if X.shape[0] < n_neighbors + 1: 58 | n_neighbors = X.shape[0] 59 | 60 | nbrs = 
NearestNeighbors(n_neighbors=n_neighbors, 61 | algorithm='ball_tree').fit(X) 62 | 63 | now = print_time('NearestNeighbors - done', now) 64 | distances, indices = nbrs.kneighbors(X) 65 | now = print_time('kneighbors - done',now) 66 | A = nbrs.kneighbors_graph(X).toarray() 67 | now = print_time('kneighbors_graph - done', now) 68 | return indices, A 69 | 70 | def creat_graph_and_calc_dist_verb(A): 71 | """ 72 | creates graph from ajacency matrix and calculates shortest path 73 | """ 74 | return gp.graph_shortest_path(A, method='auto', directed=False) 75 | 76 | 77 | def distance_mat_verb(q, A, cut_off = 10): 78 | """ Use graph A to build graph distance matrix""" 79 | # maximum distance between node is 1000 80 | mat = np.ones(shape=A.shape) * 1000 81 | for node in range(len(q)): 82 | for neighbor in range(len(q)): 83 | 84 | # distance of node to itself is zero 85 | if node == neighbor: 86 | mat[node, neighbor] = 0 87 | continue 88 | 89 | # nodes that cannot be reached (cut_off distance away) are set to zero which 90 | # this problem is fixed below 91 | if q[node, neighbor] > cut_off or q[node, neighbor] == 0: 92 | mat[node, neighbor] = 1000 93 | else: 94 | mat[node, neighbor] = q[node, neighbor] 95 | return mat 96 | 97 | betti_file = os.path.join(self.betti_dir, 'betti-' + os.path.basename(csv_file)) 98 | betti_curve_file = os.path.join(self.betti_dir, 'betti-curve-' + os.path.basename(csv_file)) 99 | 100 | print("running calculation on neighbors filtration") 101 | data = read_csv(csv_file, self.divisor) 102 | 103 | now = time.time() 104 | now = print_time('Start: creat neighborhood graph and calculate graph distance (ISOMAP)', now) 105 | indices, A = find_neighbors(data, self.number_of_neighbors) 106 | mat = creat_graph_and_calc_dist_verb(A) 107 | mat = distance_mat_verb(mat, A) 108 | now = print_time('Done: creat neighborhood graph and calculate graph distance (ISOMAP)', now) 109 | 110 | if self.verbose: 111 | pass 112 | 113 | neihbors_betti_file = os.path.join(self.betti_dir, 'betti-nbr-' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file)) 114 | neihbors_curve_file = os.path.join(self.betti_dir, 'betti-nbr-curve-' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file)) 115 | file_repres_file = os.path.join(self.betti_dir, 'betti-nbr-repres-' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file).split('.')[0] + '.txt') 116 | mat_file = os.path.join(self.betti_dir, 'mat' + str(self.divisor) + '-' + str(self.number_of_neighbors) + '-' + os.path.basename(csv_file).split('.')[0]) 117 | 118 | maxdim = self.max_dim 119 | np.save(mat_file, mat) 120 | print("fn2 call :", "neihbors_betti_file", neihbors_betti_file, "neihbors_curve_file", neihbors_curve_file, "file_repres_file", file_repres_file, 121 | "maxdim", maxdim, "mat_file", mat_file) 122 | 123 | call = ("julia " + os.path.join(self.include_eirene_run, "include_eirene_run.jl") + ' "' + neihbors_betti_file + '" ' + '"' +neihbors_curve_file+ '" ' + '"' +file_repres_file + '" '+ str(maxdim) + ' ' '"'+ mat_file + '.npy"' ) 124 | 125 | return (call, mat_file + ".npy") -------------------------------------------------------------------------------- /transformers/circles_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/transformers/circles_9.png 
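Both `BettiCalc` and `BettiCalcParallel` above build the distance matrix the same way: construct a k-nearest-neighbour graph of the point cloud, take graph shortest-path lengths as an ISOMAP-style geodesic metric, and assign a large constant distance (1000) to pairs that are unreachable within the cut-off before saving the matrix for the Eirene step. The following is a self-contained sketch of that construction; it mirrors the scikit-learn calls used above but swaps the deprecated `sklearn.utils.graph_shortest_path` helper for `scipy.sparse.csgraph.shortest_path`, and the defaults (`n_neighbors=35`, `cut_off=10`, `far=1000.0`) are illustrative values taken from the surrounding code and readme, not a fixed API:

```
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse.csgraph import shortest_path

def geodesic_distance_matrix(X, n_neighbors=35, cut_off=10, far=1000.0):
    """k-NN graph -> graph shortest paths -> capped geodesic distance matrix."""
    n_neighbors = min(n_neighbors, X.shape[0])   # guard against tiny point clouds
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(X)
    A = nbrs.kneighbors_graph(X)                 # sparse 0/1 adjacency of the k-NN graph
    q = shortest_path(A, method='auto', directed=False)
    mat = np.full(q.shape, far)                  # unreachable (or too distant) pairs stay "far"
    reachable = np.isfinite(q) & (q > 0) & (q <= cut_off)
    mat[reachable] = q[reachable]
    np.fill_diagonal(mat, 0.0)                   # a node is at distance zero from itself
    return mat

# e.g. save a matrix for the Julia/Eirene step:
# mat = geodesic_distance_matrix(np.random.rand(200, 3))
# np.save('mat.npy', mat)
```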
-------------------------------------------------------------------------------- /transformers/constant_feeder.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | from os import listdir 3 | import os 4 | from os.path import isfile, join 5 | import julia 6 | 7 | class ConstantFeeder(Transformer): 8 | def __init__(self, constant): 9 | self.constant = constant 10 | 11 | def transform(self, content=None): 12 | return self.constant -------------------------------------------------------------------------------- /transformers/excel_reader.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from transformers import Transformer 4 | import csv 5 | import os 6 | import glob 7 | import re 8 | import matplotlib.pyplot as plt 9 | from mpl_toolkits.mplot3d import Axes3D 10 | import cv2 11 | 12 | class ExcelReader(Transformer): 13 | def __init__(self, input_filename="saved_dataset.csv"): 14 | self._input_filename = input_filename 15 | self.step2load = '1000' 16 | 17 | def transform(self, content=None): 18 | if os.path.isdir(self._input_filename): 19 | print("\nExcel Reader: given directory: " + self._input_filename) 20 | files = sorted(glob.glob(self._input_filename + '/*.csv')) 21 | print("\nExcel Reader found: ") 22 | print(files, sep='\n') 23 | else: 24 | files = [self._input_filename] 25 | 26 | 27 | dictforplot = {} 28 | names =['denseMatMul', 'denseBiasAdd', 'LeakyRelu', 29 | 'dense_1MatMul' , 'dense_1BiasAdd', 'LeakyRelu_1', 30 | 'dense_2MatMul', 'dense_2BiasAdd', 'LeakyRelu_2', 31 | 'dense_3MatMul', 'dense_3BiasAdd', 'LeakyRelu_3', 32 | 'dense_4MatMul', 'dense_4BiasAdd', 'LeakyRelu_4', 33 | 'dense_5MatMul', 'dense_5BiasAdd', 'LeakyRelu_5', 34 | 'dense_6MatMul', 'dense_6BiasAdd', 'LeakyRelu_6', 35 | 'dense_7MatMul', 'dense_7BiasAdd', 'LeakyRelu_7', 36 | 'dense_8MatMul', 'dense_8BiasAdd', 'LeakyRelu_8' ] 37 | cap = cv2.VideoCapture(0) # video source: webcam 38 | fourcc = cv2.VideoWriter.fourcc(*'XVID') # record format xvid 39 | out = cv2.VideoWriter('output.avi', fourcc, 10, (640, 480)) 40 | regex = re.compile('(\S+)_(\S+\d)_(\S+)_*(\d*)-(\d)\.csv') 41 | for file in files: 42 | f = os.path.basename(file) 43 | result = regex.match(f) 44 | print(result.group(0)) 45 | print(result.group(1)) 46 | print(result.group(2)) 47 | print(result.group(3)) 48 | print(result.group(4)) 49 | print(result.group(5)) 50 | if not (result.group(2) == 'cat2' and self.step2load in result.group(1)): 51 | continue 52 | with open(file) as csv_file: 53 | rows = [] 54 | csv_reader = csv.reader(csv_file, delimiter=',') 55 | for row in csv_reader: 56 | print(row) 57 | rows.append(row) 58 | rows= np.asarray(rows,dtype=float) 59 | dictforplot.update({result.group(3) : rows}) 60 | 61 | for name in names: 62 | rows = dictforplot[name] 63 | fig = plt.figure() 64 | ax = fig.gca(projection='3d') 65 | ax.scatter(rows[0,:], rows[1,:], rows[2,:], s=0.1) 66 | ax.set_axis_off() 67 | ax.set_xlabel('X') 68 | ax.set_ylabel('Y') 69 | ax.set_zlabel('Z') 70 | azimuths = [0, 30 , 60, 90, 120, 150, 180, 210, 240, 270, 300, 330] 71 | elevations = [0, 30, 60, 90, 120, 150, 180, 150, 120, 90, 60, 30] 72 | for i, azimuth in enumerate(azimuths): 73 | el = elevations[2] 74 | ax.view_init(elev=el, azim=azimuth) 75 | plt.savefig('last.png') 76 | img = cv2.imread("last.png") 77 | cv2.putText(img=img,text= 'cat-1: ' + name , org=(0, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, 
color=(200, 0, 0), thickness=1, lineType=cv2.LINE_AA) 78 | out.write(img) 79 | pass 80 | out.release() 81 | 82 | 83 | 84 | def predict(self, content=None): 85 | """saver done't support predict instead writes content on the disk""" 86 | 87 | return self.transform(content) 88 | -------------------------------------------------------------------------------- /transformers/excel_tensor_values_saver.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from transformers import Transformer 4 | import csv 5 | import os 6 | 7 | class ExcelTensorValuesSaver(Transformer): 8 | def __init__(self, content_name, output_filename="saved_dataset.csv", ): 9 | self._output_filename = output_filename 10 | self.content_name = content_name 11 | 12 | if not os.path.exists(output_filename): 13 | os.makedirs(output_filename) 14 | 15 | def transform(self, content=None): 16 | 17 | if type(content) == type({}): 18 | # create directory if does not exist 19 | if not os.path.exists(os.path.join(os.path.dirname(self._output_filename))): 20 | os.makedirs(os.path.dirname(self._output_filename)) 21 | 22 | tensor_names = content[self.content_name]['labels_names'] 23 | tensor_names = [ name.replace('/','') for name in tensor_names] 24 | predictions = content[self.content_name]['labels'] 25 | samples = content[self.content_name]['samples'] 26 | samples_n = len(samples) 27 | 28 | # first tensor is ArgMax:0 it is used to infer category 29 | for j in range(1, len(tensor_names)): 30 | 31 | name = tensor_names[j] 32 | tensor_dim = predictions[0][j].shape[1] 33 | tensor_values_all = np.zeros(shape=(samples_n, tensor_dim)) 34 | tensor_values_cat1 = np.zeros(shape=(samples_n, tensor_dim)) 35 | tensor_values_cat2 = np.zeros(shape=(samples_n, tensor_dim)) 36 | 37 | for i, point in enumerate(samples): 38 | 39 | tensor_values_all[i, :] = predictions[i][j] 40 | 41 | if predictions[i][0] == 1: 42 | tensor_values_cat1[i, :] = predictions[i][j] 43 | 44 | elif predictions[i][0] == 0: 45 | tensor_values_cat2[i, :] = predictions[i][j] 46 | 47 | file_name = 'cat1' + '_' + name + '.csv' 48 | with open(os.path.join(self._output_filename, file_name), 'w', newline='') as csvfile: 49 | spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) 50 | 51 | for row in np.transpose(tensor_values_cat1): 52 | spamwriter.writerow(row) 53 | 54 | file_name = 'cat2' + '_' + name + '.csv' 55 | with open(os.path.join(self._output_filename, file_name), 'w', newline='') as csvfile: 56 | spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) 57 | for row in np.transpose(tensor_values_cat2): 58 | spamwriter.writerow(row) 59 | 60 | file_name = 'all' + '_' + name + '.csv' 61 | with open(os.path.join(self._output_filename, file_name), 'w', newline='') as csvfile: 62 | spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL) 63 | for row in np.transpose(tensor_values_all): 64 | spamwriter.writerow(row) 65 | 66 | return content 67 | else: 68 | return '' 69 | def predict(self, content=None): 70 | """saver done't support predict instead writes content on the disk""" 71 | 72 | return self.transform(content) 73 | -------------------------------------------------------------------------------- /transformers/excel_to_betti_feeder.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | from os import listdir 3 | import 
os 4 | from os.path import isfile, join 5 | from transformers.betti_calc import BettiCalc 6 | import csv 7 | import multiprocessing 8 | 9 | 10 | def run_calculation(csv_file, task, model_path, julia, divisor, neighbors): 11 | print("Starting task ", task) 12 | loops = [] 13 | betti_calc = BettiCalc(model_path, julia, divisor, neighbors) 14 | trace = betti_calc.transform(csv_file) 15 | 16 | loops.append({'trace': trace, 'name': os.path.basename(csv_file)}) 17 | print("Done task ", task) 18 | return loops 19 | 20 | 21 | class ExcelToBettiFeeder(Transformer): 22 | 23 | def __init__(self, model_path, julia, excel_names, divisor, neighbors): 24 | self.excel_dir = os.path.join(model_path, 'excels') 25 | self.model_path = model_path 26 | self.excel_names = excel_names 27 | self.julia = julia 28 | self.divisor = int(divisor) 29 | self.neighbors = int(neighbors) 30 | 31 | def transform(self, content=None): 32 | 33 | betti_calc = BettiCalcParallel(self.model_path, self.julia, self.divisor, self.neighbors) 34 | loops = [] 35 | for file in self.excel_names: 36 | csv_file = os.path.join(self.excel_dir, file) 37 | trace = betti_calc.transform(csv_file) 38 | 39 | loops.append({'trace': trace, 'name' : file}) 40 | 41 | content.update({'betti_list' : loops}) 42 | return content 43 | # return '' -------------------------------------------------------------------------------- /transformers/excel_to_betti_feeder_parallel.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | from os import listdir 3 | import os 4 | from os.path import isfile, join 5 | from transformers.betti_calc_parallel import BettiCalcParallel 6 | import csv 7 | import multiprocessing 8 | import datetime 9 | import time 10 | import psutil 11 | 12 | def run_calculation(csv_file, task, model_path, julia, divisor, neighbors): 13 | print("Starting task ", task) 14 | loops = [] 15 | betti_calc = BettiCalcParallel(model_path, julia, divisor, neighbors) 16 | trace = betti_calc.transform(csv_file) 17 | 18 | loops.append({'trace': trace, 'name': os.path.basename(csv_file)}) 19 | print("Done task ", task) 20 | return loops 21 | 22 | 23 | class ExcelToBettiFeederParallel(Transformer): 24 | 25 | def __init__(self, model_path, julia, excel_names, divisor, neighbors, max_dim): 26 | self.excel_dir = os.path.join(model_path, 'excels') 27 | self.model_path = model_path 28 | self.excel_names = excel_names 29 | self.julia = julia 30 | self.divisor = int(divisor) 31 | self.neighbors = int(neighbors) 32 | self.max_dim = int(max_dim) 33 | 34 | def transform(self, content=None): 35 | now = datetime.datetime.now() 36 | 37 | workers = 10 38 | giga = 1000000000 39 | mem_reserved = 10 * giga # reserve 10Gb of memory 40 | 41 | def split(arr, size): 42 | arrs = [] 43 | while len(arr) > size: 44 | pice = arr[:size] 45 | arrs.append(pice) 46 | arr = arr[size:] 47 | arrs.append(arr) 48 | return arrs 49 | 50 | 51 | from threading import Thread 52 | loops = [] 53 | 54 | subarrays = split(self.excel_names, workers) 55 | 56 | def check_threds(threads): 57 | alive_count = 0 58 | for thread in threads: 59 | if thread.is_alive(): 60 | alive_count += 1 61 | print("Currently", alive_count, "active threads", "out of max", workers, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) 62 | 63 | return alive_count 64 | 65 | def check_mem(): 66 | mem = psutil.virtual_memory().free 67 | print("free mem (Gb):", 1.0 * psutil.virtual_memory().free / giga, "the reserved lower limit is (Gb) :",1.0 * 
mem_reserved/giga, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) 68 | return mem 69 | 70 | 71 | mat_files = [] 72 | threads = [] 73 | total = 0 74 | for i, subarray in enumerate(subarrays): 75 | for j, file in enumerate(subarray): 76 | 77 | print("running chunk", i) 78 | print("Submiting job", j) 79 | print("Total job count", total) 80 | csv_file = os.path.join(self.excel_dir, file) 81 | print("running on csv_file", csv_file) 82 | betti_calc = BettiCalcParallel(self.model_path, self.julia, self.divisor, self.neighbors, self.max_dim ) 83 | (call, mat_file) = betti_calc.transform(csv_file) 84 | mat_files.append(mat_file) 85 | print("calling", call) 86 | print(":-:" * 30) 87 | 88 | # update os.system with path to julia executable 89 | threads.append(Thread(group=None, target=lambda: os.system('julia-9d11f62bcb/bin/' + call + " > " + os.path.join(self.model_path, "job-" + str(total) + ".txt")))) 90 | threads[total].start() 91 | print(":-:" * 30) 92 | total += 1 93 | 94 | while (check_threds(threads) == workers) or (check_mem() < mem_reserved): 95 | print("All workers are busy or memory is low (Gb)", 1.0 * check_mem()/giga, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) 96 | time.sleep(10) 97 | 98 | print("done chunk ", i, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")) 99 | 100 | while check_threds(threads) > 0 or (check_mem() < mem_reserved): 101 | time.sleep(10) 102 | 103 | print("all workers done") 104 | for mat_file in mat_files: 105 | os.remove(mat_file) 106 | 107 | print("done betti calculation for model", self.model_path , "in ", datetime.datetime.now() - now, " time") 108 | return '' 109 | -------------------------------------------------------------------------------- /transformers/excel_to_plot_feeder.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | from os import listdir 3 | import os 4 | from os.path import isfile, join 5 | from transformers.tensors_values_plotter import TensorsValuesPlotter 6 | import numpy as np 7 | import csv 8 | from itertools import combinations 9 | 10 | 11 | class ExcelToPlotFeeder(Transformer): 12 | 13 | def __init__(self, model_path, excel_names): 14 | self.excel_dir = os.path.join(model_path, 'excels') 15 | self.model_path = model_path 16 | self.excel_names = excel_names 17 | 18 | self.plotter = TensorsValuesPlotter(content_name='results_plot', 19 | output_filename=os.path.join(model_path, 'plots', 20 | 'tensors_values')) 21 | def transform(self, content=None): 22 | 23 | def read_csv(csv_file): 24 | with open(csv_file) as file: 25 | readCSV = csv.reader(file, delimiter=',') 26 | data = [] 27 | for row in readCSV: 28 | data.append([float(item) for item in row]) 29 | 30 | data = np.transpose(np.array(data)) 31 | return data 32 | 33 | for file in self.excel_names: 34 | csv_file = os.path.join(self.excel_dir, file) 35 | data = read_csv(csv_file) 36 | 37 | labels = data 38 | # have to mach data structure of "TensorsValuesPlotter" 39 | labels = [ [[item]] for item in data] 40 | contents = {'results_plot': {'labels_names' :[file], 41 | 'labels' : labels }} 42 | 43 | for a in list(combinations(range(6), 3 )): 44 | contents = {'results_plot': {'labels_names': [file + '_' + str(a[0]) + str(a[1]) + str(a[2])], 45 | 'labels': labels}} 46 | 47 | self.plotter.transform(contents, dim_1=a[0], dim_2=a[1], dim_3=a[2], auto_open=True) 48 | -------------------------------------------------------------------------------- /transformers/grid_generator.py: 
-------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from transformers import Transformer 4 | import matplotlib.pyplot as plt 5 | 6 | class GridGenerator(Transformer): 7 | def __init__(self, content_name ='grid', random=False, grid_min=-1.1, grid_max=1.1, res=0.1, mode='2D'): 8 | self.grid_min = grid_min 9 | self.grid_max = grid_max 10 | self.res = res 11 | self.content_name = content_name 12 | self.random = random 13 | self.mode = mode 14 | 15 | @staticmethod 16 | def gen_grid(grid_min, grid_max, res, random=False, mode='2D'): 17 | 18 | if mode == '2D': 19 | x = np.arange(grid_min, grid_max, res) 20 | 21 | y = np.arange(grid_min, grid_max, res) 22 | 23 | xx, yy = np.meshgrid(x, y) 24 | if random: 25 | xx = xx + np.random.randn(xx.shape[0], xx.shape[1]) * 0.02 26 | yy = yy + np.random.randn(yy.shape[0], yy.shape[1]) * 0.02 27 | grid = np.dstack((xx, yy)).reshape(-1, 2) 28 | 29 | if mode == '3D': 30 | x = np.arange(grid_min, grid_max, res) 31 | y = np.arange(grid_min, grid_max, res) 32 | z = np.arange(grid_min, grid_max, res) 33 | 34 | xx, yy, zz = np.meshgrid(x, y, z, indexing='ij') 35 | grid = np.stack((xx, yy, zz), axis=3).reshape(-1, 3) 36 | print("-" * 100, "length of grid", len(grid)) 37 | 38 | return grid 39 | 40 | def transform(self, content=None): 41 | grid = self.gen_grid(self.grid_min, self.grid_max, self.res, self.random, self.mode) 42 | dic = {self.content_name : {'samples': grid, 43 | 'labels': [0] * grid.shape[0]} } 44 | 45 | if content != None: 46 | content.update(dic) 47 | else: 48 | content = dic 49 | 50 | return content 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /transformers/pickle_reader.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from transformers import Transformer 4 | import pickle 5 | import os 6 | import glob 7 | 8 | class PickleReader(Transformer): 9 | def __init__(self, input_filename="saved_dataset.pkl"): 10 | self._input_filename = input_filename 11 | 12 | def predict(self, content=None): 13 | """Does not support predict instead reads input 14 | """ 15 | 16 | return self.transform(content) 17 | 18 | def transform(self, content=None): 19 | if os.path.isdir(self._input_filename): 20 | print("\nPickle Reader: given directory: " + self._input_filename) 21 | file = (sorted(glob.glob(self._input_filename + '/*.pkl')))[-1] 22 | 23 | self._input_filename = os.path.join(self._input_filename, os.path.basename(file)) 24 | print("\nPickle Reader choose file: " + self._input_filename) 25 | 26 | with (open(self._input_filename, "rb")) as openfile: 27 | a = pickle.load(openfile) 28 | 29 | 30 | grid = a['grid'] 31 | predictions = a['predictions'] 32 | y_category = [item[0] for item in predictions] 33 | 34 | # format ([1.0], [-0.3360700011253357, -0.6241478323936462]) 35 | classification_res = [(y_category[i], grid[i]) for i in range(len(y_category))] 36 | if content is not None: 37 | dic = {'classification_res' : classification_res, 'dataset': content['dataset']} 38 | else: 39 | dic = {'classification_res': classification_res} 40 | return dic 41 | -------------------------------------------------------------------------------- /transformers/pickle_tensors_values_saver.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from 
transformers import Transformer 4 | import pickle 5 | import os 6 | 7 | class PickleTensorValuesSaver(Transformer): 8 | def __init__(self, content_name, output_filename="saved_dataset.pkl"): 9 | self._output_filename = output_filename 10 | self.content_name = content_name 11 | 12 | def transform(self, content=None): 13 | 14 | # create directory if does not exist 15 | if not os.path.exists(os.path.dirname(self._output_filename)): 16 | os.makedirs(os.path.dirname(self._output_filename)) 17 | 18 | with open(self._output_filename, 'wb') as f: 19 | pickle.dump(content[self.content_name], f) 20 | 21 | return content 22 | 23 | def predict(self, content=None): 24 | """saver done't support predict instead writes content on the disk""" 25 | 26 | return self.transform(content) 27 | -------------------------------------------------------------------------------- /transformers/raw_circles_generator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from transformers import Transformer 4 | import matplotlib.pyplot as plt 5 | from sklearn.model_selection import train_test_split 6 | 7 | def split(data, label, test_size=0.22, sub_sample=10000): 8 | 9 | X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=test_size) 10 | 11 | train_size = int(sub_sample*(1-test_size)) 12 | test_size = int(sub_sample * test_size) 13 | 14 | return ([X_train[0:train_size], y_train[0:train_size] ], [X_test[0:test_size], y_test[0:test_size]]) 15 | 16 | class CirclesGenerator(Transformer): 17 | def __init__(self, content_name ='circles', random=False, grid_min=-10, grid_max=10, res=0.19, mode='2D'): 18 | self.grid_min = grid_min 19 | self.grid_max = grid_max 20 | self.res = res 21 | self.content_name = content_name 22 | self.random = random 23 | self.mode = mode 24 | 25 | def gen_grid(self, grid_min, grid_max, res, random=False, centers=[(0,0), (8,8)], radius=0.5, margin=0.5): 26 | 27 | x = np.arange(grid_min, grid_max, res) 28 | y = np.arange(grid_min, grid_max, res) 29 | xx, yy = np.meshgrid(x, y) 30 | if random: 31 | xx = xx + np.random.randn(xx.shape[0], xx.shape[1]) * 0.02 32 | yy = yy + np.random.randn(yy.shape[0], yy.shape[1]) * 0.02 33 | grid = np.dstack((xx, yy)).reshape(-1, 2) 34 | 35 | y = np.ones(shape=(len(grid), 2)) * 1 36 | 37 | for center in centers: 38 | for i, point in enumerate(grid): 39 | 40 | if np.linalg.norm(center - point) < radius + margin: 41 | y[i, 0] = 3 42 | y[i, 1] = 3 43 | 44 | if np.linalg.norm(center - point) < radius: 45 | y[i, 0] = 0 * y[i, 0] 46 | y[i, 1] = not y[i, 0] 47 | 48 | y = np.array(y) 49 | 50 | mask = (y == 3) 51 | label = y[:, 0] 52 | label = np.delete(label, np.where(mask[:, 0])) 53 | xx = list(grid[:, 0]) 54 | yy = list(grid[:, 1]) 55 | 56 | xx = np.delete(xx, np.where(mask[:, 0])) 57 | yy = np.delete(yy, np.where(mask[:, 0])) 58 | grid = np.ones(shape=(len(xx), 2)) 59 | 60 | grid[:, 0] = xx 61 | grid[:, 1] = yy 62 | 63 | y = np.ones(shape=(len(xx), 2)) 64 | y[:,0] = label 65 | y[:, 1]= 1- label 66 | 67 | return ( grid, y) 68 | 69 | def transform(self, content=None, big_r = 7, small_r = 1.3, n=9): 70 | 71 | import math 72 | pi = math.pi 73 | 74 | def LargeCirlce(r, n=n): 75 | return [(math.cos(2 * pi / n * x) * r, math.sin(2 * pi / n * x) * r) for x in range(0, n + 1)] 76 | 77 | X, y = self.gen_grid(self.grid_min, self.grid_max, self.res, self.random, centers=list(LargeCirlce(big_r, n-1)) + [0,0], radius=small_r) 78 | 79 | 80 | return [X, y] 81 | 82 | 83 | 84 | if 
__name__ == "__main__": 85 | 86 | mp = CirclesGenerator() 87 | content = mp.transform() 88 | X = content[0] 89 | y = content[1] 90 | trn, tsts = split(X, y) 91 | 92 | plt.scatter(trn[0][:, 0], trn[0][:, 1], c=trn[1][:, 0]) 93 | plt.show() -------------------------------------------------------------------------------- /transformers/raw_mnist_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from transformers import Transformer 4 | from sklearn.decomposition import PCA 5 | import matplotlib.pyplot as plt 6 | from sklearn.model_selection import train_test_split 7 | 8 | def split(data, label, test_size=0.22, sub_sample=70000): 9 | 10 | X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=test_size) 11 | 12 | train_size = int(sub_sample*(1-test_size)) 13 | test_size = int(sub_sample * test_size) 14 | 15 | return ([X_train[0:train_size], y_train[0:train_size] ], [X_test[0:test_size], y_test[0:test_size]]) 16 | 17 | class MnistParser(Transformer): 18 | 19 | def __init__(self, dim=50, visual=False, load_path="."): 20 | self.dim = dim 21 | self.visual = visual 22 | self.load_path = load_path 23 | self.classification_digit = 1 24 | 25 | def load_mnist(self): 26 | data_dir = os.path.join(self.load_path, "mnist") 27 | 28 | fd = open(os.path.join(data_dir, 'train-images-idx3-ubyte')) 29 | loaded = np.fromfile(file=fd, dtype=np.uint8) 30 | trX = loaded[16:].reshape((60000, 28, 28, 1)).astype(np.float) 31 | 32 | fd = open(os.path.join(data_dir, 'train-labels-idx1-ubyte')) 33 | loaded = np.fromfile(file=fd, dtype=np.uint8) 34 | trY = loaded[8:].reshape((60000)).astype(np.float) 35 | 36 | fd = open(os.path.join(data_dir, 't10k-images-idx3-ubyte')) 37 | loaded = np.fromfile(file=fd, dtype=np.uint8) 38 | teX = loaded[16:].reshape((10000, 28, 28, 1)).astype(np.float) 39 | 40 | fd = open(os.path.join(data_dir, 't10k-labels-idx1-ubyte')) 41 | loaded = np.fromfile(file=fd, dtype=np.uint8) 42 | teY = loaded[8:].reshape((10000)).astype(np.float) 43 | 44 | trY = np.asarray(trY) 45 | teY = np.asarray(teY) 46 | 47 | X = np.concatenate((trX, teX), axis=0) 48 | y = np.concatenate((trY, teY), axis=0).astype(np.int) 49 | 50 | seed = 547 51 | np.random.seed(seed) 52 | np.random.shuffle(X) 53 | np.random.seed(seed) 54 | np.random.shuffle(y) 55 | 56 | return (X / 255.), y 57 | 58 | def transform(self, content=None): 59 | 60 | X, y = self.load_mnist() 61 | flattened_images = X.reshape(X.shape[0], -1) 62 | ytwo_classes = [] 63 | 64 | for i, label in enumerate(y): 65 | 66 | if label == self.classification_digit: 67 | ytwo_classes.append(1) 68 | else: 69 | ytwo_classes.append(0) 70 | 71 | pca = PCA(n_components=self.dim) 72 | pca.fit(flattened_images) 73 | X = pca.transform(flattened_images) 74 | X_reconstructed = pca.inverse_transform(X) 75 | 76 | return [X, ytwo_classes, X_reconstructed] 77 | 78 | 79 | if __name__ == "__main__": 80 | 81 | mp = MnistParser(load_path='/home/greg/topology/db/') 82 | X, y = mp.load_mnist() 83 | X_pca, y_pca, X_reconstructed = mp.transform() 84 | trn, tsts = split(X_reconstructed, y_pca, test_size=1.0/7.0) 85 | first_image = trn[0][0] 86 | pixels = first_image.reshape((28, 28)) 87 | 88 | plt.imshow(pixels, cmap='gray') 89 | plt.title(trn[1][0]) 90 | plt.show() -------------------------------------------------------------------------------- /transformers/raw_rings_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from 
transformers import Transformer 3 | import matplotlib.pyplot as plt 4 | from sklearn import preprocessing 5 | from sklearn.model_selection import train_test_split 6 | from mpl_toolkits.mplot3d import Axes3D 7 | 8 | def split(data, label, test_size=0.22, sub_sample=60000): 9 | 10 | X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=test_size) 11 | 12 | train_size = int(sub_sample*(1-test_size)) 13 | test_size = int(sub_sample * test_size) 14 | 15 | return ([X_train[0:train_size], y_train[0:train_size] ], [X_test[0:test_size], y_test[0:test_size]]) 16 | 17 | 18 | 19 | class DataSet3DRingsGenerator(Transformer): 20 | def __init__(self, visual=False, samples=15000, shapeParam1=15, shapeParam2=2.5, shapeParam3=2.2, radius=1): 21 | self.visual = visual 22 | self.shapeParam1 = shapeParam1 23 | self.shapeParam2 = shapeParam2 24 | self.shapeParam3 = shapeParam3 25 | self.radius = radius 26 | self.samples= samples 27 | self.range = 0.5 28 | 29 | def draw_circle(self, r, center, n, rand=True): 30 | 31 | angles = np.linspace(start=0, stop=n, num=n) * (np.pi * 2) / n 32 | X = np.zeros(shape=(n, 2)) 33 | X[:, 0] = np.sin(angles) * r 34 | X[:, 1] = np.cos(angles) * r 35 | 36 | if rand: 37 | return X + center + np.random.rand(n, 2) * r / self.shapeParam1 38 | else: 39 | return X + center 40 | 41 | def gen_ring(self, center, flip, q=1.4, r=1): 42 | 43 | N_SAMPLES = self.samples 44 | X = np.zeros(shape=(2 * N_SAMPLES, 3)) 45 | y = np.zeros(shape=(2 * N_SAMPLES,)) 46 | 47 | X1 = self.draw_circle(r=r, center=np.array((0, 0)), n=N_SAMPLES, rand=False) 48 | X2 = self.draw_circle(r=r, center=np.array((0, 0)), n=N_SAMPLES, rand=False) 49 | 50 | 51 | X[0:N_SAMPLES, 0] = (X1[:, 0]) * self.shapeParam2 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 52 | X[0:N_SAMPLES, 1] = (X1[:, 1]) * self.shapeParam2 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 53 | X[0:N_SAMPLES, 2] = np.random.uniform(low=-self.range, high=self.range, size = X1.shape[0]) * q 54 | 55 | X[N_SAMPLES: 2 * N_SAMPLES, 0] = X2[:, 0] * self.shapeParam3 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 56 | X[N_SAMPLES: 2 * N_SAMPLES, 1] = X2[:, 1] * self.shapeParam3 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 57 | X[N_SAMPLES: 2 * N_SAMPLES, 2] = np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 58 | 59 | y[:] = flip 60 | y[0:N_SAMPLES] = flip 61 | 62 | X_total = X.copy() + np.array(( self.shapeParam3, 0, 0)) 63 | y_total = y.copy() 64 | 65 | X = np.zeros(shape=(2 * N_SAMPLES, 3)) 66 | y = np.zeros(shape=(2 * N_SAMPLES,)) 67 | 68 | X1 = self.draw_circle(r=r, center=np.array((0, 0)), n=N_SAMPLES, rand=False) 69 | X2 = self.draw_circle(r=r, center=np.array((0, 0)), n=N_SAMPLES, rand=False) 70 | 71 | X[0:N_SAMPLES, 0] = (X1[:, 0]) * self.shapeParam2 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 72 | X[0:N_SAMPLES, 2] = (X1[:, 1]) * self.shapeParam2 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 73 | X[0:N_SAMPLES, 1] = np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 74 | 75 | X[N_SAMPLES: 2 * N_SAMPLES, 0] = X2[:, 0] * self.shapeParam3 + np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 76 | X[N_SAMPLES: 2 * N_SAMPLES, 2] = X2[:, 1] * self.shapeParam3 + np.random. 
uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 77 | X[N_SAMPLES: 2 * N_SAMPLES, 1] = np.random. uniform(low=-self.range, high=self.range,size = X1.shape[0]) * q 78 | 79 | y[:] = 1 - flip 80 | y[0:N_SAMPLES] = 1 - flip 81 | 82 | X_total = np.concatenate((X_total, X), axis=0) + center 83 | y_total = np.concatenate((y_total, y), axis=0) 84 | 85 | return X_total, y_total 86 | 87 | def transform(self, q=3): 88 | 89 | X1, y1 = self.gen_ring((q, q, q), 0) 90 | X2, y2 = self.gen_ring((-q, -q, q), 1) 91 | X3, y3 = self.gen_ring((-q, q, -q), 0) 92 | X4, y4 = self.gen_ring((q, -q, -q), 1) 93 | X5, y5 = self.gen_ring((0, 0, 0), 0) 94 | X6, y6 = self.gen_ring((-q, -q, -q), 0) 95 | X7, y7 = self.gen_ring((q, q, -q), 1) 96 | X8, y8 = self.gen_ring((-q, q, q), 0) 97 | X9, y9 = self.gen_ring((q, -q, q), 1) 98 | 99 | X_total = np.concatenate((X1, X2), axis=0) 100 | y_total = np.concatenate((y1, y2), axis=0) 101 | 102 | X_total = np.concatenate((X_total, X3), axis=0) 103 | y_total = np.concatenate((y_total, y3), axis=0) 104 | 105 | X_total = np.concatenate((X_total, X4), axis=0) 106 | y_total = np.concatenate((y_total, y4), axis=0) 107 | 108 | X_total = np.concatenate((X_total, X5), axis=0) 109 | y_total = np.concatenate((y_total, y5), axis=0) 110 | 111 | X_total = np.concatenate((X_total, X6), axis=0) 112 | y_total = np.concatenate((y_total, y6), axis=0) 113 | 114 | X_total = np.concatenate((X_total, X7), axis=0) 115 | y_total = np.concatenate((y_total, y7), axis=0) 116 | 117 | X_total = np.concatenate((X_total, X8), axis=0) 118 | y_total = np.concatenate((y_total, y8), axis=0) 119 | 120 | X_total = np.concatenate((X_total, X9), axis=0) 121 | y_total = np.concatenate((y_total, y9), axis=0) 122 | 123 | X = X_total.copy() 124 | y = y_total.copy() 125 | 126 | max_abs_scaler = preprocessing.MaxAbsScaler() 127 | X = max_abs_scaler.fit_transform(X) 128 | 129 | return [X, y] 130 | 131 | if __name__ == '__main__': 132 | 133 | mp = DataSet3DRingsGenerator() 134 | content = mp.transform() 135 | X = content[0] 136 | y = content[1] 137 | trn, tsts = split(X, y, test_size=1-4.5/6.0) 138 | 139 | fig = plt.figure() 140 | ax = fig.add_subplot(111, projection='3d') 141 | ax.scatter(trn[0][:,0], trn[0][:,1], trn[0][:,2], c=trn[1]) 142 | plt.show() 143 | 144 | 145 | -------------------------------------------------------------------------------- /transformers/raw_spheres_generation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from transformers import Transformer 3 | import matplotlib.pyplot as plt 4 | from sklearn import preprocessing 5 | from sklearn.model_selection import train_test_split 6 | from mpl_toolkits.mplot3d import Axes3D 7 | import math 8 | import random 9 | from sklearn import preprocessing 10 | def split(data, label, test_size=0.22, sub_sample=60000): 11 | 12 | X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=test_size) 13 | 14 | train_size = int(sub_sample*(1-test_size)) 15 | test_size = int(sub_sample * test_size) 16 | 17 | return ([X_train[0:train_size], y_train[0:train_size] ], [X_test[0:test_size], y_test[0:test_size]]) 18 | 19 | 20 | 21 | class DataSet3DSpheresGenerator(Transformer): 22 | 23 | def __init__(self, visual=False, samples=9450): 24 | self.visual = visual 25 | self.samples = int(samples/9) 26 | 27 | def fibonacci_sphere(self, samples=2000, randomize=True, radius=1.0): 28 | rnd = 1. 29 | if randomize: 30 | rnd = random.random() * samples 31 | 32 | points = [] 33 | offset = 2. 
/ samples 34 | increment = math.pi * (3. - math.sqrt(5.)) 35 | 36 | for i in range(samples): 37 | 38 | y = ((i * offset) - 1) + (offset / 2) 39 | r = math.sqrt(1 - pow(y, 2)) 40 | 41 | phi = ((i + rnd) % samples) * increment 42 | 43 | x = math.cos(phi) * r 44 | z = math.sin(phi) * r 45 | 46 | points.append([x * radius, y * radius, z * radius]) 47 | 48 | return points 49 | 50 | def gen_2spheres(self, N_SAMPLES=1000, visual=True, r1=1, r2=1.05, r3=2, r4=2.05, r5=0.5): 51 | 52 | random.seed(1) 53 | x1a = self.fibonacci_sphere(samples=N_SAMPLES, randomize=True, radius=r1) 54 | y1a = np.zeros(len(x1a)).tolist() 55 | 56 | x1b = self.fibonacci_sphere(samples=N_SAMPLES, randomize=False, radius=r2) 57 | y1b = np.zeros(len(x1b)).tolist() 58 | 59 | x2a = self.fibonacci_sphere(samples=N_SAMPLES, randomize=False, radius=r3) 60 | y2a = np.ones(len(x2a)).tolist() 61 | 62 | x2b = self.fibonacci_sphere(samples=N_SAMPLES, randomize=False, radius=r4) 63 | y2b = np.ones(len(x2b)).tolist() 64 | 65 | x3 = self.fibonacci_sphere(samples=int(N_SAMPLES), randomize=False, radius=r5) 66 | y3 = np.ones(len(x3)).tolist() 67 | 68 | x = np.asarray(x1a + x1b + x2a + x2b + x3) 69 | y = y1a + y1b + y2a + y2b + y3 70 | 71 | return x, y 72 | 73 | def transform(self, content=None): 74 | 75 | def gen_spheres(N_SAMPLES, visual, center, flip): 76 | 77 | X, y = self.gen_2spheres(N_SAMPLES=N_SAMPLES, visual=visual) 78 | X = X + center 79 | y = np.abs(flip - np.array(y)) 80 | 81 | return X, y 82 | 83 | X1, y1 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(0,0,0), flip =0) 84 | X2, y2 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(-3,-3,-3), flip =0) 85 | X3, y3 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(3, 3, 3), flip =0) 86 | 87 | X4, y4 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(-3, 3, 3), flip =0) 88 | X5, y5 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(-3, 3, -3), flip =0) 89 | X6, y6 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(-3,-3, 3), flip =0) 90 | 91 | X7, y7 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(3, -3, -3), flip =0) 92 | X8, y8 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(3, -3, 3), flip =0) 93 | X9, y9 = gen_spheres(N_SAMPLES=self.samples, visual=False, center=(3, 3, -3), flip =0) 94 | 95 | X_total = np.concatenate((X1, X2), axis=0) 96 | y_total = np.concatenate((y1, y2), axis=0) 97 | 98 | X_total = np.concatenate((X_total, X3), axis=0) 99 | y_total = np.concatenate((y_total, y3), axis=0) 100 | 101 | X_total = np.concatenate((X_total, X4), axis=0) 102 | y_total = np.concatenate((y_total, y4), axis=0) 103 | 104 | X_total = np.concatenate((X_total, X5), axis=0) 105 | y_total = np.concatenate((y_total, y5), axis=0) 106 | 107 | X_total = np.concatenate((X_total, X6), axis=0) 108 | y_total = np.concatenate((y_total, y6), axis=0) 109 | 110 | X_total = np.concatenate((X_total, X7), axis=0) 111 | y_total = np.concatenate((y_total, y7), axis=0) 112 | 113 | X_total = np.concatenate((X_total, X8), axis=0) 114 | y_total = np.concatenate((y_total, y8), axis=0) 115 | 116 | X_total = np.concatenate((X_total, X9), axis=0) 117 | y_total = np.concatenate((y_total, y9), axis=0) 118 | 119 | X = X_total.copy() 120 | y = y_total.copy() 121 | 122 | max_abs_scaler = preprocessing.MaxAbsScaler() 123 | X = max_abs_scaler.fit_transform(X) 124 | 125 | return [X, y] 126 | 127 | 128 | if __name__ == '__main__': 129 | spheres_generator = DataSet3DSpheresGenerator(visual=True) 130 | content = spheres_generator.transform() 131 | 
X = content[0] 132 | y = content[1] 133 | trn, tsts = split(X, y, test_size=1-4.0/5.0) 134 | 135 | fig = plt.figure() 136 | ax = fig.add_subplot(111, projection='3d') 137 | ax.scatter(trn[0][:, 0], trn[0][:, 1], trn[0][:, 2], c=trn[1]) 138 | plt.show() 139 | print(len(trn[1])) 140 | 141 | -------------------------------------------------------------------------------- /transformers/rings_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/transformers/rings_9.png -------------------------------------------------------------------------------- /transformers/spheres_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/transformers/spheres_9.png -------------------------------------------------------------------------------- /transformers/tensors_values_plotter.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | import numpy as np 3 | from transformers import Transformer 4 | import csv 5 | import os 6 | import csv 7 | import plotly.offline as py 8 | import plotly.graph_objs as go 9 | from sklearn import preprocessing 10 | import numpy as np 11 | from sklearn import preprocessing 12 | 13 | class TensorsValuesPlotter(Transformer): 14 | def __init__(self, content_name, output_filename, enable=True): 15 | 16 | self.plots_path = output_filename 17 | self.content_name = content_name 18 | self.divisor = 1 19 | if not os.path.exists(output_filename): 20 | os.makedirs(output_filename) 21 | self.enable = enable 22 | 23 | def transform(self, content=None, dim_1=0, dim_2=1, dim_3=2, auto_open=False, normalize=True): 24 | 25 | if type(content) == type({}): 26 | 27 | if self.enable: 28 | tensor_names = content[self.content_name]['labels_names'] 29 | tensor_values = content[self.content_name]['labels'] 30 | 31 | print("plotting those tensors:", tensor_names) 32 | color_scale = np.array([item[0][0] for item in tensor_values]) 33 | 34 | for i in range(len(tensor_names)): 35 | flag_2d = False 36 | name = tensor_names[i] 37 | 38 | name = name.replace("/", "-") 39 | name = name.replace(":", "-") 40 | 41 | values = np.array([item[i][0] for item in tensor_values]) 42 | 43 | shape = values.shape 44 | if len(shape) == 1: 45 | print("warning: tensor %s is a list, will not plot (only 3d plot supported)" % (name)) 46 | continue 47 | 48 | if shape[1] > 3: 49 | print("warning: tensor %s has dimensions %s, will plot the first 3 dimensions" %(name, shape[1])) 50 | 51 | if normalize: 52 | max_abs_scaler = preprocessing.MaxAbsScaler() 53 | values = max_abs_scaler.fit_transform(values) 54 | 55 | x = values[::self.divisor, dim_1] 56 | y = values[::self.divisor, dim_2] 57 | z = values[::self.divisor, dim_3] 58 | 59 | 60 | else: # less or equal to 3 61 | 62 | if shape[1] < 3: 63 | print("warning: tensor %s has dimensions %s)" % (name, shape[1])) 64 | if normalize: 65 | max_abs_scaler = preprocessing.MaxAbsScaler() 66 | values = max_abs_scaler.fit_transform(values) 67 | 68 | x = values[::self.divisor, dim_1] 69 | y = values[::self.divisor, dim_2] 70 | z = [0] * len(values[::self.divisor, dim_1]) 71 | flag_2d = True 72 | else: 73 | if normalize: 74 | max_abs_scaler = preprocessing.MaxAbsScaler() 75 | values = max_abs_scaler.fit_transform(values) 76 | 77 | x = values[::self.divisor, dim_1] 78 | y = 
values[::self.divisor, dim_2] 79 | z = values[::self.divisor, dim_3] 80 | 81 | trace_gt = go.Scatter3d(x=x, y=y, z=z, 82 | mode='markers', 83 | marker=dict(size=2, 84 | colorscale='Jet', 85 | color = color_scale, 86 | opacity=0.3, 87 | ), 88 | name=name) 89 | 90 | data = [trace_gt] 91 | 92 | layout = dict(title='tensor values for %s' %(name), 93 | width=700, height=700, 94 | showlegend= False, 95 | scene=dict( 96 | xaxis=dict( 97 | nticks=4, range=[-1, 1], ), 98 | yaxis=dict( 99 | nticks=4, range=[-1, 1], ), 100 | zaxis=dict( 101 | nticks=4, range=[-1, 1], ), ) 102 | ) 103 | 104 | fig = go.Figure(data=data, layout=layout) 105 | file_name = os.path.join(self.plots_path, name + ".html") 106 | py.plot(fig, filename=file_name, auto_open=auto_open) 107 | 108 | # category plot 109 | 110 | values_cat1 = np.array([values[i,:] for i in range(len(values)) if color_scale[i] == 1]) 111 | 112 | if len(values_cat1) > 0: 113 | if flag_2d: 114 | x = values_cat1[::self.divisor, dim_1] 115 | y = values_cat1[::self.divisor, dim_2] 116 | z = [0] * len(values_cat1[::self.divisor, dim_1]) 117 | else: 118 | x = values_cat1[::self.divisor, dim_1] 119 | y = values_cat1[::self.divisor, dim_2] 120 | z = values_cat1[::self.divisor, dim_3] 121 | 122 | trace_gt = go.Scatter3d(x=x, y=y, z=z, 123 | mode='markers', 124 | marker=dict(size=2, 125 | colorscale='Jet', 126 | # color = color_scale, 127 | opacity=0.3, 128 | ), 129 | name=name) 130 | 131 | data = [trace_gt] 132 | 133 | layout = dict(title='tensor values for %s' %(name), 134 | width=700, height=700, 135 | showlegend= False, 136 | scene=dict( 137 | xaxis=dict( 138 | nticks=4, range=[-1, 1], ), 139 | yaxis=dict( 140 | nticks=4, range=[-1, 1], ), 141 | zaxis=dict( 142 | nticks=4, range=[-1, 1], ), ) 143 | ) 144 | 145 | fig = go.Figure(data=data, layout=layout) 146 | file_name = os.path.join(self.plots_path, name + "_cat1_ " + ".html") 147 | py.plot(fig, filename=file_name, auto_open=auto_open) 148 | 149 | 150 | values_cat2 =np.array([values[i,:] for i in range(len(values)) if color_scale[i] == 0]) 151 | 152 | if len(values_cat2) > 0: 153 | 154 | if flag_2d: 155 | x = values_cat2[::self.divisor, dim_1] 156 | y = values_cat2[::self.divisor, dim_2] 157 | z = [0] * len(values_cat1[::self.divisor, dim_1]) 158 | else: 159 | x = values_cat2[::self.divisor, dim_1] 160 | y = values_cat2[::self.divisor, dim_2] 161 | z = values_cat2[::self.divisor, dim_3] 162 | 163 | 164 | trace_gt = go.Scatter3d(x=x, y=y, z=z, 165 | mode='markers', 166 | marker=dict(size=2, 167 | colorscale='Jet', 168 | # color=color_scale, 169 | opacity=0.3, 170 | ), 171 | name=name) 172 | 173 | data = [trace_gt] 174 | 175 | layout = dict(title='tensor values for %s' % (name), 176 | width=700, height=700, 177 | showlegend=False, 178 | scene=dict( 179 | xaxis=dict( 180 | nticks=4, range=[-1, 1], ), 181 | yaxis=dict( 182 | nticks=4, range=[-1, 1], ), 183 | zaxis=dict( 184 | nticks=4, range=[-1, 1], ), ) 185 | ) 186 | 187 | fig = go.Figure(data=data, layout=layout) 188 | file_name = os.path.join(self.plots_path, name + "_cat2_ " + ".html") 189 | py.plot(fig, filename=file_name, auto_open=auto_open) 190 | return content 191 | else: 192 | return '' -------------------------------------------------------------------------------- /transformers/tf_reader.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | class TFReader(Transformer): 6 | def __init__(self, content_name ='grid', 
input_filename="train.tfrecords", mode= 'normal'): 7 | self._input_filename = input_filename 8 | self.content_name = content_name 9 | self.mode = mode 10 | 11 | def write(self, point, gt_label): 12 | feature = { 13 | 'vector/point': tf.train.Feature(float_list=tf.train.FloatList(value=point)), 14 | 'vector/ground_truth': tf.train.Feature(float_list=tf.train.FloatList(value= [gt_label])) 15 | } 16 | 17 | example = tf.train.Example(features=tf.train.Features(feature=feature)) 18 | self._writer.write(example.SerializeToString()) 19 | 20 | def transform(self, content=None): 21 | """Read tfrecords database 22 | """ 23 | 24 | def get_sample(dataset): 25 | """Extract ground truth and point value. """ 26 | 27 | items = tf.train.Example.FromString(dataset) 28 | point_as_list = items.features.feature.get('vector/point').float_list.value 29 | samples = [point_as_list.pop() for i in range(len(point_as_list))] 30 | return samples 31 | 32 | def get_label(dataset): 33 | 34 | items = tf.train.Example.FromString(dataset) 35 | labels = items.features.feature.get('vector/ground_truth').float_list.value 36 | 37 | return labels 38 | 39 | iter = tf.python_io.tf_record_iterator(self._input_filename) 40 | samples = [get_sample(item) for item in iter] 41 | 42 | iter = tf.python_io.tf_record_iterator(self._input_filename) 43 | labels = [get_label(item) for item in iter] 44 | 45 | if self.mode == 'test': 46 | 47 | dic = {self.content_name: {'samples' : np.array(samples), 48 | 'labels' : labels, 49 | 'label_names' : ['cat'] * len(labels)}} 50 | else: 51 | dic = {self.content_name: {'samples' : samples, 52 | 'labels' : labels, 53 | 'label_names' : ['cat'] * len(labels)}} 54 | 55 | if content != None: 56 | content.update(dic) 57 | else: 58 | content = dic 59 | 60 | return content 61 | 62 | -------------------------------------------------------------------------------- /transformers/tf_saver.py: -------------------------------------------------------------------------------- 1 | from transformers import Transformer 2 | import tensorflow as tf 3 | import os 4 | 5 | class TFSaver(Transformer): 6 | def __init__(self, output_filename="train.tfrecords"): 7 | if not os.path.exists(os.path.dirname(output_filename)): 8 | os.makedirs(os.path.dirname(output_filename)) 9 | 10 | self._writer = tf.python_io.TFRecordWriter(output_filename) 11 | self._output_filename = output_filename 12 | 13 | 14 | def write(self, point, gt_label): 15 | feature = { 16 | 'vector/point': tf.train.Feature(float_list=tf.train.FloatList(value=point)), 17 | 'vector/ground_truth': tf.train.Feature(float_list=tf.train.FloatList(value= [gt_label])) 18 | } 19 | 20 | example = tf.train.Example(features=tf.train.Features(feature=feature)) 21 | self._writer.write(example.SerializeToString()) 22 | 23 | def transform(self, content=None): 24 | ll = len(content[0]) 25 | for i in range(ll): 26 | self.write(content[0][i], content[1][i]) 27 | self._writer.close() 28 | 29 | 30 | -------------------------------------------------------------------------------- /transformers/visualizer_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from transformers import Transformer 3 | import plotly.offline as py 4 | import plotly.graph_objs as go 5 | import os 6 | class Visualizer2D(Transformer): 7 | 8 | def __init__(self, output_filename="2d-scatter-grid-colorscale.html", mode='2D', enable=True): 9 | self._output_filename = output_filename 10 | self.mode = mode 11 | self.enable = enable 12 | 13 | if not 
os.path.exists(os.path.dirname(output_filename)): 14 | os.makedirs(os.path.dirname(output_filename)) 15 | 16 | def predict(self, content=None): 17 | """ Does not support predict instead produces visualization of the input contents 18 | """ 19 | return self.transform(content) 20 | 21 | def transform(self, content=None): 22 | """Read tfrecords database""" 23 | if type(content) == type({}): 24 | if self.enable: 25 | if self.mode == '2D': 26 | trace_training = go.Scatter() 27 | trace_test = go.Scatter() 28 | trace_valid = go.Scatter() 29 | trace_loops = go.Scatter() 30 | else: 31 | trace_training = go.Scatter3d() 32 | trace_test = go.Scatter3d() 33 | trace_valid = go.Scatter3d() 34 | trace_loops = go.Scatter3d() 35 | 36 | if 'training_dataset' in content.keys(): 37 | 38 | labels = np.array(content['training_dataset']['labels']).astype(int) 39 | labels.shape = (labels.shape[0], ) 40 | samples = np.array(content['training_dataset']['samples']) 41 | if self.mode == '2D': 42 | trace_training = go.Scatter(x=samples[:, 0], y=samples[:, 1], 43 | mode='markers', 44 | marker=dict(size=5, 45 | # marker=dict(size=1, 46 | color=labels, # set color to an array/list of desired values 47 | colorscale='Jet', # choose a color scale 48 | opacity=0.9, 49 | #line=dict(color='rgb(231, 99, 250)', width=0.5) 50 | ), 51 | name='Dataset') 52 | else: 53 | trace_training = go.Scatter3d(x=samples[:, 0], y=samples[:, 1], z=samples[:, 2], 54 | mode='markers', 55 | marker=dict(size=5, 56 | # marker=dict(size=1, 57 | color=labels, # set color to an array/list of desired values 58 | colorscale='Jet', # choose a color scale 59 | opacity=0, 60 | #line=dict(color='rgb(231, 99, 250)', width=0.5) 61 | ), 62 | name='Dataset') 63 | if 'nn_predictions_on_test_dataset' in content.keys(): 64 | 65 | # first tensor should be ArgMax with network's predictions. 66 | labels = [item[0] for item in content['nn_predictions_on_test_dataset']['labels']] 67 | labels = np.array(labels).astype(int) 68 | labels.shape = (labels.shape[0], ) 69 | 70 | samples = np.array(content['nn_predictions_on_test_dataset']['samples']) 71 | 72 | if self.mode == '2D': 73 | trace_test = go.Scatter(x=samples[:, 0], y=samples[:, 1], 74 | mode='markers', 75 | marker=dict(size=11, 76 | # marker=dict(size=1, 77 | color=labels, # set color to an array/list of desired values 78 | colorscale='Viridis', # choose a color scale 79 | opacity=0.2, 80 | line=dict(width=0), 81 | symbol='square'), 82 | name='samples') 83 | else: 84 | trace_test = go.Scatter3d(x=samples[:, 0], y=samples[:, 1], z=samples[:, 2], 85 | mode='markers', 86 | marker=dict(size=11, 87 | # marker=dict(size=1, 88 | color=labels, # set color to an array/list of desired values 89 | colorscale='Viridis', # choose a color scale 90 | opacity=0.01, 91 | line=dict(width=0), 92 | symbol='square'), 93 | name='samples') 94 | 95 | if 'nn_predictions_on_validation_dataset' in content.keys(): 96 | 97 | # first tensor should be ArgMax with network's predictions. 
98 | labels = [item[0] for item in content['nn_predictions_on_validation_dataset']['labels']] 99 | labels = np.array(labels).astype(int) 100 | labels.shape = (labels.shape[0], ) 101 | 102 | samples = np.array(content['nn_predictions_on_validation_dataset']['samples']) 103 | 104 | if self.mode == '2D': 105 | 106 | trace_valid = go.Scatter(x=samples[:, 0], y=samples[:, 1], 107 | mode='markers', 108 | marker=dict(size=11, 109 | # marker=dict(size=1, 110 | color=labels, # set color to an array/list of desired values 111 | colorscale='Viridis', # choose a color scale 112 | opacity=0.2, 113 | line=dict(width=0), 114 | symbol='square'), 115 | name='samples') 116 | else: 117 | trace_valid = go.Scatter3d(x=samples[:, 0], y=samples[:, 1], z=samples[:, 2], 118 | mode='markers', 119 | marker=dict(size=11, 120 | # marker=dict(size=1, 121 | color=labels, # set color to an array/list of desired values 122 | colorscale='Viridis', # choose a color scale 123 | opacity=0.01, 124 | line=dict(width=0), 125 | symbol='square'), 126 | name='samples') 127 | 128 | data = [trace_training, trace_test, trace_valid] 129 | layout = dict(title='Data Visualization', 130 | width=900, height=900, autosize=False, showlegend= False) 131 | 132 | fig = go.Figure(data=data, layout=layout) 133 | py.plot(fig, filename=self._output_filename, auto_open=False) 134 | 135 | return content 136 | else: 137 | return '' 138 | -------------------------------------------------------------------------------- /transformers/visualizer_2d_loops.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from transformers import Transformer 3 | import plotly.offline as py 4 | import plotly.graph_objs as go 5 | import os 6 | class Visualizer2DLoops(Transformer): 7 | 8 | def __init__(self, output_filename="2d-scatter-grid-colorscale.html"): 9 | self._output_filename = output_filename 10 | 11 | if not os.path.exists(os.path.dirname(output_filename)): 12 | os.makedirs(os.path.dirname(output_filename)) 13 | 14 | def predict(self, content=None): 15 | """ Does not support predict instead produces visualization of the input contents 16 | """ 17 | return self.transform(content) 18 | 19 | def transform(self, content=None): 20 | """Read tfrecords database""" 21 | 22 | trace_training = go.Scatter() 23 | trace_test = go.Scatter() 24 | trace_valid = go.Scatter() 25 | trace_loops = go.Scatter() 26 | 27 | for k, betti_dict in enumerate(content['betti_list']): 28 | 29 | loops = betti_dict['trace'] 30 | betti_name = betti_dict['name'] 31 | 32 | if 'nn_predictions_on_test_dataset' in content.keys(): 33 | 34 | # first tensor should be ArgMax with network's predictions. 
35 | labels = [item[0] for item in content['nn_predictions_on_test_dataset']['labels']] 36 | labels = np.array(labels).astype(int) 37 | labels.shape = (labels.shape[0], ) 38 | 39 | samples = np.array(content['nn_predictions_on_test_dataset']['samples']) 40 | 41 | trace_test = go.Scatter(x=samples[:, 0], y=samples[:, 1], 42 | mode='markers', 43 | marker=dict(size=11, 44 | color=labels, # set color to an array/list of desired values 45 | colorscale='Viridis', # choose a color scale 46 | opacity=0.2, 47 | line=dict(width=0), 48 | symbol='square'), 49 | name='samples') 50 | lines = [] 51 | 52 | if 'nn_predictions_on_test_dataset' in content.keys(): 53 | samples = np.array(content['nn_predictions_on_test_dataset']['samples']) 54 | else: 55 | samples = np.array(content['test_dataset']['samples']) 56 | 57 | colors = ['rgb(67, 104, 31)', 'rgb(255, 180, 3)', 'rgb(129, 255, 3)', 'rgb(3, 214, 255)', 58 | 'rgb(197, 3, 255)', 'rgb(0,0, 0)', 'rgb(173,218, 69)'] 59 | 60 | for k, key in enumerate(loops.keys()): 61 | 62 | origins, targets = loops[key] 63 | lines.extend([{ 64 | 'type': 'line', 65 | 'x0': samples[origins[i]][0], 66 | 'y0': samples[origins[i]][1], 67 | 'x1': samples[targets[i]][0], 68 | 'y1': samples[targets[i]][1], 69 | 'line': { 70 | 'color': colors[k % 7], 71 | 'width': 7, 72 | # 'dash': 'dot', 73 | }} for i in range(len(origins))]) 74 | 75 | data = [trace_training, trace_test] 76 | layout = dict(title='Testing of ' + betti_name, 77 | width=700, height=700, autosize=False, showlegend= False, 78 | shapes=lines) 79 | 80 | fig = go.Figure(data=data, layout=layout) 81 | py.plot(fig, filename=self._output_filename + betti_name + '.html', auto_open=False) -------------------------------------------------------------------------------- /videos/folding1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/videos/folding1.mp4 -------------------------------------------------------------------------------- /videos/folding2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/videos/folding2.mp4 -------------------------------------------------------------------------------- /videos/folding3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/videos/folding3.mp4 -------------------------------------------------------------------------------- /videos/folding4.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/videos/folding4.mp4 -------------------------------------------------------------------------------- /videos/output_6_by_3_classes.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/videos/output_6_by_3_classes.avi -------------------------------------------------------------------------------- /videos/output_6_by_3_grid.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topnn/topnn_framework/8779aea55ac89abcf15fbfc8214f1a35b12c074b/videos/output_6_by_3_grid.avi 
--------------------------------------------------------------------------------
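End of the file listing. As a quick orientation, the sketch below is not part of the repository: it shows one way the transformers listed above could be chained by hand to build the 2D circles dataset, write it to a tfrecords file with TFSaver, and read it back with TFReader. It assumes TensorFlow 1.x (the tf.python_io API used throughout the repo) and that the repository root is on PYTHONPATH so the transformers package imports resolve; the ./data/circles_demo/ path is purely illustrative.

# Minimal usage sketch (not part of the repo); assumes TensorFlow 1.x and the repo root on PYTHONPATH.
from transformers.raw_circles_generator import CirclesGenerator, split
from transformers.tf_saver import TFSaver
from transformers.tf_reader import TFReader

X, y = CirclesGenerator().transform()   # grid points of shape (N, 2) and two-column labels of shape (N, 2)
train, test = split(X, y)               # ([X_train, y_train], [X_test, y_test])

# TFSaver.write stores one scalar ground-truth value per point, so pass the first label column.
saver = TFSaver(output_filename="./data/circles_demo/train.tfrecords")  # illustrative path
saver.transform([train[0], train[1][:, 0]])

# Read the records back into the content-dictionary format the other transformers consume.
reader = TFReader(content_name="training_dataset", input_filename="./data/circles_demo/train.tfrecords")
content = reader.transform()
print(content["training_dataset"]["samples"][0], content["training_dataset"]["labels"][0])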