├── src ├── modules │ ├── __init__.py │ ├── user_input.py │ ├── sfdp_layout.py │ ├── animation.py │ ├── distance_matrix.py │ ├── tsn_config.py │ ├── graph_io.py │ ├── layout_io.py │ └── thesne.py └── tsnetwork.py ├── LICENSE ├── .gitignore └── README.md /src/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/modules/user_input.py: -------------------------------------------------------------------------------- 1 | def confirm(prompt='Yes or no?'): 2 | while True: 3 | response = input(prompt + ' ').lower() 4 | if response in ['yes', 'no', 'y', 'n', 'yep', 'yeah', 'nope']: 5 | break 6 | print('Please type yes or no.') 7 | if response in ['yes', 'y', 'yep', 'yeah']: 8 | return True 9 | return False 10 | 11 | 12 | def integer(prompt='Enter a number:', positive=None, zero=True): 13 | while True: 14 | response = input(prompt + ' ') 15 | if not all(c.isdigit() for c in response) or len(response) == 0: 16 | print('Please enter numeric characters.') 17 | continue 18 | response = int(response) 19 | if positive is not None and positive and response < 0: 20 | print('Input must be positive.') 21 | elif positive is not None and not positive and response > 0: 22 | print('Input must be negative.') 23 | elif not zero and response == 0: 24 | print('Input must be nonzero.') 25 | else: 26 | break 27 | return response 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Han Kruiger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/modules/sfdp_layout.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import graph_tool.all as gt 3 | 4 | import modules.layout_io as layout_io 5 | import modules.user_input as usr_input 6 | 7 | 8 | # Perform an SFDP placement on the graph, and save a drawing of the layout. 9 | def sfdp_placement(g, output_folder, color_property_map=None, ask_for_acceptance=True, opacity=1): 10 | pos_sfdp = None 11 | while True: 12 | print('[tsnetwork] Performing SFDP') 13 | pos = gt.sfdp_layout(g, multilevel=True, C=1.2, p=1) 14 | 15 | pos_sfdp = g.new_vertex_property('vector') 16 | for v in g.vertices(): 17 | pos_sfdp[v] = (float(pos[v][0]), float(pos[v][1])) 18 | 19 | print('[tsnetwork] Saving SFDP layout...') 20 | layout_io.save_drawing(output_folder, g, pos=pos_sfdp, description='sfdp', color_property_map=color_property_map, edge_colors="rgb", draw_vertices=False, opacity=opacity) 21 | 22 | if ask_for_acceptance: 23 | if usr_input.confirm('[tsnetwork] Is the generated sfdp layout ({0}) acceptable? [y/n]: '.format(output_folder + '/sfdp.pdf')): 24 | break 25 | else: 26 | break 27 | 28 | # Copy SFDP vertex coordinates to Y_init 29 | Y_init = np.zeros((g.num_vertices(), 2)) 30 | for v in g.vertices(): 31 | Y_init[int(v), :] = pos_sfdp[v] 32 | 33 | layout_io.save_layout(output_folder + '/sfdp.pickle', g, None, Y_init) 34 | 35 | return Y_init, pos_sfdp 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | graphs 2 | *output* 3 | 4 | # Created by https://www.gitignore.io/api/python,linux 5 | 6 | ### Python ### 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *,cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # IPython Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | 98 | ### Linux ### 99 | *~ 100 | 101 | # temporary files which can be created if a process still has a handle open of a deleted file 102 | .fuse_hidden* 103 | 104 | # KDE directory preferences 105 | .directory 106 | 107 | # Linux trash folder which might appear on any partition or disk 108 | .Trash-* 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # If you plan to use/evaluate tsnetwork, please use the code on https://github.com/HanKruiger/tsNET which is (just a little bit) less experimental. 2 | This repository is still here because my [MSc thesis](http://fse.studenttheses.ub.rug.nl/id/eprint/14540) referred to it. 3 | 4 | # `tsnetwork` 5 | 6 | Graph layouts by t-distributed stochastic neighbour embedding. 7 | 8 | This repository contains the implementation of a graph layout algorithm that makes use of the [t-SNE](https://lvdmaaten.github.io/tsne/) dimensionality reduction technique. 9 | 10 | The exploration and evaluation of using this technique for graph layouts was done as my [MSc thesis](http://irs.ub.rug.nl/dbi/57cd44e1a5b49) project at Rijksuniversiteit Groningen, which I aim to finish in August 2016. 11 | 12 | A large part of an essential module in this implementation is a heavily adjusted version of Paulo Rauber's [thesne](https://github.com/paulorauber/thesne), which is an implementation of dynamic t-SNE. 13 | 14 | ## Dependencies 15 | 16 | This was developed and tested solely on ArchLinux. 17 | For this implementation to work, you will need: 18 | 19 | * `python3` 20 | * [`numpy`](http://www.numpy.org/) 21 | * [`graph-tool`](https://graph-tool.skewed.de/) 22 | * [`theano`](http://deeplearning.net/software/theano/) 23 | * [`graphviz`](http://www.graphviz.org/) 24 | * [`matplotlib`](http://matplotlib.org/) 25 | * [`scikit-learn`](http://scikit-learn.org/stable/) 26 | 27 | For rendering fancy animations (even more heavily untested, probably only works on my system) you need: 28 | 29 | * [`ffmpeg`](https://ffmpeg.org/) 30 | * [`imagemagick`](https://www.imagemagick.org/) 31 | 32 | ## Benchmark layout animations 33 | 34 | For a set of graphs that has been used as a benchmark, animations that show the state of the layout as a function of optimization time can be seen [over here](https://hankruiger.github.io/tsnetwork/animations). 35 | 36 | ## Warning 37 | 38 | Usage of this software is at your own risk. 39 | This utility writes and removes directories in a directory you specify, and (with me being __not__ a professional software developer) you should not trust using this if you're afraid to lose data. 40 | -------------------------------------------------------------------------------- /src/modules/animation.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import glob 3 | import shutil 4 | import os 5 | import modules.user_input as user_input 6 | 7 | 8 | class cd: 9 | """Context manager for changing the current working directory""" 10 | def __init__(self, new_path): 11 | self.new_path = os.path.expanduser(new_path) 12 | 13 | def __enter__(self): 14 | self.saved_path = os.getcwd() 15 | os.chdir(self.new_path) 16 | 17 | def __exit__(self, etype, value, traceback): 18 | os.chdir(self.saved_path) 19 | 20 | 21 | # Save animation of images from 'snaps_folder' as a .mp4 in the snaps_folder's 22 | # parent folder. 23 | def save_animation(snaps_folder, description): 24 | # chdir to snaps folder 25 | with cd(snaps_folder): 26 | # Create directory for reformatted images if it did not exists yet. 27 | if not os.path.exists('./animation'): 28 | os.makedirs('./animation') 29 | else: 30 | if user_input.confirm('[animation] ' + snaps_folder + '/animation exists. Delete contents?'): 31 | for file in os.listdir('./animation'): 32 | file_path = os.path.join('./animation/', file) 33 | try: 34 | if os.path.isfile(file_path): 35 | os.unlink(file_path) 36 | except Exception as e: 37 | print(e) 38 | 39 | # Copy files to new directory 40 | for file in glob.glob('./*.jpg'): 41 | shutil.copy(file, './animation') 42 | 43 | # chdir to snaps_folder/animation 44 | with cd('./animation'): 45 | # Write subprocesses' stdout to this file, to clean up logging. 46 | DEVNULL = open(os.devnull, 'w') 47 | 48 | # Reformat images with ImageMagick 49 | print('[animation] Reformatting images...') 50 | subprocess.call('mogrify -gravity center -resize 512x512 -extent 512x512 *.jpg', shell=True, stdout=DEVNULL) 51 | print('[animation] ... Done!') 52 | 53 | # Render video with ffmpeg 54 | print('[animation] Rendering video...') 55 | subprocess.call("ffmpeg -framerate 30 -pattern_type glob -i 'tsne_snap_*.jpg' -c:v libx264 -r 30 -pix_fmt yuv420p -hide_banner -loglevel panic {0}.mp4".format(description), shell=True, stdout=DEVNULL) 56 | print('[animation] ... Done!') 57 | try: 58 | # Copy video out of animation folder and remove the animation folder. 59 | shutil.copy('./animation/{0}.mp4'.format(description), '..') 60 | shutil.rmtree('./animation') 61 | except Exception as e: 62 | print(e) 63 | -------------------------------------------------------------------------------- /src/modules/distance_matrix.py: -------------------------------------------------------------------------------- 1 | import graph_tool.all as gt 2 | import numpy as np 3 | import itertools 4 | 5 | 6 | def get_modified_adjacency_matrix(g, k): 7 | # Get regular adjacency matrix 8 | adj = gt.adjacency(g) 9 | 10 | # Initialize the modified adjacency matrix 11 | X = np.zeros(adj.shape) 12 | 13 | # Loop over nonzero elements 14 | for i, j in zip(*adj.nonzero()): 15 | X[i, j] = 1 / adj[i, j] 16 | 17 | adj_max = adj.max() 18 | 19 | # Loop over zero elements 20 | for i, j in set(itertools.product(range(adj.shape[0]), range(adj.shape[1]))).difference(zip(*adj.nonzero())): 21 | X[i, j] = k * adj_max 22 | 23 | return X 24 | 25 | 26 | def get_shortest_path_distance_matrix(g, k=2): 27 | # Used to find which vertices are not connected. This has to be this weird, 28 | # since graph_tool uses maxint for the shortest path distance between 29 | # unconnected vertices. 30 | def get_unconnected_distance(): 31 | g_mock = gt.Graph() 32 | g_mock.add_vertex(2) 33 | shortest_distances_mock = gt.shortest_distance(g_mock) 34 | unconnected_dist = shortest_distances_mock[0][1] 35 | return unconnected_dist 36 | 37 | # Get the value (usually maxint) that graph_tool uses for distances between 38 | # unconnected vertices. 39 | unconnected_dist = get_unconnected_distance() 40 | 41 | # Get shortest distances for all pairs of vertices in a NumPy array. 42 | X = gt.shortest_distance(g).get_2d_array(range(g.num_vertices())) 43 | 44 | if len(X[X == unconnected_dist]) > 0: 45 | print('[distance_matrix] There were disconnected components!') 46 | 47 | # Get maximum shortest-path distance (ignoring maxint) 48 | X_max = X[X != unconnected_dist].max() 49 | 50 | # Set the unconnected distances to k times the maximum of the other 51 | # distances. 52 | X[X == unconnected_dist] = k * X_max 53 | 54 | return X 55 | 56 | 57 | # Return the distance matrix of g, with the specified metric. 58 | def get_distance_matrix(g, distance_metric, normalize=True, k=10.0): 59 | print('[distance_matrix] Computing distance matrix (metric: {0})'.format(distance_metric)) 60 | 61 | if distance_metric == 'shortest_path' or distance_metric == 'spdm': 62 | X = get_shortest_path_distance_matrix(g) 63 | elif distance_metric == 'modified_adjacency' or distance_metric == 'mam': 64 | X = get_modified_adjacency_matrix(g, k) 65 | else: 66 | raise Exception('Unknown distance metric.') 67 | 68 | # Just to make sure, symmetrize the matrix. 69 | X = (X + X.T) / 2 70 | 71 | # Force diagonal to zero 72 | X[range(X.shape[0]), range(X.shape[1])] = 0 73 | 74 | # Normalize matrix s.t. sum is 1. 75 | if normalize: 76 | X /= np.max(X) 77 | 78 | print('[distance_matrix] Done!') 79 | 80 | return X 81 | -------------------------------------------------------------------------------- /src/modules/tsn_config.py: -------------------------------------------------------------------------------- 1 | # Class that represents the configuration of a tsnetwork placement. 2 | class TsnConfig: 3 | 4 | def __init__(self, perplexity, n_epochs, initial_lr, final_lr, lr_switch, 5 | initial_momentum, final_momentum, momentum_switch, 6 | initial_l_kl, final_l_kl, l_kl_switch, 7 | initial_l_e, final_l_e, l_e_switch, 8 | initial_l_c, final_l_c, l_c_switch, 9 | initial_l_r, final_l_r, l_r_switch, 10 | r_eps, k, pre_sfdp, rmv_edge_frac, rnd_seed, distance_matrix): 11 | self.perplexity = perplexity 12 | self.n_epochs = n_epochs 13 | 14 | self.initial_lr = initial_lr 15 | self.final_lr = final_lr 16 | self.lr_switch = lr_switch 17 | 18 | self.initial_momentum = initial_momentum 19 | self.final_momentum = final_momentum 20 | self.momentum_switch = momentum_switch 21 | 22 | self.initial_l_kl = initial_l_kl 23 | self.final_l_kl = final_l_kl 24 | self.l_kl_switch = l_kl_switch 25 | 26 | self.initial_l_e = initial_l_e 27 | self.final_l_e = final_l_e 28 | self.l_e_switch = l_e_switch 29 | 30 | self.initial_l_c = initial_l_c 31 | self.final_l_c = final_l_c 32 | self.l_c_switch = l_c_switch 33 | 34 | self.initial_l_r = initial_l_r 35 | self.final_l_r = final_l_r 36 | self.l_r_switch = l_r_switch 37 | 38 | self.r_eps = r_eps 39 | 40 | self.k = k 41 | self.pre_sfdp = pre_sfdp 42 | self.rmv_edge_frac = rmv_edge_frac 43 | self.rnd_seed = rnd_seed 44 | self.distance_matrix = distance_matrix 45 | 46 | # Return a description string of the configuration. 47 | # Parts are commented out to prevent too long filenames. 48 | def get_description(self): 49 | description = 'e_{:0>5d}'.format(self.n_epochs) 50 | description += '_p_{:0>.1f}'.format(self.perplexity) 51 | description += '_lrni_{:.1f}'.format(self.initial_lr) 52 | # description += '_lrnf_{:.1f}'.format(self.final_lr) 53 | # description += '_mi_{:.1f}'.format(self.initial_momentum) 54 | # description += '_mf_{:.1f}'.format(self.final_momentum) 55 | # description += '_lkli_{:.2f}'.format(self.initial_l_kl) 56 | # description += '_lklf_{:.2f}'.format(self.final_l_kl) 57 | description += '_lei_{:.2f}'.format(self.initial_l_e) 58 | # description += '_lef_{:.2f}'.format(self.final_l_e) 59 | description += '_lci_{:.2f}'.format(self.initial_l_c) 60 | # description += '_lcf_{:.2f}'.format(self.final_l_c) 61 | # description += '_lri_{:.2f}'.format(self.initial_l_r) 62 | description += '_lrf_{:.2f}'.format(self.final_l_r) 63 | description += '_reps_{:.2f}'.format(self.r_eps) 64 | description += '_k_{:0>.2f}'.format(self.k) 65 | # description += '_rer_{:.2f}'.format(self.rmv_edge_frac) 66 | description += '_rs{:d}'.format(self.rnd_seed) 67 | description += '_' + self.distance_matrix 68 | 69 | if self.pre_sfdp: 70 | description = 'pre_sfdp_' + description 71 | 72 | # S.t. LaTeX doesn't complain about the extension. 73 | description = description.replace('.', 'p') 74 | return description 75 | -------------------------------------------------------------------------------- /src/modules/graph_io.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from scipy.io import mmread 3 | import graph_tool.all as gt 4 | import csv 5 | 6 | 7 | # Read a Matrix Market file, and construct an undirected weighted graph from it. 8 | def load_mm(mm_file): 9 | adj = mmread(mm_file) 10 | 11 | assert adj.shape[0] == adj.shape[1] 12 | 13 | # Initialize graph 14 | g = gt.Graph(directed=False) 15 | 16 | edge_weight = g.edge_properties["weight"] = g.new_edge_property("double") 17 | 18 | # Create vertex for every row/column 19 | g.add_vertex(adj.shape[0]) 20 | 21 | print('[graph_io] Reading matrix market file with {0} explicit elements...'.format(len(adj.data))) 22 | 23 | # Loop over all explicit elements in the sparse matrix 24 | for iteration, (i, j, w) in enumerate(zip(adj.row, adj.col, adj.data)): 25 | # Skip self-edges. 26 | if i == j: 27 | continue 28 | 29 | # Add edge to the graph, if its 'symmetric partner' is not already there. 30 | # (Undirected graph, so g.edge(i, j) == g.edge(j, i)) 31 | if g.edge(i, j) is None: 32 | g.add_edge(i, j) 33 | 34 | edge_weight[i, j] = w 35 | 36 | # Print progress every 5% 37 | if iteration % (int(0.05 * len(adj.data))) == 0: 38 | perc = 100 * iteration / len(adj.data) 39 | print('[graph_io] {0:.1f}%'.format(perc), end='\r') 40 | print('\n[graph_io] Done!') 41 | return g 42 | 43 | 44 | # Read a csv file, and construct an undirected weighted graph from it. 45 | def load_csv(csv_file_name): 46 | g = gt.Graph(directed=False) 47 | 48 | num_lines = sum(1 for _ in open(csv_file_name)) 49 | 50 | # Property map for label that was used in the file (different from internal graph-tool index!) 51 | v_label = g.vertex_properties['label'] = g.new_vertex_property("string") 52 | # Dictionary from label to graph-tool index 53 | v_dict = {} 54 | 55 | print('[graph_io] Reading csv-file with ' + str(num_lines) + ' lines...') 56 | with open(csv_file_name) as csv_file: 57 | reader = csv.DictReader(csv_file, delimiter='\t') 58 | for iteration, row in enumerate(reader): 59 | from_node = row['FromNodeId'] 60 | to_node = row['ToNodeId'] 61 | 62 | if from_node not in v_dict.keys(): 63 | v = g.add_vertex() 64 | v_label[v] = from_node 65 | v_dict[from_node] = v 66 | if to_node not in v_dict.keys(): 67 | v = g.add_vertex() 68 | v_label[v] = to_node 69 | v_dict[to_node] = v 70 | 71 | i = v_dict[from_node] 72 | j = v_dict[to_node] 73 | 74 | # Skip self-edges. 75 | if i == j: 76 | continue 77 | 78 | # Add edge to the graph, if its 'symmetric partner' is not already there. 79 | # (Undirected graph, so g.edge(i, j) == g.edge(j, i)) 80 | if g.edge(i, j) is None: 81 | g.add_edge(i, j) 82 | 83 | # Print progress every 5% 84 | if iteration % (int(0.05 * num_lines - 1)) == 0: 85 | perc = 100 * iteration / num_lines - 1 86 | print('[graph_io] {0:.1f}%'.format(perc), end='\r') 87 | 88 | print('\n[graph_io] Done!') 89 | return g 90 | 91 | 92 | # Read a chaco file, and construct an undirected weighted graph from it. 93 | def load_chaco(file_name): 94 | g = gt.Graph(directed=False) 95 | 96 | num_lines = sum(1 for _ in open(file_name)) 97 | 98 | print('[graph_io] Reading chaco-file with ' + str(num_lines) + ' lines...') 99 | with open(file_name, 'r') as f: 100 | meta = [int(num) for num in f.readline().split()] 101 | if len(meta) > 3: 102 | raise Exception('Chaco parser cannot read this file.') 103 | if len(meta) == 3: 104 | if meta[2] != 0: 105 | raise Exception('Chaco parser cannot read this file.') 106 | 107 | # Add this many vertices. 108 | g.add_vertex(meta[0]) 109 | 110 | for i, line in enumerate(f): 111 | # Minus 1 to convert to 0-indexing, j != i to skip self-edges 112 | to_nodes = [int(j) - 1 for j in line.split() if int(j) != i] 113 | 114 | # Add edge to the graph, if its 'symmetric partner' is not already there. 115 | # (Undirected graph, so g.edge(i, j) == g.edge(j, i)) 116 | for j in to_nodes: 117 | if g.edge(i, j) is None: 118 | g.add_edge(i, j) 119 | 120 | # Print progress every 5% 121 | if i % (int(0.05 * num_lines - 1)) == 0: 122 | perc = 100 * i / num_lines - 1 123 | print('[graph_io] {0:.1f}%'.format(perc), end='\r') 124 | 125 | assert g.num_vertices() == meta[0] 126 | assert g.num_edges() == meta[1] 127 | 128 | print('\n[graph_io] Done!') 129 | return g 130 | 131 | def load_vna(in_file): 132 | with open(in_file) as f: 133 | all_lines = f.read().splitlines() 134 | 135 | it = iter(all_lines) 136 | 137 | # Ignore preamble 138 | line = next(it) 139 | while not line.startswith('*Node properties'): 140 | line = next(it) 141 | 142 | node_properties = next(it).split(' ') 143 | assert('ID' in node_properties) 144 | 145 | vertices = dict() 146 | line = next(it) 147 | gt_idx = 0 # Index for gt 148 | while not line.startswith('*Tie data'): 149 | entries = line.split(' ') 150 | vna_id = entries[0] 151 | vertex = dict() 152 | for i, prop in enumerate(node_properties): 153 | vertex[prop] = entries[i] 154 | vertex['ID'] = gt_idx # Replace VNA ID by numerical gt index 155 | vertices[vna_id] = vertex # Retain VNA ID as key of the vertices dict 156 | 157 | gt_idx += 1 158 | line = next(it) 159 | 160 | edge_properties = next(it).split(' ') 161 | assert(edge_properties[0] == 'from' and edge_properties[1] == 'to') 162 | 163 | edges = [] 164 | try: 165 | while True: 166 | line = next(it) 167 | entries = line.split(' ') 168 | v_i = vertices[entries[0]]['ID'] 169 | v_j = vertices[entries[1]]['ID'] 170 | edges.append((v_i, v_j)) 171 | except StopIteration: 172 | pass 173 | 174 | g = gt.Graph(directed=False) 175 | g.add_vertex(len(vertices)) 176 | for v_i, v_j in edges: 177 | g.add_edge(v_i, v_j) 178 | 179 | gt.remove_parallel_edges(g) 180 | 181 | return g 182 | return None 183 | 184 | 185 | 186 | def load_graph(file): 187 | if os.path.splitext(file)[1] == '.mtx': 188 | g = load_mm(file) 189 | elif os.path.splitext(file)[1] == '.csv': 190 | g = load_csv(file) 191 | elif os.path.splitext(file)[1] == '.graph': 192 | g = load_chaco(file) 193 | elif os.path.splitext(file)[1] == '.vna': 194 | g = load_vna(file) 195 | else: 196 | # Give the file to graph_tool and hope for the best. 197 | g = gt.load_graph(file) 198 | 199 | g.set_directed(False) 200 | 201 | gt.remove_parallel_edges(g) 202 | 203 | return g 204 | -------------------------------------------------------------------------------- /src/modules/layout_io.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import graph_tool.all as gt 4 | import matplotlib.cm 5 | 6 | 7 | # Save a drawing of a layout 8 | def save_drawing(output_folder, g, pos, description, color_property_map=None, color_array=None, formats=None, verbose=True, opacity=0.2, edge_colors=None, draw_vertices=True): 9 | if formats is None: 10 | formats = ['jpg', 'pdf'] 11 | 12 | # GraphViz needs the positions to be between 0 and 1. So normalize first. 13 | pos_normalized = g.new_vertex_property('vector') 14 | vertices = list(g.vertices()) 15 | if type(pos) is not np.ndarray: 16 | Y = pos.get_2d_array(range(2)) 17 | else: 18 | Y = pos 19 | # Translate s.t. smallest values for both x and y are 0. 20 | Y[0, :] += -Y[0, :].min() 21 | Y[1, :] += -Y[1, :].min() 22 | # Scale s.t. max(max(x, y)) = 1 (while keeping the same aspect ratio!) 23 | scaling = 1 / (np.absolute(Y).max()) 24 | Y *= scaling 25 | pos_normalized.set_2d_array(Y) 26 | 27 | # Output size in cm (matches UF images) 28 | out_size = [14.3] * 2 29 | 30 | # Crop for aspect ratio 31 | if max(Y[0, :]) < max(Y[1, :]): 32 | out_size[0] *= max(Y[0, :]) 33 | else: 34 | out_size[1] *= max(Y[1, :]) 35 | 36 | # Use the supplied color array for the vertex colors, if given. 37 | if color_array is not None: 38 | color_property_map = g.new_vertex_property('double') 39 | assert len(color_array) == g.num_vertices() 40 | for v in vertices: 41 | color_property_map[int(v)] = color_array[int(v)] 42 | 43 | if verbose: 44 | print('[layout_io] Saving layout drawing... ({0})'.format(description)) 45 | 46 | 47 | if edge_colors == "rgb": 48 | edge_color = g.new_edge_property('string') 49 | edge_length = g.new_edge_property('float') 50 | edges = list(g.edges()) 51 | for e in edges: 52 | v1 = e.source() 53 | v2 = e.target() 54 | length = ((Y[:, int(v1)] - Y[:, int(v2)]) ** 2).sum() ** 0.5 55 | edge_length[e] = length 56 | lengths = edge_length.get_array() 57 | for e in edges: 58 | # Colour coding the edges based on edge length 59 | x = (edge_length[e] - np.min(lengths)) / (np.max(lengths) - np.min(lengths)) 60 | red = min(max(0, 1 - 2 * x), 1) 61 | green = max(0, 1 - abs(2 * x - 1)) 62 | blue = min(max(0, -1 + 2 * x), 1) 63 | edge_color[e] = "#" 64 | edge_color[e] += "{0:0{1}x}".format(int(red * 255), 2) 65 | edge_color[e] += "{0:0{1}x}".format(int(green * 255), 2) 66 | edge_color[e] += "{0:0{1}x}".format(int(blue * 255), 2) 67 | edge_color[e] += "{0:0{1}x}".format(int(opacity * 255), 2) 68 | else: 69 | edge_color = "#000000{0:0{1}x}".format(int(opacity * 255), 2) 70 | 71 | for extension in formats: 72 | # Use the graphviz interface that graph_tool supplies to save drawings. 73 | if not draw_vertices: 74 | # For this to work correctly, the gt.graphviz_draw implementation needs some tweaking: 75 | # * Edge attribute headclip: set to "false" 76 | # * Edge attribute tailclip: set to "false" 77 | # * Node attribute shape: set to "none" 78 | gt.graphviz_draw(g, fork=True, pos=pos_normalized, pin=True, penwidth=1, ecolor=edge_color, vsize=0, vcolor='#00ff0000', output=output_folder + '/' + description + '.' + extension, size=tuple(out_size)) 79 | elif color_property_map is None: 80 | gt.graphviz_draw(g, fork=True, pos=pos_normalized, pin=True, penwidth=1, ecolor=edge_color, vsize=0.1, vcolor='#009900', output=output_folder + '/' + description + '.' + extension, size=tuple(out_size)) 81 | else: 82 | gt.graphviz_draw(g, fork=True, pos=pos_normalized, pin=True, penwidth=1, ecolor=edge_color, vsize=0.1, vcmap=matplotlib.cm.hot, vcolor=color_property_map, output=output_folder + '/' + description + '.' + extension, size=tuple(out_size)) 83 | 84 | 85 | # Save a pickle file with the serialized graph, distance matrix, and layout. 86 | def save_layout(out_file, g, X, Y): 87 | with open(out_file, 'wb') as f: 88 | pickle.dump(g, f, pickle.HIGHEST_PROTOCOL) 89 | pickle.dump(X, f, pickle.HIGHEST_PROTOCOL) 90 | pickle.dump(Y, f, pickle.HIGHEST_PROTOCOL) 91 | f.close() 92 | 93 | 94 | # Read a pickle file with the serialized graph, distance matrix, and layout. 95 | def load_layout(in_file): 96 | with open(in_file, 'rb') as f: 97 | g = pickle.load(f) 98 | X = pickle.load(f) 99 | Y = pickle.load(f) 100 | f.close() 101 | return g, X, Y 102 | 103 | # Read from a file exported by Tulip. Coordinates are in the 'graphics' vertex property. 104 | def load_tulip_layout(in_file): 105 | g = gt.load_graph(in_file) 106 | g.set_directed(False) 107 | gt.remove_parallel_edges(g) 108 | graphics = g.vertex_properties['graphics'] 109 | Y = np.zeros((g.num_vertices(), 2)) 110 | for i in range(g.num_vertices()): 111 | Y[i, :] = [graphics[i]['x'], graphics[i]['y']] 112 | return g, Y 113 | 114 | def normalize_layout(Y): 115 | Y_cpy = Y.copy() 116 | # Translate s.t. smallest values for both x and y are 0. 117 | Y_cpy[:, 0] += -Y_cpy[:, 0].min() 118 | Y_cpy[:, 1] += -Y_cpy[:, 1].min() 119 | # Scale s.t. max(max(x, y)) = 1 (while keeping the same aspect ratio!) 120 | scaling = 1 / (np.absolute(Y_cpy).max()) 121 | Y_cpy *= scaling 122 | return Y_cpy 123 | 124 | def load_vna_layout(in_file): 125 | with open(in_file) as f: 126 | all_lines = f.read().splitlines() 127 | 128 | it = iter(all_lines) 129 | 130 | # Ignore preamble 131 | line = next(it) 132 | while not line.startswith('*Node properties'): 133 | line = next(it) 134 | 135 | node_properties = next(it).split(' ') 136 | assert('ID' in node_properties and 'x' in node_properties and 'y' in node_properties) 137 | 138 | vertices = dict() 139 | line = next(it) 140 | gt_idx = 0 # Index for gt 141 | while not line.startswith('*Tie data'): 142 | entries = line.split(' ') 143 | vna_id = entries[0] 144 | vertex = dict() 145 | for i, prop in enumerate(node_properties): 146 | vertex[prop] = entries[i] 147 | vertex['ID'] = gt_idx # Replace VNA ID by numerical gt index 148 | vertices[vna_id] = vertex # Retain VNA ID as key of the vertices dict 149 | 150 | gt_idx += 1 151 | line = next(it) 152 | 153 | edge_properties = next(it).split(' ') 154 | assert(edge_properties[0] == 'from' and edge_properties[1] == 'to') 155 | 156 | edges = [] 157 | try: 158 | while True: 159 | line = next(it) 160 | entries = line.split(' ') 161 | v_i = vertices[entries[0]]['ID'] 162 | v_j = vertices[entries[1]]['ID'] 163 | edges.append((v_i, v_j)) 164 | except StopIteration: 165 | pass 166 | 167 | g = gt.Graph(directed=False) 168 | g.add_vertex(len(vertices)) 169 | for v_i, v_j in edges: 170 | g.add_edge(v_i, v_j) 171 | 172 | gt.remove_parallel_edges(g) 173 | 174 | Y = np.zeros((g.num_vertices(), 2)) 175 | for v in vertices.keys(): 176 | Y[vertices[v]['ID'], 0] = float(vertices[v]['x']) 177 | Y[vertices[v]['ID'], 1] = float(vertices[v]['y']) 178 | pos = g.new_vertex_property('vector') 179 | pos.set_2d_array(Y.T) 180 | 181 | return g, Y 182 | return None 183 | 184 | def save_vna_layout(out_file, g, Y): 185 | with open(out_file, 'w') as f: 186 | f.write('*Node properties\n') 187 | f.write('ID x y\n') 188 | for v in g.vertices(): 189 | x = Y[int(v), 0] 190 | y = Y[int(v), 1] 191 | f.write('{0} {1} {2}\n'.format(int(v), x, y)) 192 | f.write('*Tie data\n') 193 | f.write('from to\n') 194 | for v1, v2 in g.edges(): 195 | f.write('{0} {1}\n'.format(int(v1), int(v2))) 196 | f.close() 197 | 198 | 199 | 200 | # Save a text file with the (edge-based) layout. 201 | def save_layout_txt(out_file, g, Y): 202 | edges = list(g.edges()) 203 | vertices = list(g.vertices()) 204 | with open(out_file, 'w') as f: 205 | for i, e in enumerate(edges): 206 | v1 = int(e.source()) 207 | v2 = int(e.target()) 208 | f.write('{0}: {1} {2} {3} {4}\n'.format(i, Y[v1, 0], Y[v1, 1], Y[v2, 0], Y[v2, 1])) 209 | f.close() 210 | 211 | def load_ply_layout(file): 212 | g = gt.Graph(directed=False) 213 | 214 | with open(file) as f: 215 | all_lines = f.read().splitlines() 216 | it = iter(all_lines) 217 | 218 | line = next(it) 219 | assert(line == 'ply') 220 | 221 | line = next(it) 222 | assert(line.startswith('format ascii')) 223 | 224 | line = next(it) 225 | while not line.startswith('element'): 226 | line = next(it) 227 | 228 | words = line.split(' ') 229 | assert(words[0] == 'element') 230 | assert(words[1] == 'vertex') 231 | assert(words[2].isdigit()) 232 | n_vertices = int(words[2]) 233 | g.add_vertex(n_vertices) 234 | assert(g.num_vertices() == n_vertices) 235 | 236 | line = next(it) 237 | v_props = OrderedDict() 238 | while line.startswith('property'): 239 | words = line.split(' ') 240 | the_type = words[1] 241 | if the_type == 'list': 242 | name = words[4] 243 | v_props[name] = dict() 244 | count_type = words[2] 245 | entry_type = words[3] 246 | v_props[name]['count_type'] = count_type 247 | v_props[name]['entry_type'] = entry_type 248 | else: 249 | name = words[2] 250 | v_props[name] = dict() 251 | v_props[name]['type'] = the_type 252 | line = next(it) 253 | print(v_props) 254 | 255 | vps = dict() 256 | for i, v_prop in enumerate(v_props): 257 | name = list(v_props.keys())[i] 258 | the_type = v_props[name]['type'] 259 | if the_type == 'float': 260 | vp = g.new_vp(the_type) 261 | vps[name] = vp 262 | else: 263 | raise NotImplementedError() 264 | 265 | print(vps) 266 | assert('x' in vps.keys()) 267 | assert('y' in vps.keys()) 268 | assert('z' in vps.keys()) 269 | 270 | # Scan to next element 271 | while not line.startswith('element'): 272 | line = next(it) 273 | 274 | words = line.split(' ') 275 | assert(words[0] == 'element') 276 | assert(words[1] == 'face') 277 | assert(words[2].isdigit()) 278 | n_faces = int(words[2]) 279 | print(n_faces) 280 | 281 | line = next(it) 282 | f_props = OrderedDict() 283 | while line.startswith('property'): 284 | words = line.split(' ') 285 | the_type = words[1] 286 | if the_type == 'list': 287 | name = words[4] 288 | f_props[name] = dict() 289 | count_type = words[2] 290 | entry_type = words[3] 291 | f_props[name]['count_type'] = count_type 292 | f_props[name]['entry_type'] = entry_type 293 | else: 294 | name = words[2] 295 | f_props[name] = dict() 296 | f_props[name]['type'] = the_type 297 | line = next(it) 298 | print(f_props) 299 | 300 | while not line.startswith('end_header'): 301 | line = next(it) 302 | 303 | for i in range(n_vertices): 304 | line = next(it) 305 | words = line.split(' ') 306 | words = [word for word in words if word != ''] 307 | assert(len(words) == len(v_props.keys())) 308 | for j, word in enumerate(words): 309 | name = list(v_props.keys())[j] 310 | the_type = v_props[name]['type'] 311 | if the_type == 'float': 312 | vps[name][i] = float(word) 313 | else: 314 | raise NotImplementedError 315 | 316 | for _ in range (n_faces): 317 | line = next(it) 318 | words = line.split(' ') 319 | words = [word for word in words if word != ''] 320 | i = 0 321 | for name in f_props.keys(): 322 | the_type = f_props[name]['type'] 323 | if the_type == 'list': 324 | if f_props[name]['count_type'] == 'uchar': 325 | n_items = int(words[i]) 326 | else: 327 | raise NotImplementedError 328 | the_list = [int(word) for word in words[i + 1:i + 1 + n_items]] 329 | i += 1 + n_items 330 | 331 | if name == 'vertex_indices': 332 | for j, idx1 in enumerate(the_list): 333 | idx2 = the_list[(j + 1) % len(the_list)] 334 | g.add_edge(idx1, idx2) 335 | assert(i == len(words)) 336 | 337 | 338 | gt.remove_parallel_edges(g) 339 | 340 | largest_connected_component = gt.label_largest_component(g) 341 | unreferenced = sum([1 for i in largest_connected_component.a if i == 0]) 342 | if unreferenced > 0: 343 | g.set_vertex_filter(largest_connected_component) 344 | g.purge_vertices() 345 | print('Filtered {0} unreferenced vertices.'.format(unreferenced)) 346 | 347 | if 'x' in vps.keys() and 'y' in vps.keys(): 348 | if 'z' in vps.keys(): 349 | Y = np.zeros((n_vertices, 3)) 350 | for v in g.vertices(): 351 | print(type(v)) 352 | Y[v, 0] = vps['x'][v] 353 | Y[v, 1] = vps['y'][v] 354 | Y[v, 2] = vps['z'][v] 355 | else: 356 | Y = np.zeros((n_vertices, 2)) 357 | for v in g.vertices(): 358 | Y[v, 0] = vps['x'][v] 359 | Y[v, 1] = vps['y'][v] 360 | 361 | 362 | return g, Y -------------------------------------------------------------------------------- /src/modules/thesne.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2016 Paulo Eduardo Rauber 2 | 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE. 20 | 21 | # ^ 22 | # / \ 23 | # | 24 | # | 25 | # 26 | # License included because this module is a heavily modified version based on 27 | # Paulo's implementation of dynamic t-SNE. 28 | # (https://github.com/paulorauber/thesne) 29 | 30 | 31 | import math 32 | import numpy as np 33 | import theano 34 | import theano.tensor as T 35 | from sklearn.utils import check_random_state 36 | from scipy.spatial.distance import pdist 37 | from modules.layout_io import save_drawing 38 | 39 | epsilon = 1e-16 40 | floath = np.float32 41 | 42 | 43 | class SigmaTooLowException(Exception): 44 | pass 45 | 46 | 47 | class NaNException(Exception): 48 | pass 49 | 50 | 51 | # Squared Euclidean distance between all pairs of row-vectors 52 | def sqeuclidean_var(X): 53 | N = X.shape[0] 54 | ss = (X ** 2).sum(axis=1) 55 | return ss.reshape((N, 1)) + ss.reshape((1, N)) - 2 * X.dot(X.T) 56 | 57 | 58 | # Euclidean distance between all pairs of row-vectors 59 | def euclidean_var(X): 60 | return T.maximum(sqeuclidean_var(X), epsilon) ** 0.5 61 | 62 | 63 | # Conditional probabilities of picking (ordered) pairs in high-dim space. 64 | def p_ij_conditional_var(X, sigma): 65 | N = X.shape[0] 66 | 67 | sqdistance = X**2 68 | 69 | esqdistance = T.exp(-sqdistance / ((2 * (sigma**2)).reshape((N, 1)))) 70 | esqdistance_zd = T.fill_diagonal(esqdistance, 0) 71 | 72 | row_sum = T.sum(esqdistance_zd, axis=1).reshape((N, 1)) 73 | 74 | return esqdistance_zd / row_sum # Possibly dangerous 75 | 76 | 77 | # Symmetrized probabilities of picking pairs in high-dim space. 78 | def p_ij_sym_var(p_ij_conditional): 79 | return (p_ij_conditional + p_ij_conditional.T) / (2 * p_ij_conditional.shape[0]) 80 | 81 | 82 | # Probabilities of picking pairs in low-dim space (using Student 83 | # t-distribution). 84 | def q_ij_student_t_var(Y): 85 | sqdistance = sqeuclidean_var(Y) 86 | one_over = T.fill_diagonal(1 / (sqdistance + 1), 0) 87 | return one_over / one_over.sum() 88 | 89 | 90 | # Probabilities of picking pairs in low-dim space (using Gaussian). 91 | def q_ij_gaussian_var(Y): 92 | sqdistance = sqeuclidean_var(Y) 93 | gauss = T.fill_diagonal(T.exp(-sqdistance), 0) 94 | return gauss / gauss.sum() 95 | 96 | 97 | # Per point cost function 98 | def cost_var(X, Y, sigma, Adj, l_kl, l_e, l_c, l_r, r_eps): 99 | N = X.shape[0] 100 | num_edges = 0.5 * T.sum(Adj) 101 | 102 | # Used to normalize s.t. the l_*'s sum up to one. 103 | l_sum = l_kl + l_e + l_c + l_r 104 | 105 | p_ij_conditional = p_ij_conditional_var(X, sigma) 106 | p_ij = p_ij_sym_var(p_ij_conditional) 107 | q_ij = q_ij_student_t_var(Y) 108 | 109 | p_ij_safe = T.maximum(p_ij, epsilon) 110 | q_ij_safe = T.maximum(q_ij, epsilon) 111 | 112 | # Kullback-Leibler term 113 | kl = T.sum(p_ij * T.log(p_ij_safe / q_ij_safe), axis=1) 114 | 115 | # Edge contraction term 116 | edge_contraction = (1 / (2 * num_edges)) * T.sum(Adj * sqeuclidean_var(Y), axis=1) 117 | 118 | # Compression term 119 | compression = (1 / (2 * N)) * T.sum(Y**2, axis=1) 120 | 121 | # Repulsion term 122 | # repulsion = (1 / (2 * N**2)) * T.sum(T.fill_diagonal(1 / (euclidean_var(Y) + r_eps), 0), axis=1) 123 | repulsion = -(1 / (2 * N**2)) * T.sum(T.fill_diagonal(T.log(euclidean_var(Y) + r_eps), 0), axis=1) 124 | 125 | cost = (l_kl / l_sum) * kl + (l_e / l_sum) * edge_contraction + (l_c / l_sum) * compression + (l_r / l_sum) * repulsion 126 | 127 | return cost 128 | 129 | 130 | # Binary search on sigma for a given perplexity 131 | def find_sigma(X_shared, sigma_shared, N, perplexity, sigma_iters, verbose=0): 132 | X = T.fmatrix('X') 133 | sigma = T.fvector('sigma') 134 | 135 | target = np.log(perplexity) 136 | 137 | P = T.maximum(p_ij_conditional_var(X, sigma), epsilon) 138 | 139 | entropy = -T.sum(P * T.log(P), axis=1) 140 | 141 | # Setting update for binary search interval 142 | sigmin_shared = theano.shared(np.full(N, np.sqrt(epsilon), dtype=floath)) 143 | sigmax_shared = theano.shared(np.full(N, np.inf, dtype=floath)) 144 | 145 | sigmin = T.fvector('sigmin') 146 | sigmax = T.fvector('sigmax') 147 | 148 | upmin = T.switch(T.lt(entropy, target), sigma, sigmin) 149 | upmax = T.switch(T.gt(entropy, target), sigma, sigmax) 150 | 151 | givens = {X: X_shared, sigma: sigma_shared, sigmin: sigmin_shared, 152 | sigmax: sigmax_shared} 153 | updates = [(sigmin_shared, upmin), (sigmax_shared, upmax)] 154 | 155 | update_intervals = theano.function([], entropy, givens=givens, updates=updates) 156 | 157 | # Setting update for sigma according to search interval 158 | upsigma = T.switch(T.isinf(sigmax), sigma * 2, (sigmin + sigmax) / 2.) 159 | 160 | givens = {sigma: sigma_shared, sigmin: sigmin_shared, 161 | sigmax: sigmax_shared} 162 | updates = [(sigma_shared, upsigma)] 163 | 164 | update_sigma = theano.function([], sigma, givens=givens, updates=updates) 165 | 166 | for i in range(sigma_iters): 167 | e = update_intervals() 168 | update_sigma() 169 | if verbose: 170 | print('[find_sigma] Iteration {0}: Perplexities in [{1:.4f}, {2:.4f}].'.format(i + 1, np.exp(e.min()), np.exp(e.max())), end='\r') 171 | if verbose: 172 | print('\n[find_sigma] Done! Perplexities in [{0:.4f}, {1:.4f}].'.format(np.exp(e.min()), np.exp(e.max()))) 173 | 174 | if np.any(np.isnan(np.exp(e))): 175 | raise SigmaTooLowException('Invalid sigmas. The perplexity is probably too low.') 176 | 177 | 178 | # Receives vectors in Y, and moves co-located vertices in opposite directions, 179 | # to assist in the repulsion of vertices. 180 | def switch_shake(Y, magnitude=1e-5): 181 | N = Y.shape[0] 182 | 183 | # Auxiliary functions for translating from square to condensed indexing 184 | # of the distance matrix. 185 | def calc_row_idx(k, n): 186 | return int(math.ceil((1 / 2.) * (- (-8 * k + 4 * n**2 - 4 * n - 7)**0.5 + 2 * n - 1) - 1)) 187 | 188 | def elem_in_i_rows(i, n): 189 | return i * (n - 1 - i) + (i * (i + 1)) / 2 190 | 191 | def calc_col_idx(k, i, n): 192 | return int(n - elem_in_i_rows(i + 1, n) + k) 193 | 194 | def condensed_to_square(k, n): 195 | i = calc_row_idx(k, n) 196 | j = calc_col_idx(k, i, n) 197 | return i, j 198 | 199 | euclid_dist = pdist(Y) 200 | max_dist = euclid_dist.max() 201 | for idx in np.where(euclid_dist <= np.finfo(np.float32).eps)[0]: 202 | (i, j) = condensed_to_square(idx, N) 203 | nudge = np.random.normal(0, max_dist * magnitude, 2) 204 | 205 | # v_i and v_j are co-located. Move v_i in a direction, and move v_j in 206 | # the opposite direction. 207 | Y[i, :] += nudge 208 | Y[j, :] -= nudge 209 | return Y 210 | 211 | 212 | # Perform momentum-based gradient descent on the cost function with the given 213 | # parameters. Return the vertex coordinates and per-vertex cost. 214 | def find_Y(X_shared, Y_shared, sigma_shared, N, output_dims, n_epochs, 215 | initial_lr, final_lr, lr_switch, init_stdev, initial_momentum, 216 | final_momentum, momentum_switch, 217 | initial_l_kl, final_l_kl, l_kl_switch, 218 | initial_l_e, final_l_e, l_e_switch, 219 | initial_l_c, final_l_c, l_c_switch, 220 | initial_l_r, final_l_r, l_r_switch, 221 | r_eps, 222 | Adj_shared, g=None, save_every=None, output_folder=None, verbose=0): 223 | # Optimization hyperparameters 224 | initial_lr = np.array(initial_lr, dtype=floath) 225 | final_lr = np.array(final_lr, dtype=floath) 226 | initial_momentum = np.array(initial_momentum, dtype=floath) 227 | final_momentum = np.array(final_momentum, dtype=floath) 228 | 229 | # Hyperparameters used within Theano 230 | lr = T.fscalar('lr') 231 | lr_shared = theano.shared(initial_lr) 232 | momentum = T.fscalar('momentum') 233 | momentum_shared = theano.shared(initial_momentum) 234 | 235 | # Cost parameters 236 | initial_l_kl = np.array(initial_l_kl, dtype=floath) 237 | final_l_kl = np.array(final_l_kl, dtype=floath) 238 | initial_l_e = np.array(initial_l_e, dtype=floath) 239 | final_l_e = np.array(final_l_e, dtype=floath) 240 | initial_l_c = np.array(initial_l_c, dtype=floath) 241 | final_l_c = np.array(final_l_c, dtype=floath) 242 | initial_l_r = np.array(initial_l_r, dtype=floath) 243 | final_l_r = np.array(final_l_r, dtype=floath) 244 | 245 | # Cost parameters used within Theano 246 | l_kl = T.fscalar('l_kl') 247 | l_kl_shared = theano.shared(initial_l_kl) 248 | l_e = T.fscalar('l_e') 249 | l_e_shared = theano.shared(initial_l_e) 250 | l_c = T.fscalar('l_c') 251 | l_c_shared = theano.shared(initial_l_c) 252 | l_r = T.fscalar('l_r') 253 | l_r_shared = theano.shared(initial_l_r) 254 | 255 | # High-dimensional observations (connectivities of vertices) 256 | X = T.fmatrix('X') 257 | # 2D projection (coordinates of vertices) 258 | Y = T.fmatrix('Y') 259 | 260 | # Adjacency matrix 261 | Adj = T.fmatrix('Adj') 262 | 263 | # Standard deviations used for Gaussians to attain perplexity 264 | sigma = T.fvector('sigma') 265 | 266 | # Y velocities (for momentum-based descent) 267 | Yv = T.fmatrix('Yv') 268 | Yv_shared = theano.shared(np.zeros((N, output_dims), dtype=floath)) 269 | 270 | # Function for retrieving cost for all individual data points 271 | costs = cost_var(X, Y, sigma, Adj, l_kl, l_e, l_c, l_r, r_eps) 272 | 273 | # Sum of all costs (scalar) 274 | cost = T.sum(costs) 275 | 276 | # Gradient of the cost w.r.t. Y 277 | grad_Y = T.grad(cost, Y) 278 | 279 | # Update step for velocity 280 | update_Yv = theano.function( 281 | [], None, 282 | givens={ 283 | X: X_shared, 284 | sigma: sigma_shared, 285 | Y: Y_shared, 286 | Yv: Yv_shared, 287 | Adj: Adj_shared, 288 | lr: lr_shared, 289 | momentum: momentum_shared, 290 | l_kl: l_kl_shared, 291 | l_e: l_e_shared, 292 | l_c: l_c_shared, 293 | l_r: l_r_shared 294 | }, 295 | updates=[ 296 | (Yv_shared, momentum * Yv - lr * grad_Y) 297 | ] 298 | ) 299 | 300 | # Gradient descent step 301 | update_Y = theano.function( 302 | [], [], 303 | givens={ 304 | Y: Y_shared, Yv: Yv_shared 305 | }, 306 | updates=[ 307 | (Y_shared, Y + Yv) 308 | ] 309 | ) 310 | 311 | # Build function to retrieve cost 312 | get_cost = theano.function( 313 | [], cost, 314 | givens={ 315 | X: X_shared, 316 | sigma: sigma_shared, 317 | Y: Y_shared, 318 | Adj: Adj_shared, 319 | l_kl: l_kl_shared, 320 | l_e: l_e_shared, 321 | l_c: l_c_shared, 322 | l_r: l_r_shared 323 | } 324 | ) 325 | 326 | # Build function to retrieve per-vertex cost 327 | get_costs = theano.function( 328 | [], costs, 329 | givens={ 330 | X: X_shared, 331 | sigma: sigma_shared, 332 | Y: Y_shared, 333 | Adj: Adj_shared, 334 | l_kl: l_kl_shared, 335 | l_e: l_e_shared, 336 | l_c: l_c_shared, 337 | l_r: l_r_shared 338 | } 339 | ) 340 | 341 | # Optimization loop 342 | for epoch in range(n_epochs): 343 | 344 | # Switch parameter if a switching point is reached. 345 | if epoch == lr_switch: 346 | lr_shared.set_value(final_lr) 347 | if epoch == momentum_switch: 348 | momentum_shared.set_value(final_momentum) 349 | if epoch == l_kl_switch: 350 | l_kl_shared.set_value(final_l_kl) 351 | if epoch == l_e_switch: 352 | l_e_shared.set_value(final_l_e) 353 | if epoch == l_c_switch: 354 | l_c_shared.set_value(final_l_c) 355 | if epoch == l_r_switch: 356 | l_r_shared.set_value(final_l_r) 357 | if final_l_r != 0: 358 | # Give a nudge to co-located vertices in the epoch before the 359 | # repulsion kicks in (otherwise they don't feel any). 360 | Y_shared.set_value(switch_shake(Y_shared.get_value())) 361 | 362 | # Do update step for velocity 363 | update_Yv() 364 | # Do a gradient descent step 365 | update_Y() 366 | 367 | c = get_cost() 368 | if np.isnan(float(c)): 369 | raise NaNException('Encountered NaN for cost.') 370 | 371 | if verbose: 372 | print('[tsne] Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c)), end='\r') 373 | 374 | if output_folder is not None and g is not None and save_every is not None and epoch % save_every == 0: 375 | # Get per-vertex cost for colour-coding 376 | cs = get_costs() 377 | 378 | # Save a snapshot 379 | save_drawing(output_folder, g, Y_shared.get_value().T, 'tsne_snap_' + str(epoch).zfill(5), formats=['jpg'], verbose=False, edge_colors="rgb", draw_vertices=False, opacity=0.3) 380 | 381 | # Get per-vertex cost 382 | cs = get_costs() 383 | 384 | if verbose: 385 | print('\n[tsne] Done! ') 386 | 387 | return np.array(Y_shared.get_value()), cs 388 | 389 | 390 | def tsne(X, perplexity=30, Y=None, output_dims=2, n_epochs=1000, 391 | initial_lr=10, final_lr=4, lr_switch=None, init_stdev=1e-4, 392 | sigma_iters=50, initial_momentum=0.5, final_momentum=0.8, 393 | momentum_switch=250, 394 | initial_l_kl=None, final_l_kl=None, l_kl_switch=None, 395 | initial_l_e=None, final_l_e=None, l_e_switch=None, 396 | initial_l_c=None, final_l_c=None, l_c_switch=None, 397 | initial_l_r=None, final_l_r=None, l_r_switch=None, 398 | r_eps=1, random_state=None, Adj=None, g=None, 399 | save_every=None, snaps_output_folder=None, verbose=1): 400 | random_state = check_random_state(random_state) 401 | 402 | N = X.shape[0] 403 | 404 | X_shared = theano.shared(np.asarray(X, dtype=floath)) 405 | sigma_shared = theano.shared(np.ones(N, dtype=floath)) 406 | 407 | if Y is None: 408 | Y = random_state.normal(0, init_stdev, size=(N, output_dims)) 409 | Y_shared = theano.shared(np.asarray(Y, dtype=floath)) 410 | 411 | # Find sigmas to attain the given perplexity. 412 | find_sigma(X_shared, sigma_shared, N, perplexity, sigma_iters, verbose) 413 | 414 | # Do the optimization to find Y (the vertex coordinates). 415 | Y, costs = find_Y(X_shared, Y_shared, sigma_shared, N, output_dims, n_epochs, 416 | initial_lr, final_lr, lr_switch, init_stdev, initial_momentum, 417 | final_momentum, momentum_switch, 418 | initial_l_kl, final_l_kl, l_kl_switch, 419 | initial_l_e, final_l_e, l_e_switch, 420 | initial_l_c, final_l_c, l_c_switch, 421 | initial_l_r, final_l_r, l_r_switch, 422 | r_eps, 423 | Adj, g, save_every, 424 | snaps_output_folder, verbose) 425 | 426 | # Return the vertex coordinates and the per-vertex costs. 427 | return Y, costs 428 | -------------------------------------------------------------------------------- /src/tsnetwork.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | import shutil 3 | 4 | import graph_tool.all as gt 5 | import numpy as np 6 | 7 | import modules.distance_matrix as distance_matrix 8 | import modules.graph_io as graph_io 9 | import modules.layout_io as layout_io 10 | import modules.thesne as thesne 11 | import modules.user_input as usr_input 12 | import modules.animation as animations 13 | from modules.tsn_config import TsnConfig 14 | from modules.sfdp_layout import sfdp_placement 15 | 16 | 17 | def main(): 18 | import sys 19 | import os.path 20 | import glob 21 | import itertools 22 | from argparse import ArgumentParser 23 | 24 | parser = ArgumentParser(description='Read a graph, and produce a layout with t-SNE.') 25 | 26 | # Input 27 | parser.add_argument('graphs', nargs='+', help='(List of) input graph(s). Or a folder with graphs.') 28 | 29 | # Output 30 | parser.add_argument('-o', default='./output', help='Folder to write output to. Default: ./output') 31 | parser.add_argument('--save_every', type=int, help='Save a jpg snapshot ever x epochs.') 32 | parser.add_argument('--render_video', action='store_true', help='Render a video of the layout evolution. Needs ImageMagick and ffmpeg.') 33 | parser.add_argument('--retain_snaps', action='store_true', help='Retain the snapshots. This argument is ignored if no video is rendered.') 34 | parser.add_argument('--save_layout_data', action='store_true', help='Save all layout coordinates in a .pickle file and a .txt file.') 35 | parser.add_argument('--opacity', type=float, default=0.3, help='Edge opacity.') 36 | 37 | # Manipulations to graph 38 | parser.add_argument('--strip_graph', action='store_true', help='Retain only the largest connected component in the graph.') 39 | parser.add_argument('--rnd_seed', '-r', type=int, nargs='+', default=[None], help='Seed for random state. (Default: Random seed)') 40 | parser.add_argument('--pre_sfdp', action='store_true', help='If this flag is given, the vertices will be pre-initialized with SFDP.') 41 | parser.add_argument('--only_sfdp', action='store_true', help='If this flag is given, only SFDP will be done.') 42 | parser.add_argument('--accept_all_sfdp', action='store_true', help='If this flag is given, no confirmation is asked for the SFDP layouts.') 43 | parser.add_argument('--remove_rnd_edges', nargs='+', type=float, default=[0], help='Mutate the graph by removing random edges. If this is used without a random seed, a random random seed will be generated. The value given to this argument is the fraction of edges that will be removed.') 44 | 45 | # Hyperparameters 46 | parser.add_argument('--n_epochs', '-e', nargs='+', type=int, default=[1000], help='One or more numbers of t-SNE epochs.') 47 | parser.add_argument('--lr_init', nargs='+', type=float, default=[80], help='One or more initial learning rates.') 48 | parser.add_argument('--lr_final', nargs='+', type=float, default=[None], help='One or more final learning rates. Default: Same as lr_init.') 49 | parser.add_argument('--lr_switch', nargs='+', type=int, default=[None], help='One or more learning rate switch-points.') 50 | parser.add_argument('--momentum_init', nargs='+', type=float, default=[0.5], help='One or more initial momenta.') 51 | parser.add_argument('--momentum_final', nargs='+', type=float, default=[0.5], help='One or more initial momenta.') 52 | parser.add_argument('--momentum_switch', nargs='+', type=int, default=[None], help='One or more momentum switch-points.') 53 | 54 | # Distance metric parameters 55 | parser.add_argument('--distance_metric', '-d', choices=['shortest_path', 'spdm', 'modified_adjacency', 'mam'], default='spdm', help='The distance metric that is used for the pairwise distances.') 56 | parser.add_argument('-k', nargs='+', type=float, default=[1], help='Exponent for transfer function.') 57 | 58 | # Cost function parameters 59 | # Kullback-Leibler 60 | parser.add_argument('--perplexity', '-p', nargs='+', type=float, default=[80], help='One or more perplexities.') 61 | parser.add_argument('--l_kl_init', nargs='+', type=float, default=[1], help='One or more KL factors.') 62 | parser.add_argument('--l_kl_final', nargs='+', type=float, default=[1], help='One or more KL factors.') 63 | parser.add_argument('--l_kl_switch', nargs='+', type=int, default=[None], help='One or more KL switch-points') 64 | # Edge contraction 65 | parser.add_argument('--l_e_init', nargs='+', type=float, default=[0], help='One or more edge contraction factors.') 66 | parser.add_argument('--l_e_final', nargs='+', type=float, default=[0], help='One or more edge contraction factors.') 67 | parser.add_argument('--l_e_switch', nargs='+', type=int, default=[None], help='One or more edge contraction switch-points') 68 | # Compression 69 | parser.add_argument('--l_c_init', nargs='+', type=float, default=[1.2], help='One or more compression factors.') 70 | parser.add_argument('--l_c_final', nargs='+', type=float, default=[0], help='One or more compression factors.') 71 | parser.add_argument('--l_c_switch', nargs='+', type=int, default=[None], help='One or more compression switch-points') 72 | # Repulsion 73 | parser.add_argument('--l_r_init', nargs='+', type=float, default=[0], help='One or more repulsion factors.') 74 | parser.add_argument('--l_r_final', nargs='+', type=float, default=[0.5], help='One or more repulsion factors.') 75 | parser.add_argument('--l_r_switch', nargs='+', type=int, default=[None], help='One or more repulsion switch-points') 76 | parser.add_argument('--r_eps', nargs='+', type=float, default=[0.2], help='Additional term in denominator to prevent near-singularities.') 77 | 78 | args = parser.parse_args() 79 | 80 | # Retrieve a list of all files in the directory, if args.graphs[0] is a directory. 81 | if len(args.graphs) == 1 and os.path.isdir(args.graphs[0]): 82 | args.graphs = glob.glob(args.graphs[0] + '/*') 83 | 84 | # Check graph input 85 | for g_file in args.graphs: 86 | if not os.path.isfile(g_file): 87 | raise FileNotFoundError(g_file + ' is not a file.') 88 | 89 | # Generate random random seed if none is given. 90 | if args.rnd_seed == [None]: 91 | args.rnd_seed = [np.random.randint(1e8)] 92 | 93 | # Ignore retain_snaps argument if no video is rendered. 94 | if not args.render_video: 95 | args.retain_snaps = True 96 | 97 | # Get names of the graphs (by splitting of path and extension) 98 | names = [os.path.split(os.path.splitext(file)[0])[1] for file in args.graphs] 99 | 100 | # Determine output folders. One is created in the specified output folder 101 | # for every graph that is supplied. 102 | output_folders = [args.o + '/' + name for name in names] 103 | 104 | # Check (and possibly create) output folders 105 | for folder in [args.o] + output_folders: 106 | if not os.path.exists(folder): 107 | os.makedirs(folder) 108 | 109 | # At least everything is fine for now. 110 | there_were_exceptions = False 111 | 112 | # Loop over all graphs (and their respective output folders) 113 | for g_file, g_name, output_folder in zip(args.graphs, names, output_folders): 114 | # Load the graph 115 | g = graph_io.load_graph(g_file) 116 | print('[tsnetwork] Loaded graph {0} (|V| = {1}, |E| = {2}) into memory.'.format(g_name, g.num_vertices(), g.num_edges())) 117 | 118 | # Add graph name as propery in the internal representation 119 | g.graph_properties['name'] = g.new_graph_property('string', g_name) 120 | 121 | # Usually this loop has just one iteration, with only 0 as the value 122 | # for rmv_edge_frac (that is, no edges are removed). 123 | for rmv_edge_frac in args.remove_rnd_edges: 124 | print('[tsnetwork] Original graph: (|V|, |E|) = ({0}, {1}).'.format(g.num_vertices(), g.num_edges())) 125 | 126 | # Create a temporary copy of the graph that will be manipulated. 127 | gv = gt.GraphView(g) 128 | 129 | # Remove rmv_edge_frac of the graphs edges from gv. 130 | gv.clear_filters() 131 | gv.reindex_edges() 132 | edge_list = list(gv.edges()) 133 | not_here_ep = gv.new_edge_property('bool', val=True) 134 | n_remove_edges = int(rmv_edge_frac * gv.num_edges()) 135 | for e in np.random.randint(0, gv.num_edges(), n_remove_edges): 136 | not_here_ep[edge_list[e]] = False 137 | gv.set_edge_filter(not_here_ep) 138 | 139 | if n_remove_edges > 0: 140 | print('[tsnetwork] Removed {2} random edges: (|V|, |E|) = ({0}, {1}).'.format(gv.num_vertices(), gv.num_edges(), n_remove_edges)) 141 | 142 | # Filter the graph s.t. only the largest connected component 143 | # remains. 144 | if args.strip_graph: 145 | largest_connected_component = gt.label_largest_component(gv) 146 | gv.set_vertex_filter(largest_connected_component) 147 | gv.purge_vertices() 148 | print('[tsnetwork] Filtered largest component: (|V|, |E|) = ({0}, {1}).'.format(gv.num_vertices(), gv.num_edges())) 149 | 150 | if args.pre_sfdp or args.only_sfdp: 151 | # Perform a SFDP layout (either as the only layout or as a 152 | # starting point for t-SNE.) 153 | Y_init, _ = sfdp_placement(gv, output_folder, ask_for_acceptance=not args.accept_all_sfdp, opacity=args.opacity) 154 | if args.only_sfdp: 155 | continue 156 | else: 157 | # Random positions will be generated 158 | Y_init = None 159 | 160 | # Compute distance matrix of this graph with the specified metric 161 | X = distance_matrix.get_distance_matrix(gv, args.distance_metric) 162 | 163 | # Retrieve the adjacency matrix of the graph 164 | Adj_sparse = gt.adjacency(gv) 165 | Adj = np.zeros(Adj_sparse.shape, dtype='float32') 166 | for i, j in zip(*Adj_sparse.nonzero()): 167 | Adj[i, j] = Adj_sparse[i, j] 168 | 169 | # Make list of tsnetwork configuration objects. These are objects 170 | # that represent a configuration for a t-SNE layout. 171 | tsn_configs = [] 172 | for perplexity, n_epochs, initial_lr, final_lr, lr_switch, initial_momentum,\ 173 | final_momentum, momentum_switch,\ 174 | initial_l_kl, final_l_kl, l_kl_switch,\ 175 | initial_l_e, final_l_e, l_e_switch,\ 176 | initial_l_c, final_l_c, l_c_switch,\ 177 | initial_l_r, final_l_r, l_r_switch,\ 178 | r_eps, k, rnd_seed in itertools.product( 179 | args.perplexity, args.n_epochs, args.lr_init, args.lr_final, 180 | args.lr_switch, args.momentum_init, args.momentum_final, 181 | args.momentum_switch, 182 | args.l_kl_init, args.l_kl_final, args.l_kl_switch, 183 | args.l_e_init, args.l_e_final, args.l_e_switch, 184 | args.l_c_init, args.l_c_final, args.l_c_switch, 185 | args.l_r_init, args.l_r_final, args.l_r_switch, 186 | args.r_eps, args.k, args.rnd_seed): 187 | 188 | # Use 50% for the switching points if no argument is given 189 | if lr_switch is None: 190 | lr_switch = int(n_epochs * 0.5) 191 | if momentum_switch is None: 192 | momentum_switch = int(n_epochs * 0.5) 193 | if l_kl_switch is None: 194 | l_kl_switch = int(n_epochs * 0.5) 195 | if l_e_switch is None: 196 | l_e_switch = int(n_epochs * 0.5) 197 | if l_c_switch is None: 198 | l_c_switch = int(n_epochs * 0.5) 199 | if l_r_switch is None: 200 | l_r_switch = int(n_epochs * 0.5) 201 | 202 | if final_lr is None: 203 | final_lr = initial_lr 204 | 205 | cfg = TsnConfig( 206 | perplexity=perplexity, n_epochs=n_epochs, 207 | initial_lr=initial_lr, final_lr=final_lr, lr_switch=lr_switch, 208 | initial_momentum=initial_momentum, 209 | final_momentum=final_momentum, momentum_switch=momentum_switch, 210 | initial_l_kl=initial_l_kl, final_l_kl=final_l_kl, l_kl_switch=l_kl_switch, 211 | initial_l_e=initial_l_e, final_l_e=final_l_e, l_e_switch=l_e_switch, 212 | initial_l_c=initial_l_c, final_l_c=final_l_c, l_c_switch=l_c_switch, 213 | initial_l_r=initial_l_r, final_l_r=final_l_r, l_r_switch=l_r_switch, 214 | r_eps=r_eps, k=k, pre_sfdp=args.pre_sfdp, rmv_edge_frac=rmv_edge_frac, 215 | rnd_seed=rnd_seed, distance_matrix=args.distance_metric 216 | ) 217 | 218 | # Do no add the configurations that already have files matching 219 | # the description, unless the user confirms to overwrite. 220 | if any([file.startswith(cfg.get_description() + '.') for file in os.listdir(output_folder)]): 221 | if not usr_input.confirm('[tsnetwork] ' + cfg.get_description() + ' files exists! Overwrite?'): 222 | continue 223 | tsn_configs.append(cfg) 224 | 225 | # Loop over the t-SNE configurations for a single graph 226 | for cfg in tsn_configs: 227 | print('[tsnetwork] Processing: ' + cfg.get_description()) 228 | 229 | # String that has the path to the directory where the snapshots 230 | # will come. (If --save_every is given) 231 | snaps_dir = output_folder + '/snaps_' + cfg.get_description() 232 | 233 | # Clean out existing snaps directory if it exists. 234 | if args.save_every is not None and os.path.exists(snaps_dir): 235 | if usr_input.confirm('[tsnetwork] ' + snaps_dir + ' exists. Delete contents?'): 236 | for file in os.listdir(snaps_dir): 237 | file_path = os.path.join(snaps_dir, file) 238 | try: 239 | if os.path.isfile(file_path): 240 | os.unlink(file_path) 241 | elif os.path.isdir(file_path): 242 | shutil.rmtree(file_path) 243 | except Exception as e: 244 | print(e) 245 | elif args.save_every is not None and not os.path.exists(snaps_dir): 246 | # Make folder for snaps, if it is necessary and it doesn't 247 | # exist yet. 248 | os.makedirs(snaps_dir) 249 | 250 | # Apply the transfer function 251 | X_transfered = X ** cfg.k 252 | 253 | # Try to do the tsne layout. 254 | try: 255 | Y, costs = thesne.tsne(X_transfered, random_state=cfg.rnd_seed, perplexity=cfg.perplexity, n_epochs=cfg.n_epochs, 256 | Y=Y_init, 257 | initial_lr=cfg.initial_lr, final_lr=cfg.final_lr, lr_switch=cfg.lr_switch, 258 | initial_momentum=cfg.initial_momentum, final_momentum=cfg.final_momentum, momentum_switch=cfg.momentum_switch, 259 | initial_l_kl=cfg.initial_l_kl, final_l_kl=cfg.final_l_kl, l_kl_switch=cfg.l_kl_switch, 260 | initial_l_e=cfg.initial_l_e, final_l_e=cfg.final_l_e, l_e_switch=cfg.l_e_switch, 261 | initial_l_c=cfg.initial_l_c, final_l_c=cfg.final_l_c, l_c_switch=cfg.l_c_switch, 262 | initial_l_r=cfg.initial_l_r, final_l_r=cfg.final_l_r, l_r_switch=cfg.l_r_switch, 263 | r_eps=cfg.r_eps, Adj=Adj, g=gv, snaps_output_folder=snaps_dir, save_every=args.save_every) 264 | except (thesne.NaNException, thesne.SigmaTooLowException) as e: 265 | there_were_exceptions = True 266 | print('[exception] {0}'.format(e)) 267 | 268 | # Also write exception to a file. 269 | with open(output_folder + '/exception_' + cfg.get_description() + '.out', 'w') as f: 270 | print('{0}'.format(e), file=f) 271 | f.close() 272 | print('[tsnetwork] Continuing with next TsnConfig.') 273 | continue 274 | 275 | # Render an animation of the snapshots 276 | if args.render_video: 277 | animations.save_animation(snaps_dir, cfg.get_description()) 278 | 279 | # Remove the directory with snapshots. 280 | if args.save_every is not None and not args.retain_snaps and os.path.exists(snaps_dir): 281 | print('[tsnetwork] Cleaning up snaps directory.') 282 | shutil.rmtree(snaps_dir) 283 | 284 | # Save the data (graph, vertex coordinates) 285 | if args.save_layout_data: 286 | layout_io.save_vna_layout(output_folder + '/layout_' + cfg.get_description() + '.vna', gv, Y) 287 | layout_io.save_layout_txt(output_folder + '/layout_edges_' + cfg.get_description() + '.txt', gv, Y) 288 | 289 | # Save final drawing of the layout 290 | layout_io.save_drawing(output_folder, gv, Y.T, cfg.get_description(), formats=['jpg', 'pdf'], edge_colors="rgb", draw_vertices=False, opacity=args.opacity) 291 | 292 | if there_were_exceptions: 293 | print('[tsnetwork] Done! However, be wary. There were exceptions.') 294 | else: 295 | print('[tsnetwork] Done!') 296 | 297 | 298 | if __name__ == '__main__': 299 | main() 300 | --------------------------------------------------------------------------------