├── data_mnist └── __init__.py ├── .DS_Store ├── graph.pyc ├── utils.pyc ├── pic ├── web.png └── home100.jpg ├── coarsening.pyc ├── README.md ├── graph.py ├── coarsening.py ├── utils.py ├── CayleyNet.ipynb └── .ipynb_checkpoints └── CayleyNet-checkpoint.ipynb /data_mnist/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/.DS_Store -------------------------------------------------------------------------------- /graph.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/graph.pyc -------------------------------------------------------------------------------- /utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/utils.pyc -------------------------------------------------------------------------------- /pic/web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/pic/web.png -------------------------------------------------------------------------------- /coarsening.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/coarsening.pyc -------------------------------------------------------------------------------- /pic/home100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/pic/home100.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CayleyNets 2 | We present a TensorFlow implementation of the Graph Convolutional Neural Network illustrated in: 3 | 4 | CayleyNets: Graph Convolutional Neural Networks with Complex Rational Spectral Filters
5 | IEEE Transactions on Signal Processing, 2018
6 | Ron Levie*, Federico Monti*, Xavier Bresson, Michael M. Bronstein 7 | 8 | https://arxiv.org/abs/1705.07664 9 | 10 | The repository contains a sparse implementation of the network used to solve the MNIST digit classification problem described in the paper. Rational spectral filters are approximated with the Jacobi method to obtain an efficient solution. 11 | 12 | ## When shall I use CayleyNet? 13 | 14 | CayleyNet is a Graph CNN with spectral zoom properties that can effectively operate on signals defined over graphs. Thanks to these spectral properties, CayleyNet is well suited to a variety of domains (e.g. citation networks, community graphs, user/item similarity graphs). Variations of the architecture implemented here achieved state-of-the-art performance on vertex classification, community detection and matrix completion tasks. 15 | 16 | ## Useful links 17 | 18 | inf.usi.ch/phd/monti
19 | geometricdeeplearning.com 20 | -------------------------------------------------------------------------------- /graph.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | import sklearn.neighbors 3 | import matplotlib.pyplot as plt 4 | import scipy.sparse 5 | import scipy.sparse.linalg 6 | import scipy.spatial.distance 7 | import numpy as np 8 | 9 | 10 | def grid(m, dtype=np.float32): 11 | """Return the embedding of a grid graph.""" 12 | M = m**2 13 | x = np.linspace(0, 1, m, dtype=dtype) 14 | y = np.linspace(0, 1, m, dtype=dtype) 15 | xx, yy = np.meshgrid(x, y) 16 | z = np.empty((M, 2), dtype) 17 | z[:, 0] = xx.reshape(M) 18 | z[:, 1] = yy.reshape(M) 19 | return z 20 | 21 | 22 | def distance_scipy_spatial(z, k=4, metric='euclidean'): 23 | """Compute exact pairwise distances.""" 24 | d = scipy.spatial.distance.pdist(z, metric) 25 | d = scipy.spatial.distance.squareform(d) 26 | # k-NN graph. 27 | idx = np.argsort(d)[:, 1:k+1] 28 | d.sort() 29 | d = d[:, 1:k+1] 30 | return d, idx 31 | 32 | 33 | def distance_sklearn_metrics(z, k=4, metric='euclidean'): 34 | """Compute exact pairwise distances.""" 35 | d = sklearn.metrics.pairwise.pairwise_distances( 36 | z, metric=metric, n_jobs=-2) 37 | # k-NN graph. 38 | idx = np.argsort(d)[:, 1:k+1] 39 | d.sort() 40 | d = d[:, 1:k+1] 41 | return d, idx 42 | 43 | 44 | def distance_lshforest(z, k=4, metric='cosine'): 45 | """Return an approximation of the k-nearest cosine distances.""" 46 | assert metric is 'cosine' 47 | lshf = sklearn.neighbors.LSHForest() 48 | lshf.fit(z) 49 | dist, idx = lshf.kneighbors(z, n_neighbors=k+1) 50 | assert dist.min() < 1e-10 51 | dist[dist < 0] = 0 52 | return dist, idx 53 | 54 | # TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN 55 | 56 | 57 | def adjacency(dist, idx): 58 | """Return the adjacency matrix of a kNN graph.""" 59 | M, k = dist.shape 60 | assert M, k == idx.shape 61 | assert dist.min() >= 0 62 | 63 | # Weights. 64 | sigma2 = np.mean(dist[:, -1])**2 65 | dist = np.exp(- dist**2 / sigma2) 66 | 67 | # Weight matrix. 68 | I = np.arange(0, M).repeat(k) 69 | J = idx.reshape(M*k) 70 | V = dist.reshape(M*k) 71 | W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M)) 72 | 73 | # No self-connections. 74 | W.setdiag(0) 75 | 76 | # Non-directed graph. 
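    # The next two lines symmetrize W by taking the elementwise maximum of W and
    # W.T: wherever W.T > W, the transposed (larger) weight replaces the original
    # one, so an edge that the kNN search kept in only one direction ends up
    # present in both directions with the same weight.
    # (Equivalent dense check: np.maximum(W.toarray(), W.toarray().T).)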
77 | bigger = W.T > W 78 | W = W - W.multiply(bigger) + W.T.multiply(bigger) 79 | 80 | assert W.nnz % 2 == 0 81 | assert np.abs(W - W.T).mean() < 1e-10 82 | assert type(W) is scipy.sparse.csr.csr_matrix 83 | return W 84 | 85 | 86 | def replace_random_edges(A, noise_level): 87 | """Replace randomly chosen edges by random edges.""" 88 | M, M = A.shape 89 | n = int(noise_level * A.nnz // 2) 90 | 91 | indices = np.random.permutation(A.nnz//2)[:n] 92 | rows = np.random.randint(0, M, n) 93 | cols = np.random.randint(0, M, n) 94 | vals = np.random.uniform(0, 1, n) 95 | assert len(indices) == len(rows) == len(cols) == len(vals) 96 | 97 | A_coo = scipy.sparse.triu(A, format='coo') 98 | assert A_coo.nnz == A.nnz // 2 99 | assert A_coo.nnz >= n 100 | A = A.tolil() 101 | 102 | for idx, row, col, val in zip(indices, rows, cols, vals): 103 | old_row = A_coo.row[idx] 104 | old_col = A_coo.col[idx] 105 | 106 | A[old_row, old_col] = 0 107 | A[old_col, old_row] = 0 108 | A[row, col] = 1 109 | A[col, row] = 1 110 | 111 | A.setdiag(0) 112 | A = A.tocsr() 113 | A.eliminate_zeros() 114 | return A 115 | 116 | 117 | def laplacian(W, normalized=True): 118 | """Return the Laplacian of the weigth matrix.""" 119 | 120 | # Degree matrix. 121 | d = W.sum(axis=0) 122 | 123 | # Laplacian matrix. 124 | if not normalized: 125 | D = scipy.sparse.diags(d.A.squeeze(), 0) 126 | L = D - W 127 | else: 128 | d += np.spacing(np.array(0, W.dtype)) 129 | d = 1 / np.sqrt(d) 130 | D = scipy.sparse.diags(d.A.squeeze(), 0) 131 | I = scipy.sparse.identity(d.size, dtype=W.dtype) 132 | L = I - D * W * D 133 | 134 | # assert np.abs(L - L.T).mean() < 1e-9 135 | assert type(L) is scipy.sparse.csr.csr_matrix 136 | return L 137 | 138 | 139 | def lmax(L, normalized=True): 140 | """Upper-bound on the spectrum.""" 141 | if normalized: 142 | return 2 143 | else: 144 | return scipy.sparse.linalg.eigsh( 145 | L, k=1, which='LM', return_eigenvectors=False)[0] 146 | 147 | 148 | def fourier(L, algo='eigh', k=1): 149 | """Return the Fourier basis, i.e. the EVD of the Laplacian.""" 150 | 151 | def sort(lamb, U): 152 | idx = lamb.argsort() 153 | return lamb[idx], U[:, idx] 154 | 155 | if algo is 'eig': 156 | lamb, U = np.linalg.eig(L.toarray()) 157 | lamb, U = sort(lamb, U) 158 | elif algo is 'eigh': 159 | lamb, U = np.linalg.eigh(L.toarray()) 160 | elif algo is 'eigs': 161 | lamb, U = scipy.sparse.linalg.eigs(L, k=k, which='SM') 162 | lamb, U = sort(lamb, U) 163 | elif algo is 'eigsh': 164 | lamb, U = scipy.sparse.linalg.eigsh(L, k=k, which='SM') 165 | 166 | return lamb, U 167 | 168 | 169 | def plot_spectrum(L, algo='eig', ymin = 0): 170 | """Plot the spectrum of a list of multi-scale Laplacians L.""" 171 | # Algo is eig to be sure to get all eigenvalues. 172 | plt.figure(figsize=(17, 5)) 173 | for i, lap in enumerate(L): 174 | lamb, U = fourier(lap, algo) 175 | step = 2**i 176 | x = range(step//2, L[0].shape[0], step) 177 | lb = 'L_{} spectrum in [{:1.2e}, {:1.2e}]'.format(i, lamb[0], lamb[-1]) 178 | plt.plot(x, lamb, '.', label=lb) 179 | plt.legend(loc='best') 180 | plt.xlim(0, L[0].shape[0]) 181 | plt.ylim(ymin=ymin) 182 | 183 | plt.ylabel('Value') 184 | plt.xlabel('Eigenvalue ID') 185 | 186 | 187 | def lanczos(L, X, K): 188 | """ 189 | Given the graph Laplacian and a data matrix, return a data matrix which can 190 | be multiplied by the filter coefficients to filter X using the Lanczos 191 | polynomial approximation. 
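    Expected shapes, matching the code below: L is M x M, X is M x N with one
    signal per column, and the returned Xt has shape K x M x N.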
192 | """ 193 | M, N = X.shape 194 | assert L.dtype == X.dtype 195 | 196 | def basis(L, X, K): 197 | """ 198 | Lanczos algorithm which computes the orthogonal matrix V and the 199 | tri-diagonal matrix H. 200 | """ 201 | a = np.empty((K, N), L.dtype) 202 | b = np.zeros((K, N), L.dtype) 203 | V = np.empty((K, M, N), L.dtype) 204 | V[0, ...] = X / np.linalg.norm(X, axis=0) 205 | for k in range(K-1): 206 | W = L.dot(V[k, ...]) 207 | a[k, :] = np.sum(W * V[k, ...], axis=0) 208 | W = W - a[k, :] * V[k, ...] - ( 209 | b[k, :] * V[k-1, ...] if k > 0 else 0) 210 | b[k+1, :] = np.linalg.norm(W, axis=0) 211 | V[k+1, ...] = W / b[k+1, :] 212 | a[K-1, :] = np.sum(L.dot(V[K-1, ...]) * V[K-1, ...], axis=0) 213 | return V, a, b 214 | 215 | def diag_H(a, b, K): 216 | """Diagonalize the tri-diagonal H matrix.""" 217 | H = np.zeros((K*K, N), a.dtype) 218 | H[:K**2:K+1, :] = a 219 | H[1:(K-1)*K:K+1, :] = b[1:, :] 220 | H.shape = (K, K, N) 221 | Q = np.linalg.eigh(H.T, UPLO='L')[1] 222 | Q = np.swapaxes(Q, 1, 2).T 223 | return Q 224 | 225 | V, a, b = basis(L, X, K) 226 | Q = diag_H(a, b, K) 227 | Xt = np.empty((K, M, N), L.dtype) 228 | for n in range(N): 229 | Xt[..., n] = Q[..., n].T.dot(V[..., n]) 230 | Xt *= Q[0, :, np.newaxis, :] 231 | Xt *= np.linalg.norm(X, axis=0) 232 | return Xt # Q[0, ...] 233 | 234 | 235 | def rescale_L(L, lmax=2): 236 | """Rescale the Laplacian eigenvalues in [-1,1].""" 237 | M, M = L.shape 238 | I = scipy.sparse.identity(M, format='csr', dtype=L.dtype) 239 | L /= lmax / 2 240 | L -= I 241 | return L 242 | 243 | 244 | def chebyshev(L, X, K): 245 | """Return T_k X where T_k are the Chebyshev polynomials of order up to K. 246 | Complexity is O(KMN).""" 247 | M, N = X.shape 248 | assert L.dtype == X.dtype 249 | 250 | # L = rescale_L(L, lmax) 251 | # Xt = T @ X: MxM @ MxN. 252 | Xt = np.empty((K, M, N), L.dtype) 253 | # Xt_0 = T_0 X = I X = X. 254 | Xt[0, ...] = X 255 | # Xt_1 = T_1 X = L X. 256 | if K > 1: 257 | Xt[1, ...] = L.dot(X) 258 | # Xt_k = 2 L Xt_k-1 - Xt_k-2. 259 | for k in range(2, K): 260 | Xt[k, ...] = 2 * L.dot(Xt[k-1, ...]) - Xt[k-2, ...] 261 | return Xt 262 | -------------------------------------------------------------------------------- /coarsening.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse 3 | 4 | 5 | def coarsen(A, levels, self_connections=False): 6 | """ 7 | Coarsen a graph, represented by its adjacency matrix A, at multiple 8 | levels. 9 | """ 10 | graphs, parents = metis(A, levels) 11 | perms = compute_perm(parents) 12 | 13 | for i, A in enumerate(graphs): 14 | M, M = A.shape 15 | 16 | if not self_connections: 17 | A = A.tocoo() 18 | A.setdiag(0) 19 | 20 | if i < levels: 21 | A = perm_adjacency(A, perms[i]) 22 | 23 | A = A.tocsr() 24 | A.eliminate_zeros() 25 | graphs[i] = A 26 | 27 | Mnew, Mnew = A.shape 28 | print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added),' 29 | '|E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2)) 30 | 31 | return graphs, perms[0] if levels > 0 else None 32 | 33 | 34 | def metis(W, levels, rid=None): 35 | """ 36 | Coarsen a graph multiple times using the METIS algorithm. 37 | 38 | INPUT 39 | W: symmetric sparse weight (adjacency) matrix 40 | levels: the number of coarsened graphs 41 | 42 | OUTPUT 43 | graph[0]: original graph of size N_1 44 | graph[2]: coarser graph of size N_2 < N_1 45 | graph[levels]: coarsest graph of Size N_levels < ... 
< N_2 < N_1 46 | parents[i] is a vector of size N_i with entries ranging from 1 to N_{i+1} 47 | which indicate the parents in the coarser graph[i+1] 48 | nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i} 49 | 50 | NOTE 51 | if "graph" is a list of length k, then "parents" will be a list of length k-1 52 | """ 53 | 54 | N, N = W.shape 55 | if rid is None: 56 | rid = np.random.permutation(range(N)) 57 | parents = [] 58 | degree = W.sum(axis=0) - W.diagonal() 59 | graphs = [] 60 | graphs.append(W) 61 | #supernode_size = np.ones(N) 62 | #nd_sz = [supernode_size] 63 | #count = 0 64 | 65 | #while N > maxsize: 66 | for _ in range(levels): 67 | 68 | #count += 1 69 | 70 | # CHOOSE THE WEIGHTS FOR THE PAIRING 71 | # weights = ones(N,1) # metis weights 72 | weights = degree # graclus weights 73 | # weights = supernode_size # other possibility 74 | weights = np.array(weights).squeeze() 75 | 76 | # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR 77 | idx_row, idx_col, val = scipy.sparse.find(W) 78 | perm = np.argsort(idx_row) 79 | rr = idx_row[perm] 80 | cc = idx_col[perm] 81 | vv = val[perm] 82 | cluster_id = metis_one_level(rr,cc,vv,rid,weights) # rr is ordered 83 | parents.append(cluster_id) 84 | 85 | # TO DO 86 | # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE 87 | #supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) ) 88 | #print(cluster_id) 89 | #print(supernode_size) 90 | #nd_sz{count+1}=supernode_size; 91 | 92 | # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH 93 | nrr = cluster_id[rr] 94 | ncc = cluster_id[cc] 95 | nvv = vv 96 | Nnew = cluster_id.max() + 1 97 | # CSR is more appropriate: row,val pairs appear multiple times 98 | W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)), shape=(Nnew,Nnew)) 99 | W.eliminate_zeros() 100 | # Add new graph to the list of all coarsened graphs 101 | graphs.append(W) 102 | N, N = W.shape 103 | 104 | # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS) 105 | degree = W.sum(axis=0) 106 | #degree = W.sum(axis=0) - W.diagonal() 107 | 108 | # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS 109 | #[~, rid]=sort(ss); # arthur strategy 110 | #[~, rid]=sort(supernode_size); # thomas strategy 111 | #rid=randperm(N); # metis/graclus strategy 112 | ss = np.array(W.sum(axis=0)).squeeze() 113 | rid = np.argsort(ss) 114 | 115 | return graphs, parents 116 | 117 | 118 | # Coarsen a graph given by rr,cc,vv. 
rr is assumed to be ordered 119 | def metis_one_level(rr,cc,vv,rid,weights): 120 | 121 | nnz = rr.shape[0] 122 | N = rr[nnz-1] + 1 123 | 124 | marked = np.zeros(N, np.bool) 125 | rowstart = np.zeros(N, np.int32) 126 | rowlength = np.zeros(N, np.int32) 127 | cluster_id = np.zeros(N, np.int32) 128 | 129 | oldval = rr[0] 130 | count = 0 131 | clustercount = 0 132 | 133 | for ii in range(nnz): 134 | rowlength[count] = rowlength[count] + 1 135 | if rr[ii] > oldval: 136 | oldval = rr[ii] 137 | rowstart[count+1] = ii 138 | count = count + 1 139 | 140 | for ii in range(N): 141 | tid = rid[ii] 142 | if not marked[tid]: 143 | wmax = 0.0 144 | rs = rowstart[tid] 145 | marked[tid] = True 146 | bestneighbor = -1 147 | for jj in range(rowlength[tid]): 148 | nid = cc[rs+jj] 149 | if marked[nid]: 150 | tval = 0.0 151 | else: 152 | tval = vv[rs+jj] * (1.0/weights[tid] + 1.0/weights[nid]) 153 | if tval > wmax: 154 | wmax = tval 155 | bestneighbor = nid 156 | 157 | cluster_id[tid] = clustercount 158 | 159 | if bestneighbor > -1: 160 | cluster_id[bestneighbor] = clustercount 161 | marked[bestneighbor] = True 162 | 163 | clustercount += 1 164 | 165 | return cluster_id 166 | 167 | def compute_perm(parents): 168 | """ 169 | Return a list of indices to reorder the adjacency and data matrices so 170 | that the union of two neighbors from layer to layer forms a binary tree. 171 | """ 172 | 173 | # Order of last layer is random (chosen by the clustering algorithm). 174 | indices = [] 175 | if len(parents) > 0: 176 | M_last = max(parents[-1]) + 1 177 | indices.append(list(range(M_last))) 178 | 179 | for parent in parents[::-1]: 180 | #print('parent: {}'.format(parent)) 181 | 182 | # Fake nodes go after real ones. 183 | pool_singeltons = len(parent) 184 | 185 | indices_layer = [] 186 | for i in indices[-1]: 187 | indices_node = list(np.where(parent == i)[0]) 188 | assert 0 <= len(indices_node) <= 2 189 | #print('indices_node: {}'.format(indices_node)) 190 | 191 | # Add a node to go with a singelton. 192 | if len(indices_node) is 1: 193 | indices_node.append(pool_singeltons) 194 | pool_singeltons += 1 195 | #print('new singelton: {}'.format(indices_node)) 196 | # Add two nodes as children of a singelton in the parent. 197 | elif len(indices_node) is 0: 198 | indices_node.append(pool_singeltons+0) 199 | indices_node.append(pool_singeltons+1) 200 | pool_singeltons += 2 201 | #print('singelton childrens: {}'.format(indices_node)) 202 | 203 | indices_layer.extend(indices_node) 204 | indices.append(indices_layer) 205 | 206 | # Sanity checks. 207 | for i,indices_layer in enumerate(indices): 208 | M = M_last*2**i 209 | # Reduction by 2 at each layer (binary tree). 210 | assert len(indices[0] == M) 211 | # The new ordering does not omit an indice. 212 | assert sorted(indices_layer) == list(range(M)) 213 | 214 | return indices[::-1] 215 | 216 | assert (compute_perm([np.array([4,1,1,2,2,3,0,0,3]),np.array([2,1,0,1,0])]) 217 | == [[3,4,0,9,1,2,5,8,6,7,10,11],[2,4,1,3,0,5],[0,1,2]]) 218 | 219 | def perm_data(x, indices): 220 | """ 221 | Permute data matrix, i.e. exchange node ids, 222 | so that binary unions form the clustering tree. 223 | """ 224 | if indices is None: 225 | return x 226 | 227 | N, M = x.shape 228 | Mnew = len(indices) 229 | assert Mnew >= M 230 | xnew = np.empty((N, Mnew)) 231 | for i,j in enumerate(indices): 232 | # Existing vertex, i.e. real data. 233 | if j < M: 234 | xnew[:,i] = x[:,j] 235 | # Fake vertex because of singeltons. 236 | # They will stay 0 so that max pooling chooses the singelton. 
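        # Zeros are a safe neutral value here: the fake vertices are disconnected,
        # so with non-negative (ReLU) activations max pooling over a {real, fake}
        # pair still selects the real vertex.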
237 | # Or -infty ? 238 | else: 239 | xnew[:,i] = np.zeros(N) 240 | return xnew 241 | 242 | def perm_adjacency(A, indices): 243 | """ 244 | Permute adjacency matrix, i.e. exchange node ids, 245 | so that binary unions form the clustering tree. 246 | """ 247 | if indices is None: 248 | return A 249 | 250 | M, M = A.shape 251 | Mnew = len(indices) 252 | assert Mnew >= M 253 | A = A.tocoo() 254 | 255 | # Add Mnew - M isolated vertices. 256 | if Mnew > M: 257 | rows = scipy.sparse.coo_matrix((Mnew-M, M), dtype=np.float32) 258 | cols = scipy.sparse.coo_matrix((Mnew, Mnew-M), dtype=np.float32) 259 | A = scipy.sparse.vstack([A, rows]) 260 | A = scipy.sparse.hstack([A, cols]) 261 | 262 | # Permute the rows and the columns. 263 | perm = np.argsort(indices) 264 | A.row = np.array(perm)[A.row] 265 | A.col = np.array(perm)[A.col] 266 | 267 | # assert np.abs(A - A.T).mean() < 1e-9 268 | assert type(A) is scipy.sparse.coo.coo_matrix 269 | return A 270 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import gensim 2 | import sklearn, sklearn.datasets 3 | import sklearn.naive_bayes, sklearn.linear_model, sklearn.svm, sklearn.neighbors, sklearn.ensemble 4 | import matplotlib.pyplot as plt 5 | import scipy.sparse 6 | import numpy as np 7 | import time, re 8 | 9 | 10 | # Helpers to process text documents. 11 | 12 | 13 | class TextDataset(object): 14 | def clean_text(self, num='substitute'): 15 | # TODO: stemming, lemmatisation 16 | for i,doc in enumerate(self.documents): 17 | # Digits. 18 | if num is 'spell': 19 | doc = doc.replace('0', ' zero ') 20 | doc = doc.replace('1', ' one ') 21 | doc = doc.replace('2', ' two ') 22 | doc = doc.replace('3', ' three ') 23 | doc = doc.replace('4', ' four ') 24 | doc = doc.replace('5', ' five ') 25 | doc = doc.replace('6', ' six ') 26 | doc = doc.replace('7', ' seven ') 27 | doc = doc.replace('8', ' eight ') 28 | doc = doc.replace('9', ' nine ') 29 | elif num is 'substitute': 30 | # All numbers are equal. Useful for embedding (countable words) ? 31 | doc = re.sub('(\\d+)', ' NUM ', doc) 32 | elif num is 'remove': 33 | # Numbers are uninformative (they are all over the place). Useful for bag-of-words ? 34 | # But maybe some kind of documents contain more numbers, e.g. finance. 35 | # Some documents are indeed full of numbers. At least in 20NEWS. 36 | doc = re.sub('[0-9]', ' ', doc) 37 | # Remove everything except a-z characters and single space. 38 | doc = doc.replace('$', ' dollar ') 39 | doc = doc.lower() 40 | doc = re.sub('[^a-z]', ' ', doc) 41 | doc = ' '.join(doc.split()) # same as doc = re.sub('\s{2,}', ' ', doc) 42 | self.documents[i] = doc 43 | 44 | def vectorize(self, **params): 45 | # TODO: count or tf-idf. Or in normalize ? 
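        # One possible tf-idf variant (a sketch, not what this method does by
        # default): replace the count vectorizer with
        #     vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(**params)
        # which accepts the same keyword arguments and would make the separate
        # normalize() step below largely redundant.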
46 | vectorizer = sklearn.feature_extraction.text.CountVectorizer(**params) 47 | self.data = vectorizer.fit_transform(self.documents) 48 | self.vocab = vectorizer.get_feature_names() 49 | assert len(self.vocab) == self.data.shape[1] 50 | 51 | def data_info(self, show_classes=False): 52 | N, M = self.data.shape 53 | sparsity = self.data.nnz / N / M * 100 54 | print('N = {} documents, M = {} words, sparsity={:.4f}%'.format(N, M, sparsity)) 55 | if show_classes: 56 | for i in range(len(self.class_names)): 57 | num = sum(self.labels == i) 58 | print(' {:5d} documents in class {:2d} ({})'.format(num, i, self.class_names[i])) 59 | 60 | def show_document(self, i): 61 | label = self.labels[i] 62 | name = self.class_names[label] 63 | try: 64 | text = self.documents[i] 65 | wc = len(text.split()) 66 | except AttributeError: 67 | text = None 68 | wc = 'N/A' 69 | print('document {}: label {} --> {}, {} words'.format(i, label, name, wc)) 70 | try: 71 | vector = self.data[i,:] 72 | for j in range(vector.shape[1]): 73 | if vector[0,j] != 0: 74 | print(' {:.2f} "{}" ({})'.format(vector[0,j], self.vocab[j], j)) 75 | except AttributeError: 76 | pass 77 | return text 78 | 79 | def keep_documents(self, idx): 80 | """Keep the documents given by the index, discard the others.""" 81 | self.documents = [self.documents[i] for i in idx] 82 | self.labels = self.labels[idx] 83 | self.data = self.data[idx,:] 84 | 85 | def keep_words(self, idx): 86 | """Keep the documents given by the index, discard the others.""" 87 | self.data = self.data[:,idx] 88 | self.vocab = [self.vocab[i] for i in idx] 89 | try: 90 | self.embeddings = self.embeddings[idx,:] 91 | except AttributeError: 92 | pass 93 | 94 | def remove_short_documents(self, nwords, vocab='selected'): 95 | """Remove a document if it contains less than nwords.""" 96 | if vocab is 'selected': 97 | # Word count with selected vocabulary. 98 | wc = self.data.sum(axis=1) 99 | wc = np.squeeze(np.asarray(wc)) 100 | elif vocab is 'full': 101 | # Word count with full vocabulary. 102 | wc = np.empty(len(self.documents), dtype=np.int) 103 | for i,doc in enumerate(self.documents): 104 | wc[i] = len(doc.split()) 105 | idx = np.argwhere(wc >= nwords).squeeze() 106 | self.keep_documents(idx) 107 | return wc 108 | 109 | def keep_top_words(self, M, Mprint=20): 110 | """Keep in the vocaluary the M words who appear most often.""" 111 | freq = self.data.sum(axis=0) 112 | freq = np.squeeze(np.asarray(freq)) 113 | idx = np.argsort(freq)[::-1] 114 | idx = idx[:M] 115 | self.keep_words(idx) 116 | print('most frequent words') 117 | for i in range(Mprint): 118 | print(' {:3d}: {:10s} {:6d} counts'.format(i, self.vocab[i], freq[idx][i])) 119 | return freq[idx] 120 | 121 | def normalize(self, norm='l1'): 122 | """Normalize data to unit length.""" 123 | # TODO: TF-IDF. 
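        # A tf-idf weighting could be plugged in here instead of plain length
        # normalization, e.g. (assuming self.data still holds raw counts):
        #     tfidf = sklearn.feature_extraction.text.TfidfTransformer(norm=norm)
        #     self.data = tfidf.fit_transform(self.data)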
124 | data = self.data.astype(np.float64) 125 | self.data = sklearn.preprocessing.normalize(data, axis=1, norm=norm) 126 | 127 | def embed(self, filename=None, size=100): 128 | """Embed the vocabulary using pre-trained vectors.""" 129 | if filename: 130 | model = gensim.models.Word2Vec.load_word2vec_format(filename, binary=True) 131 | size = model.vector_size 132 | else: 133 | class Sentences(object): 134 | def __init__(self, documents): 135 | self.documents = documents 136 | def __iter__(self): 137 | for document in self.documents: 138 | yield document.split() 139 | model = gensim.models.Word2Vec(Sentences(self.documents), size) 140 | self.embeddings = np.empty((len(self.vocab), size)) 141 | keep = [] 142 | not_found = 0 143 | for i,word in enumerate(self.vocab): 144 | try: 145 | self.embeddings[i,:] = model[word] 146 | keep.append(i) 147 | except KeyError: 148 | not_found += 1 149 | print('{} words not found in corpus'.format(not_found, i)) 150 | self.keep_words(keep) 151 | 152 | class Text20News(TextDataset): 153 | def __init__(self, **params): 154 | dataset = sklearn.datasets.fetch_20newsgroups(**params) 155 | self.documents = dataset.data 156 | self.labels = dataset.target 157 | self.class_names = dataset.target_names 158 | assert max(self.labels) + 1 == len(self.class_names) 159 | N, C = len(self.documents), len(self.class_names) 160 | print('N = {} documents, C = {} classes'.format(N, C)) 161 | 162 | class TextRCV1(TextDataset): 163 | def __init__(self, **params): 164 | dataset = sklearn.datasets.fetch_rcv1(**params) 165 | self.data = dataset.data 166 | self.target = dataset.target 167 | self.class_names = dataset.target_names 168 | assert len(self.class_names) == 103 # 103 categories according to LYRL2004 169 | N, C = self.target.shape 170 | assert C == len(self.class_names) 171 | print('N = {} documents, C = {} classes'.format(N, C)) 172 | 173 | def remove_classes(self, keep): 174 | ## Construct a lookup table for labels. 175 | labels_row = [] 176 | labels_col = [] 177 | class_lookup = {} 178 | for i,name in enumerate(self.class_names): 179 | class_lookup[name] = i 180 | self.class_names = keep 181 | 182 | # Index of classes to keep. 183 | idx_keep = np.empty(len(keep)) 184 | for i,cat in enumerate(keep): 185 | idx_keep[i] = class_lookup[cat] 186 | self.target = self.target[:,idx_keep] 187 | assert self.target.shape[1] == len(keep) 188 | 189 | def show_doc_per_class(self, print_=False): 190 | """Number of documents per class.""" 191 | docs_per_class = np.array(self.target.astype(np.uint64).sum(axis=0)).squeeze() 192 | print('categories ({} assignments in total)'.format(docs_per_class.sum())) 193 | if print_: 194 | for i,cat in enumerate(self.class_names): 195 | print(' {:5s}: {:6d} documents'.format(cat, docs_per_class[i])) 196 | plt.figure(figsize=(17,5)) 197 | plt.plot(sorted(docs_per_class[::-1]),'.') 198 | 199 | def show_classes_per_doc(self): 200 | """Number of classes per document.""" 201 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze() 202 | plt.figure(figsize=(17,5)) 203 | plt.plot(sorted(classes_per_doc[::-1]),'.') 204 | 205 | def select_documents(self): 206 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze() 207 | self.target = self.target[classes_per_doc==1] 208 | self.data = self.data[classes_per_doc==1, :] 209 | 210 | # Convert labels from indicator form to single value. 
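        # After the filtering above each row of self.target contains exactly one
        # nonzero entry, so the COO column index extracted below is that
        # document's single class id.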
211 | N, C = self.target.shape 212 | target = self.target.tocoo() 213 | self.labels = target.col 214 | assert self.labels.min() == 0 215 | assert self.labels.max() == C - 1 216 | 217 | # Bruna and Dropout used 2 * 201369 = 402738 documents. Probably the difference btw v1 and v2. 218 | #return classes_per_doc 219 | 220 | ### Helpers to quantify classifier's quality. 221 | 222 | 223 | def baseline(train_data, train_labels, test_data, test_labels, omit=[]): 224 | """Train various classifiers to get a baseline.""" 225 | clf, train_accuracy, test_accuracy, train_f1, test_f1, exec_time = [], [], [], [], [], [] 226 | clf.append(sklearn.neighbors.KNeighborsClassifier(n_neighbors=10)) 227 | clf.append(sklearn.linear_model.LogisticRegression()) 228 | clf.append(sklearn.naive_bayes.BernoulliNB(alpha=.01)) 229 | clf.append(sklearn.ensemble.RandomForestClassifier()) 230 | clf.append(sklearn.naive_bayes.MultinomialNB(alpha=.01)) 231 | clf.append(sklearn.linear_model.RidgeClassifier()) 232 | clf.append(sklearn.svm.LinearSVC()) 233 | for i,c in enumerate(clf): 234 | if i not in omit: 235 | t_start = time.process_time() 236 | c.fit(train_data, train_labels) 237 | train_pred = c.predict(train_data) 238 | test_pred = c.predict(test_data) 239 | train_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(train_labels, train_pred))) 240 | test_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(test_labels, test_pred))) 241 | train_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(train_labels, train_pred, average='weighted'))) 242 | test_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(test_labels, test_pred, average='weighted'))) 243 | exec_time.append('{:5.2f}'.format(time.process_time() - t_start)) 244 | print('Train accuracy: {}'.format(' '.join(train_accuracy))) 245 | print('Test accuracy: {}'.format(' '.join(test_accuracy))) 246 | print('Train F1 (weighted): {}'.format(' '.join(train_f1))) 247 | print('Test F1 (weighted): {}'.format(' '.join(test_f1))) 248 | print('Execution time: {}'.format(' '.join(exec_time))) 249 | 250 | def grid_search(params, grid_params, train_data, train_labels, val_data, 251 | val_labels, test_data, test_labels, model): 252 | """Explore the hyper-parameter space with an exhaustive grid search.""" 253 | params = params.copy() 254 | train_accuracy, test_accuracy, train_f1, test_f1 = [], [], [], [] 255 | grid = sklearn.grid_search.ParameterGrid(grid_params) 256 | print('grid search: {} combinations to evaluate'.format(len(grid))) 257 | for grid_params in grid: 258 | params.update(grid_params) 259 | name = '{}'.format(grid) 260 | print('\n\n {} \n\n'.format(grid_params)) 261 | m = model(params) 262 | m.fit(train_data, train_labels, val_data, val_labels) 263 | string, accuracy, f1, loss = m.evaluate(train_data, train_labels) 264 | train_accuracy.append('{:5.2f}'.format(accuracy)); train_f1.append('{:5.2f}'.format(f1)) 265 | print('train {}'.format(string)) 266 | string, accuracy, f1, loss = m.evaluate(test_data, test_labels) 267 | test_accuracy.append('{:5.2f}'.format(accuracy)); test_f1.append('{:5.2f}'.format(f1)) 268 | print('test {}'.format(string)) 269 | print('\n\n') 270 | print('Train accuracy: {}'.format(' '.join(train_accuracy))) 271 | print('Test accuracy: {}'.format(' '.join(test_accuracy))) 272 | print('Train F1 (weighted): {}'.format(' '.join(train_f1))) 273 | print('Test F1 (weighted): {}'.format(' '.join(test_f1))) 274 | for i,grid_params in enumerate(grid): 275 | print('{} --> {} {} {} {}'.format(grid_params, 
train_accuracy[i], test_accuracy[i], train_f1[i], test_f1[i])) 276 | 277 | 278 | class model_perf(object): 279 | 280 | def __init__(s): 281 | s.names, s.params = set(), {} 282 | s.fit_accuracies, s.fit_losses, s.fit_time = {}, {}, {} 283 | s.train_accuracy, s.train_f1, s.train_loss = {}, {}, {} 284 | s.test_accuracy, s.test_f1, s.test_loss = {}, {}, {} 285 | 286 | def test(s, model, name, params, train_data, train_labels, val_data, val_labels, test_data, test_labels): 287 | s.params[name] = params 288 | s.fit_accuracies[name], s.fit_losses[name], s.fit_time[name] = \ 289 | model.fit(train_data, train_labels, val_data, val_labels) 290 | string, s.train_accuracy[name], s.train_f1[name], s.train_loss[name] = \ 291 | model.evaluate(train_data, train_labels) 292 | print('train {}'.format(string)) 293 | string, s.test_accuracy[name], s.test_f1[name], s.test_loss[name] = \ 294 | model.evaluate(test_data, test_labels) 295 | print('test {}'.format(string)) 296 | s.names.add(name) 297 | 298 | def show(s, fontsize=None): 299 | if fontsize: 300 | plt.rc('pdf', fonttype=42) 301 | plt.rc('ps', fonttype=42) 302 | plt.rc('font', size=fontsize) # controls default text sizes 303 | plt.rc('axes', titlesize=fontsize) # fontsize of the axes title 304 | plt.rc('axes', labelsize=fontsize) # fontsize of the x any y labels 305 | plt.rc('xtick', labelsize=fontsize) # fontsize of the tick labels 306 | plt.rc('ytick', labelsize=fontsize) # fontsize of the tick labels 307 | plt.rc('legend', fontsize=fontsize) # legend fontsize 308 | plt.rc('figure', titlesize=fontsize) # size of the figure title 309 | print(' accuracy F1 loss time [ms] name') 310 | print('test train test train test train') 311 | for name in sorted(s.names): 312 | print('{:5.2f} {:5.2f} {:5.2f} {:5.2f} {:.2e} {:.2e} {:3.0f} {}'.format( 313 | s.test_accuracy[name], s.train_accuracy[name], 314 | s.test_f1[name], s.train_f1[name], 315 | s.test_loss[name], s.train_loss[name], s.fit_time[name]*1000, name)) 316 | 317 | fig, ax = plt.subplots(1, 2, figsize=(15, 5)) 318 | for name in sorted(s.names): 319 | steps = np.arange(len(s.fit_accuracies[name])) + 1 320 | steps *= s.params[name]['eval_frequency'] 321 | ax[0].plot(steps, s.fit_accuracies[name], '.-', label=name) 322 | ax[1].plot(steps, s.fit_losses[name], '.-', label=name) 323 | ax[0].set_xlim(min(steps), max(steps)) 324 | ax[1].set_xlim(min(steps), max(steps)) 325 | ax[0].set_xlabel('step') 326 | ax[1].set_xlabel('step') 327 | ax[0].set_ylabel('validation accuracy') 328 | ax[1].set_ylabel('training loss') 329 | ax[0].legend(loc='lower right') 330 | ax[1].legend(loc='upper right') 331 | #fig.savefig('training.pdf') 332 | -------------------------------------------------------------------------------- /CayleyNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false, 8 | "deletable": true, 9 | "editable": true 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import tensorflow as tf\n", 14 | "import time, shutil\n", 15 | "import numpy as np\n", 16 | "import os, collections, sklearn\n", 17 | "import joblib\n", 18 | "\n", 19 | "import graph, coarsening\n", 20 | "import scipy.sparse as sp\n", 21 | "\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "%matplotlib inline" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "deletable": true, 30 | "editable": true 31 | }, 32 | "source": [ 33 | "# Graph definition and coarsening" 34 | 
] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true, 41 | "deletable": true, 42 | "editable": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "#Definition of some flags useful later in the code\n", 47 | "\n", 48 | "flags = tf.app.flags\n", 49 | "FLAGS = flags.FLAGS\n", 50 | "\n", 51 | "# Graphs.\n", 52 | "flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')\n", 53 | "flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')\n", 54 | "flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')\n", 55 | "flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')\n", 56 | "\n", 57 | "# Directories.\n", 58 | "flags.DEFINE_string('dir_data', 'data_mnist', 'Directory to store data.')" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false, 66 | "deletable": true, 67 | "editable": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "#Here we proceed at computing the original grid where the images live and the various coarsening that are applied\n", 72 | "#for each level\n", 73 | "\n", 74 | "def grid_graph(m):\n", 75 | " z = graph.grid(m)\n", 76 | " dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric) \n", 77 | " #dist contains the distance of the 8 nearest neighbors for each node sorted in ascending order\n", 78 | " #idx contains the indexes of the 8 nearest for each node sorted in ascending order by distance\n", 79 | "\n", 80 | " A = graph.adjacency(dist, idx)\n", 81 | " return A\n", 82 | "\n", 83 | "def coarsen(A, levels):\n", 84 | " graphs, parents = coarsening.metis(A, levels) #Coarsen a graph multiple times using the METIS algorithm. \n", 85 | " #Everything starts with a random point and then decides how to \n", 86 | " #combine the points.\n", 87 | " #Construction is done a priori, so we have one graph\n", 88 | " #for all the samples!\n", 89 | " \n", 90 | " #graphs = list of spare adjacency matrices (it contains in position \n", 91 | " # 0 the original graph)\n", 92 | " #parents = list of numpy arrays (every array in position i contains \n", 93 | " # the mapping from graph i to graph i+1, i.e. 
the idx of\n", 94 | " # node i in the coarsed graph) \n", 95 | " perms = coarsening.compute_perm(parents) #Return a list of indices to reorder the adjacency and data matrices so\n", 96 | " #that the union of two neighbors from layer to layer forms a binary tree.\n", 97 | " #Fake nodes are appended at the end of the current graph\n", 98 | " laplacians = []\n", 99 | " for i,A in enumerate(graphs):\n", 100 | " M, M = A.shape\n", 101 | "\n", 102 | " # We remove any possible self-connection.\n", 103 | " A = A.tocoo()\n", 104 | " A.setdiag(0)\n", 105 | "\n", 106 | " if i < levels: #if we have to pool the graph \n", 107 | " A = coarsening.perm_adjacency(A, perms[i]) #matrix A is here extended with the fakes nodes\n", 108 | " #in order to do an efficient pooling operation\n", 109 | " #in tensorflow as it was a 1D pooling\n", 110 | "\n", 111 | " A = A.tocsr()\n", 112 | " A.eliminate_zeros()\n", 113 | " Mnew, Mnew = A.shape\n", 114 | " print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))\n", 115 | "\n", 116 | " L = graph.laplacian(A, normalized=FLAGS.normalized_laplacian)\n", 117 | " laplacians.append(L)\n", 118 | " return laplacians, perms[0] if len(perms) > 0 else None\n", 119 | "\n", 120 | "t_start = time.time()\n", 121 | "\n", 122 | "np.random.seed(0)\n", 123 | "A = grid_graph(28)\n", 124 | "L, perm = coarsen(A, FLAGS.coarsening_levels)\n", 125 | "\n", 126 | "print('Execution time: {:.2f}s'.format(time.time() - t_start))\n", 127 | "\n", 128 | "graph.plot_spectrum(L)\n", 129 | "del A" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false, 137 | "deletable": true, 138 | "editable": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "#Normalize Laplacian\n", 143 | "L_norm = []\n", 144 | "for k in range(len(L)):\n", 145 | " L_norm.append(L[k] - sp.eye(L[k].shape[0]))\n", 146 | "graph.plot_spectrum(L_norm, ymin=-1)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "deletable": true, 153 | "editable": true 154 | }, 155 | "source": [ 156 | "# Data loading" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": false, 164 | "deletable": true, 165 | "editable": true 166 | }, 167 | "outputs": [], 168 | "source": [ 169 | "#loading of MNIST dataset\n", 170 | "\n", 171 | "from tensorflow.examples.tutorials.mnist import input_data\n", 172 | "mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)\n", 173 | "\n", 174 | "train_data = mnist.train.images.astype(np.float32)\n", 175 | "val_data = mnist.validation.images.astype(np.float32) #the first 5K samples of the training dataset \n", 176 | " #are used for validation\n", 177 | "test_data = mnist.test.images.astype(np.float32)\n", 178 | "train_labels = mnist.train.labels\n", 179 | "val_labels = mnist.validation.labels\n", 180 | "test_labels = mnist.test.labels\n", 181 | "\n", 182 | "t_start = time.time()\n", 183 | "train_data = coarsening.perm_data(train_data, perm)\n", 184 | "val_data = coarsening.perm_data(val_data, perm)\n", 185 | "test_data = coarsening.perm_data(test_data, perm)\n", 186 | "print('Execution time: {:.2f}s'.format(time.time() - t_start))\n", 187 | "del perm" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "deletable": true, 194 | "editable": true 195 | }, 196 | "source": [ 197 | "# Model definition" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | 
"execution_count": null, 203 | "metadata": { 204 | "collapsed": true, 205 | "deletable": true, 206 | "editable": true 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "class CayleyNet:\n", 211 | " \"\"\"\n", 212 | " The neural network model.\n", 213 | " \"\"\"\n", 214 | " \n", 215 | " #Helper functions used for constructing the model\n", 216 | " def _weight_variable(self, shape, regularization=True, name=\"\"): \n", 217 | " \"\"\"Initializer for the weights\"\"\"\n", 218 | " \n", 219 | " initial = tf.truncated_normal_initializer(0, 0.1)\n", 220 | " var = tf.get_variable('weights'+name, shape, tf.float32, initializer=initial)\n", 221 | " if regularization: #append the loss of the current variable to the regularization term \n", 222 | " self.regularizers.append(tf.nn.l2_loss(var))\n", 223 | " return var\n", 224 | " \n", 225 | " def _bias_variable(self, shape, regularization=True):\n", 226 | " \"\"\"Initializer for the bias\"\"\"\n", 227 | " \n", 228 | " initial = tf.constant_initializer(0.1)\n", 229 | " var = tf.get_variable('bias', shape, tf.float32, initializer=initial)\n", 230 | " if regularization:\n", 231 | " self.regularizers.append(tf.nn.l2_loss(var))\n", 232 | " return var\n", 233 | " \n", 234 | " def _h_variable(self, shape, regularization=False, name=''):\n", 235 | " \"\"\"Initializer for the zoom parameter h\"\"\"\n", 236 | " \n", 237 | " initial = tf.random_uniform_initializer()\n", 238 | " var = tf.get_variable('h'+name, shape, tf.float32, initializer=initial)\n", 239 | " if regularization:\n", 240 | " self.regularizers.append(tf.nn.l2_loss(var))\n", 241 | " return var\n", 242 | "\n", 243 | " def frobenius_norm(self, tensor): \n", 244 | " \"\"\"Computes the frobenius norm for a given laplacian\"\"\"\n", 245 | " \n", 246 | " square_tensor = tf.square(tensor)\n", 247 | " tensor_sum = tf.reduce_sum(square_tensor)\n", 248 | " frobenius_norm = tf.sqrt(tensor_sum)\n", 249 | " return frobenius_norm\n", 250 | " \n", 251 | " def compute_sparse_D_inv_indices(self, M):\n", 252 | " \"\"\"Computes the indices required for constructing a sparse version of D^-1.\"\"\"\n", 253 | " \n", 254 | " idx_main_diag = np.tile(np.expand_dims(np.arange(0, 2*M),1), [1, 2])\n", 255 | " idx_diag_ur = np.concatenate([np.expand_dims(np.arange(0, M),1), np.expand_dims(np.arange(0, M)+M,1)], 1)\n", 256 | " idx_diag_ll = np.concatenate([np.expand_dims(np.arange(0, M)+M,1), np.expand_dims(np.arange(0, M),1)], 1)\n", 257 | " idx = np.concatenate([idx_main_diag, idx_diag_ur, idx_diag_ll], 0)\n", 258 | " return idx \n", 259 | " \n", 260 | " def compute_sparse_R_indices(self, L_off_diag, M):\n", 261 | " \"\"\"Computes the indices required for constructing a sparse version of R.\"\"\"\n", 262 | " \n", 263 | " idx_L = np.asarray(np.where(L_off_diag)).T\n", 264 | " idx_L_sh = idx_L + np.expand_dims(np.asarray([M,M]),0)\n", 265 | " idx = np.concatenate([idx_L, idx_L_sh])\n", 266 | " return idx\n", 267 | " \n", 268 | " def compute_sparse_numerator_projection_indices(self, L, M):\n", 269 | " \"\"\"Computes the indices required for constructing the numerator projection sparse matrix.\"\"\"\n", 270 | " \n", 271 | " idx_L = np.asarray(np.where(L)).T\n", 272 | " idx_L_sh = idx_L + np.expand_dims(np.asarray([M,M]),0)\n", 273 | " idx_diag_ur = np.concatenate([np.expand_dims(np.arange(0, M),1), np.expand_dims(np.arange(0, M)+M,1)], 1)\n", 274 | " idx_diag_ll = np.concatenate([np.expand_dims(np.arange(0, M)+M,1), np.expand_dims(np.arange(0, M),1)], 1)\n", 275 | " idx = np.concatenate([idx_L, idx_L_sh, idx_diag_ur, 
idx_diag_ll])\n", 276 | " return idx\n", 277 | " \n", 278 | " def cayleyConv(self, x, L_np, Fout, K): \n", 279 | " \"\"\"Applies chebyshev polynomials over the graph.\"\"\"\n", 280 | " \n", 281 | " M, Fin = x.get_shape()[1:] # M the number of samples in the images, Fin the number of features\n", 282 | " M, Fin = int(M), int(Fin)\n", 283 | " N = tf.shape(x)[0] # N is the number of images\n", 284 | " \n", 285 | " # Applies cayley transform by means of Jacobi method.\n", 286 | " diag_L_np = np.diag(L_np) # vector containing the diagonal of L\n", 287 | " L_off_diag_np = L_np - np.diag(diag_L_np) # off-diagonal entries of L \n", 288 | " \n", 289 | " list_x_pos_exp = [tf.cast(tf.expand_dims(x,0), 'complex64')] # 1 x N x M x F\n", 290 | " \n", 291 | " for iii in range(self.n_h): # for every zoom parameter we want to use (typically one).\n", 292 | " h = self._h_variable([1,1], regularization=False, name='_h%f' % iii)\n", 293 | " self.list_h.append(h)\n", 294 | " \n", 295 | " # Computes matrices required by Jacobi (https://en.wikipedia.org/wiki/Jacobi_method)\n", 296 | " \n", 297 | " # To make things more efficient we reprent a complex vector of shape M as real vector of shape 2*M\n", 298 | " # where the first M values represent real coefficients while the second M the imaginary ones.\n", 299 | " # All the matrices here defined are computed according to such notation (it allows to use sparse matrices\n", 300 | " # with TF with complex values).\n", 301 | " \n", 302 | " # ************************** COMPUTES numerator projection **************************\n", 303 | " idx = self.compute_sparse_numerator_projection_indices(L_np, M)\n", 304 | " \n", 305 | " vals_L = tf.squeeze(h*L_np[np.where(L_np)])\n", 306 | " vals = tf.concat([vals_L, vals_L, tf.ones([M,]), -tf.ones([M,])], 0)\n", 307 | " \n", 308 | " cayley_op_neg_sp = tf.SparseTensor(idx, vals, [M*2, M*2])\n", 309 | " cayley_op_neg_sp = tf.sparse_reorder(cayley_op_neg_sp)\n", 310 | " \n", 311 | " # ************************** COMPUTES D **************************\n", 312 | " D_real = tf.squeeze(h*diag_L_np)\n", 313 | " D = tf.complex(D_real, tf.ones_like(D_real))\n", 314 | " D_inv = tf.pow(D, -tf.ones_like(D)) # vector of M elements <- diagonal of D^-1\n", 315 | " \n", 316 | " idx = self.compute_sparse_D_inv_indices(M)\n", 317 | " vals = tf.concat([tf.real(D_inv), tf.real(D_inv), -tf.imag(D_inv), tf.imag(D_inv)], 0)\n", 318 | " \n", 319 | " D_inv_ext_sp = tf.SparseTensor(idx, vals, [M*2, M*2])\n", 320 | " D_inv_ext_sp = tf.sparse_reorder(D_inv_ext_sp)\n", 321 | " \n", 322 | " # ************************** COMPUTES R **************************\n", 323 | " idx = self.compute_sparse_R_indices(L_off_diag_np, M)\n", 324 | " \n", 325 | " vals_L = tf.squeeze(h*L_off_diag_np[np.where(L_off_diag_np)])\n", 326 | " vals = tf.concat([vals_L, vals_L], 0)\n", 327 | " \n", 328 | " R_sp = tf.SparseTensor(idx, vals, [M*2, M*2])\n", 329 | " R_sp = tf.sparse_reorder(R_sp)\n", 330 | " \n", 331 | " # Applies Jacobi method\n", 332 | " c_transform = tf.transpose(x, [1,0,2]) # shape = M, N, F\n", 333 | " c_transform = tf.reshape(c_transform, [M, -1]) # shape = M, N*F\n", 334 | " last_sol = tf.concat([c_transform, tf.zeros_like(c_transform)],0)\n", 335 | " for k in range(K): # for every order of our polynomial\n", 336 | " \n", 337 | " # Jacobi initialization\n", 338 | " b = tf.sparse_tensor_dense_matmul(cayley_op_neg_sp, last_sol) # shape = M, N*F\n", 339 | " a = tf.sparse_tensor_dense_matmul(D_inv_ext_sp, b) # shape = M, N*F\n", 340 | " \n", 341 | " # Jacobi iterations\n", 
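    "            # The while_loop below runs num_jacobi_iter sweeps of y <- D^{-1} b - D^{-1} R y,\n",
    "            # which approximately solves (h*L + i*I) y = (h*L - i*I) y_prev, i.e. one\n",
    "            # application of the Cayley transform per value of k. Here D is the diagonal of\n",
    "            # h*L + i*I, R its off-diagonal part, b = (h*L - i*I) y_prev, and everything is\n",
    "            # expressed in the stacked [real; imaginary] 2M representation built above.\n",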
342 | " cond = lambda i, _: tf.less(i, self.num_jacobi_iter)\n", 343 | " body = lambda i, c_sol: [tf.add(i, 1), a - tf.sparse_tensor_dense_matmul(D_inv_ext_sp, \n", 344 | " tf.sparse_tensor_dense_matmul(R_sp, c_sol))]\n", 345 | " \n", 346 | " c_sol = tf.while_loop(cond, body, [0, a], parallel_iterations=1, swap_memory=True)\n", 347 | " c_sol = c_sol[-1]\n", 348 | " \n", 349 | " # Constructs and saves the final complex matrices\n", 350 | " c_sol_complex = tf.complex(c_sol[:M,:], c_sol[M:, :]) #M x N*F\n", 351 | " c_sol_reshaped = tf.reshape(c_sol_complex, [M, -1, Fin])\n", 352 | " c_sol_reshaped = tf.transpose(c_sol_reshaped, [1, 0, 2]) #N x M x F\n", 353 | " list_x_pos_exp.append(tf.expand_dims(c_sol_reshaped,0)) #1 x N x M x Flist_x_pos_exp\n", 354 | " \n", 355 | " last_sol = c_sol\n", 356 | " x_pos_exp = tf.concat(list_x_pos_exp, 0) # shape = n_h*K x N x M x Fin\n", 357 | " x_pos_exp = tf.transpose(x_pos_exp, [1,2,0,3]) #N x M x n_h*K x Fin\n", 358 | " x_pos_exp = tf.reshape(x_pos_exp, [N*M, -1]) #N*M x 2*K*Fin\n", 359 | " \n", 360 | " real_conv_weights = self._weight_variable([Fin*(self.n_h*K+1), Fout], regularization=False, name='_real')#tf.ones([Fin*(self.n_h*K+1), Fout])#self._weight_variable([Fin*(self.n_h*K+1), Fout], regularization=False, name='_real')\n", 361 | " imag_conv_weights = self._weight_variable([Fin*(self.n_h*K+1), Fout], regularization=False, name='_imag')#tf.ones([Fin*(self.n_h*K+1), Fout])#self._weight_variable([Fin*(self.n_h*K+1), Fout], regularization=False, name='_imag')\n", 362 | " \n", 363 | " W_pos_exp = tf.complex(real_conv_weights, -imag_conv_weights)\n", 364 | " \n", 365 | " x_pos_exp_filt = tf.matmul(x_pos_exp, W_pos_exp)\n", 366 | " \n", 367 | " x_filt = 2*tf.real(x_pos_exp_filt)\n", 368 | " return tf.reshape(x_filt, [N, M, Fout])\n", 369 | "\n", 370 | "\n", 371 | " def b1relu(self, x): #sums a bias and applies relu\n", 372 | " \"\"\"Bias and ReLU. One bias per filter.\"\"\"\n", 373 | " N, M, F = x.get_shape()\n", 374 | " b = self._bias_variable([1, 1, int(F)], regularization=False)\n", 375 | " return tf.nn.relu(x + b) #add the bias to the convolutive layer\n", 376 | "\n", 377 | "\n", 378 | " def mpool1(self, x, p): #efficient pooling realized thanks to the reordering of the laplacians we have done a priori\n", 379 | " \"\"\"Max pooling of size p. Should be a power of 2.\"\"\"\n", 380 | " if p > 1:\n", 381 | " x = tf.expand_dims(x, 3) # N x M x F x 1\n", 382 | " x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')\n", 383 | " return tf.squeeze(x, [3]) # N x M/p x F\n", 384 | " else:\n", 385 | " return x\n", 386 | " \n", 387 | "\n", 388 | " def b1relu(self, x): #sums a bias and applies relu\n", 389 | " \"\"\"Bias and ReLU. One bias per filter.\"\"\"\n", 390 | " N, M, F = x.get_shape()\n", 391 | " b = self._bias_variable([1, 1, int(F)], regularization=False)\n", 392 | " return tf.nn.relu(x + b) #add the bias to the convolutive layer\n", 393 | "\n", 394 | "\n", 395 | " def mpool1(self, x, p): #efficient pooling realized thanks to the reordering of the laplacians we have done a priori\n", 396 | " \"\"\"Max pooling of size p. 
Should be a power of 2.\"\"\"\n", 397 | " if p > 1:\n", 398 | " x = tf.expand_dims(x, 3) # N x M x F x 1\n", 399 | " x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')\n", 400 | " return tf.squeeze(x, [3]) # N x M/p x F\n", 401 | " else:\n", 402 | " return x\n", 403 | "\n", 404 | " def fc(self, x, Mout, relu=True):\n", 405 | " \"\"\"Fully connected layer with Mout features.\"\"\"\n", 406 | " N, Min = x.get_shape()\n", 407 | " W = self._weight_variable([int(Min), Mout], regularization=True)\n", 408 | " b = self._bias_variable([Mout], regularization=True)\n", 409 | " x = tf.matmul(x, W) + b\n", 410 | " return tf.nn.relu(x) if relu else x\n", 411 | " \n", 412 | " #function used for extracting the result of our model\n", 413 | " def _inference(self, x, dropout): #definition of the model\n", 414 | " \n", 415 | " # Graph convolutional layers.\n", 416 | " x = tf.expand_dims(x, 2) # N x M x F=1\n", 417 | " j = 0\n", 418 | " self.list_h = list()\n", 419 | " for i in range(len(self.p)):\n", 420 | " with tf.variable_scope('cgconv{}'.format(i+1)):\n", 421 | " with tf.name_scope('filter'):\n", 422 | " x = self.cayleyConv(x, self.L_np[i*2], self.F[i], self.K[i])\n", 423 | " if (i==0):\n", 424 | " self.debug = x\n", 425 | " with tf.name_scope('bias_relu'):\n", 426 | " x = self.b1relu(tf.cast(tf.real(x), 'float32'))\n", 427 | " with tf.name_scope('pooling'):\n", 428 | " x = self.mpool1(x, self.p[i])\n", 429 | " \n", 430 | " j += int(np.log2(self.p[i])) if self.p[i] > 1 else 0\n", 431 | " \n", 432 | " # Fully connected hidden layers.\n", 433 | " _, M, F = x.get_shape()\n", 434 | " x = tf.reshape(x, [-1, int(M*F)]) # N x M\n", 435 | " for i,M in enumerate(self.M[:-1]): #apply a fully connected layer for each layer defined in M\n", 436 | " #(we discard the last value in M since it contains the number of classes we have\n", 437 | " #to predict)\n", 438 | " with tf.variable_scope('fc{}'.format(i+1)):\n", 439 | " x = self.fc(x, M)\n", 440 | " x = tf.nn.dropout(x, dropout)\n", 441 | " \n", 442 | " # Logits linear layer, i.e. softmax without normalization.\n", 443 | " with tf.variable_scope('logits'):\n", 444 | " x = self.fc(x, self.M[-1], relu=False)\n", 445 | " return x\n", 446 | " \n", 447 | " def __init__(self, p, K, F, M, M_0, batch_size, num_jacobi_iter, L,\n", 448 | " decay_steps, decay_rate, learning_rate=1e-4, momentum=0.9, regularization=5e-4, clip_norm=1e1,\n", 449 | " idx_gpu = '/gpu:0'):\n", 450 | " self.regularizers = list() #list of regularization l2 loss for multiple variables\n", 451 | " self.n_h = 1\n", 452 | " self.num_jacobi_iter = num_jacobi_iter\n", 453 | " self.p = p #dimensions of the pooling layers\n", 454 | " self.K = K #List of polynomial orders, i.e. 
filter sizes or number of hops\n", 455 | " self.F = F #Number of features of convolutional layers\n", 456 | " \n", 457 | " self.M = M #Number of neurons in fully connected layers\n", 458 | " \n", 459 | " self.M_0 = M_0 #number of elements in the first graph \n", 460 | " \n", 461 | " self.batch_size = batch_size\n", 462 | " \n", 463 | " #definition of some learning parameters\n", 464 | " self.decay_steps = decay_steps\n", 465 | " self.decay_rate = decay_rate\n", 466 | " self.learning_rate = learning_rate\n", 467 | " self.regularization = regularization\n", 468 | " \n", 469 | " with tf.Graph().as_default() as g:\n", 470 | " self.graph = g\n", 471 | " tf.set_random_seed(0)\n", 472 | " with tf.device(idx_gpu):\n", 473 | " #definition of placeholders\n", 474 | " self.L_np = [c_L.toarray().astype('float32') for c_L in L]\n", 475 | " self.ph_data = tf.placeholder(tf.float32, (self.batch_size, M_0), 'data')\n", 476 | " self.ph_labels = tf.placeholder(tf.int32, (self.batch_size), 'labels')\n", 477 | " self.ph_dropout = tf.placeholder(tf.float32, (), 'dropout')\n", 478 | " \n", 479 | " #Model construction\n", 480 | " self.logits = self._inference(self.ph_data, self.ph_dropout)\n", 481 | " \n", 482 | " #Definition of the loss function\n", 483 | " with tf.name_scope('loss'):\n", 484 | " self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.ph_labels)\n", 485 | " self.cross_entropy = tf.reduce_mean(self.cross_entropy)\n", 486 | " with tf.name_scope('regularization'):\n", 487 | " self.regularization *= tf.add_n(self.regularizers)\n", 488 | " self.loss = self.cross_entropy + self.regularization\n", 489 | " \n", 490 | " #Solver Definition\n", 491 | " with tf.name_scope('training'):\n", 492 | " # Learning rate.\n", 493 | " global_step = tf.Variable(0, name='global_step', trainable=False) #used for counting how many iterations we have done\n", 494 | " if decay_rate != 1: #applies an exponential decay of the lr wrt the number of iterations done\n", 495 | " learning_rate = tf.train.exponential_decay(\n", 496 | " learning_rate, global_step, decay_steps, decay_rate, staircase=True)\n", 497 | " # Optimizer.\n", 498 | " if momentum == 0:\n", 499 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", 500 | " else: #applies momentum for increasing the robustness of the gradient \n", 501 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n", 502 | " #grads = optimizer.compute_gradients(self.loss)\n", 503 | " tvars = tf.trainable_variables()\n", 504 | " #grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clip_norm)\n", 505 | " grads, variables = zip(*optimizer.compute_gradients(self.loss))\n", 506 | " grads, _ = tf.clip_by_global_norm(grads, clip_norm)\n", 507 | " self.op_gradients = optimizer.apply_gradients(zip(grads, variables), \n", 508 | " global_step=global_step)\n", 509 | " \n", 510 | " #Computation of the norm gradients (useful for debugging)\n", 511 | " self.var_grad = tf.gradients(self.loss, tf.trainable_variables())\n", 512 | " self.norm_grad = self.frobenius_norm(tf.concat([tf.reshape(g, [-1]) for g in self.var_grad], 0))\n", 513 | "\n", 514 | " #Extraction of the predictions and computation of accuracy\n", 515 | " self.predictions = tf.cast(tf.argmax(self.logits, dimension=1), tf.int32)\n", 516 | " self.accuracy = 100 * tf.contrib.metrics.accuracy(self.predictions, self.ph_labels)\n", 517 | " \n", 518 | " # Create a session for running Ops on the Graph.\n", 519 | " config = tf.ConfigProto(allow_soft_placement = 
True)\n", 520 | " config.gpu_options.allow_growth = True\n", 521 | " self.session = tf.Session(config=config)\n", 522 | "\n", 523 | " # Run the Op to initialize the variables.\n", 524 | " init = tf.global_variables_initializer()\n", 525 | " self.session.run(init)" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": { 531 | "deletable": true, 532 | "editable": true 533 | }, 534 | "source": [ 535 | "# Training & testing" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "collapsed": true, 543 | "deletable": true, 544 | "editable": true 545 | }, 546 | "outputs": [], 547 | "source": [ 548 | "#Convolutional parameters\n", 549 | "p = [4, 4] # Dimensions of the pooling layers\n", 550 | "K = [12, 12] # List of polynomial orders, i.e. filter sizes or number of hops\n", 551 | "F = [32, 64] # Number of features of convolutional layers\n", 552 | "\n", 553 | "#FC parameters\n", 554 | "C = max(train_labels) + 1 # Number of classes we have\n", 555 | "M = [512, C] # Number of neurons in fully connected layers\n", 556 | "\n", 557 | "#Solver parameters\n", 558 | "batch_size = 100\n", 559 | "decay_steps = train_data.shape[0] / batch_size # number of steps to do before decreasing the learning rate\n", 560 | "decay_rate = 0.95\n", 561 | "learning_rate = 0.01\n", 562 | "momentum = 0.9\n", 563 | "regularization = 5e-4\n", 564 | "\n", 565 | "# Definition of keep probabilities for dropout layers\n", 566 | "dropout_training = 0.5\n", 567 | "dropout_val_test = 1.0\n", 568 | "\n", 569 | "num_jacobi_iter = 10" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": { 576 | "collapsed": false, 577 | "deletable": true, 578 | "editable": true 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "# Construction of the learning obj\n", 583 | "M_0 = L[0].shape[0] # number of elements in the first graph\n", 584 | "learning_obj = CayleyNet(p, K, F, M, M_0, batch_size, num_jacobi_iter, L,\n", 585 | " decay_steps, decay_rate,\n", 586 | " learning_rate=learning_rate, regularization=regularization,\n", 587 | " momentum=momentum)#, clip_norm=100)\n", 588 | "\n", 589 | "# definition of overall number of training iterations and validation frequency\n", 590 | "num_iter_val = 600\n", 591 | "num_total_iter_training = 21000\n", 592 | "\n", 593 | "num_iter = 0\n", 594 | "\n", 595 | "list_training_loss = list()\n", 596 | "list_training_norm_grad = list()\n", 597 | "list_val_accuracy = list()" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": null, 603 | "metadata": { 604 | "collapsed": false, 605 | "deletable": true, 606 | "editable": true, 607 | "scrolled": true 608 | }, 609 | "outputs": [], 610 | "source": [ 611 | "#training and validation\n", 612 | "indices = collections.deque() # queue containing a permutation of the training indexes\n", 613 | "for k in range(num_iter, num_total_iter_training):\n", 614 | "\n", 615 | " #Construction of the training batch\n", 616 | " if len(indices) < batch_size: # Be sure to have used all the samples before using one a second time.\n", 617 | " indices.extend(np.random.permutation(train_data.shape[0])) #reinitialize the queue of indices\n", 618 | " idx = [indices.popleft() for i in range(batch_size)] #extract the current batch of samples\n", 619 | "\n", 620 | " #data extraction\n", 621 | " batch_data, batch_labels = train_data[idx,:], train_labels[idx] \n", 622 | "\n", 623 | " feed_dict = {learning_obj.ph_data: batch_data, \n", 624 | " 
learning_obj.ph_labels: batch_labels, \n", 625 | " learning_obj.ph_dropout: dropout_training}\n", 626 | "\n", 627 | " #Training\n", 628 | " tic = time.time()\n", 629 | " _, current_training_loss, norm_grad = learning_obj.session.run([learning_obj.op_gradients, \n", 630 | " learning_obj.loss, \n", 631 | " learning_obj.norm_grad], feed_dict = feed_dict) \n", 632 | " training_time = time.time() - tic\n", 633 | "\n", 634 | " list_training_loss.append(current_training_loss)\n", 635 | " list_training_norm_grad.append(norm_grad)\n", 636 | " if (np.mod(num_iter, num_iter_val)==0): #validation\n", 637 | " msg = \"[TRN] iter = %03i, cost = %3.2e, |grad| = %.2e (%3.2es)\" \\\n", 638 | " % (num_iter, list_training_loss[-1], list_training_norm_grad[-1], training_time)\n", 639 | " print msg\n", 640 | "\n", 641 | " #Validation Code\n", 642 | " tic = time.time()\n", 643 | " val_accuracy = 0\n", 644 | " for begin in range(0, val_data.shape[0], batch_size):\n", 645 | " end = begin + batch_size\n", 646 | " end = min([end, val_data.shape[0]])\n", 647 | "\n", 648 | " #data extraction\n", 649 | " batch_data = np.zeros((end-begin, val_data.shape[1]))\n", 650 | " batch_data = val_data[begin:end,:]\n", 651 | " batch_labels = np.zeros(batch_size)\n", 652 | " batch_labels[:end-begin] = val_labels[begin:end]\n", 653 | "\n", 654 | " feed_dict = {learning_obj.ph_data: batch_data, \n", 655 | " learning_obj.ph_labels: batch_labels,\n", 656 | " learning_obj.ph_dropout: dropout_val_test}\n", 657 | "\n", 658 | " batch_accuracy = learning_obj.session.run(learning_obj.accuracy, feed_dict)\n", 659 | " val_accuracy += batch_accuracy*batch_data.shape[0]\n", 660 | " val_accuracy = val_accuracy/val_data.shape[0]\n", 661 | "\n", 662 | " val_time = time.time() - tic\n", 663 | " msg = \"[VAL] iter = %03i, acc = %4.2f (%3.2es)\" % (num_iter, val_accuracy, val_time)\n", 664 | " print msg\n", 665 | " num_iter += 1" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": null, 671 | "metadata": { 672 | "collapsed": false, 673 | "deletable": true, 674 | "editable": true 675 | }, 676 | "outputs": [], 677 | "source": [ 678 | "#Test code\n", 679 | "tic = time.time()\n", 680 | "test_accuracy = 0\n", 681 | "for begin in range(0, test_data.shape[0], batch_size):\n", 682 | " end = begin + batch_size\n", 683 | " end = min([end, test_data.shape[0]])\n", 684 | "\n", 685 | " batch_data = np.zeros((end-begin, test_data.shape[1]))\n", 686 | " batch_data = test_data[begin:end,:]\n", 687 | "\n", 688 | " feed_dict = {learning_obj.ph_data: batch_data, learning_obj.ph_dropout: 1}\n", 689 | "\n", 690 | " batch_labels = np.zeros(batch_size)\n", 691 | " batch_labels[:end-begin] = test_labels[begin:end]\n", 692 | " feed_dict[learning_obj.ph_labels] = batch_labels\n", 693 | "\n", 694 | " batch_accuracy = learning_obj.session.run(learning_obj.accuracy, feed_dict)\n", 695 | " test_accuracy += batch_accuracy*batch_data.shape[0]\n", 696 | "test_accuracy = test_accuracy/test_data.shape[0]\n", 697 | "test_time = time.time() - tic\n", 698 | "msg = \"[TST] iter = %03i, acc = %4.2f (%3.2es)\" % (num_iter, test_accuracy, test_time)\n", 699 | "print msg" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": { 706 | "collapsed": true, 707 | "deletable": true, 708 | "editable": true 709 | }, 710 | "outputs": [], 711 | "source": [] 712 | } 713 | ], 714 | "metadata": { 715 | "kernelspec": { 716 | "display_name": "Python 2", 717 | "language": "python", 718 | "name": "python2" 719 | }, 720 | "language_info": { 
721 | "codemirror_mode": {
722 | "name": "ipython",
723 | "version": 2
724 | },
725 | "file_extension": ".py",
726 | "mimetype": "text/x-python",
727 | "name": "python",
728 | "nbconvert_exporter": "python",
729 | "pygments_lexer": "ipython2",
730 | "version": "2.7.6"
731 | }
732 | },
733 | "nbformat": 4,
734 | "nbformat_minor": 1
735 | }
736 | 
--------------------------------------------------------------------------------
/.ipynb_checkpoints/CayleyNet-checkpoint.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/.ipynb_checkpoints/CayleyNet-checkpoint.ipynb
--------------------------------------------------------------------------------