├── data_mnist
│   └── __init__.py
├── .DS_Store
├── graph.pyc
├── utils.pyc
├── pic
│   ├── web.png
│   └── home100.jpg
├── coarsening.pyc
├── README.md
├── graph.py
├── coarsening.py
├── utils.py
├── CayleyNet.ipynb
└── .ipynb_checkpoints
    └── CayleyNet-checkpoint.ipynb
/data_mnist/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/.DS_Store
--------------------------------------------------------------------------------
/graph.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/graph.pyc
--------------------------------------------------------------------------------
/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/utils.pyc
--------------------------------------------------------------------------------
/pic/web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/pic/web.png
--------------------------------------------------------------------------------
/coarsening.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/coarsening.pyc
--------------------------------------------------------------------------------
/pic/home100.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amoliu/CayleyNet/HEAD/pic/home100.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CayleyNets
2 | We present a TensorFlow implementation of the Graph Convolutional Neural Network introduced in:
3 |
4 | CayleyNets: Graph Convolutional Neural Networks with Complex Rational Spectral Filters
5 | IEEE Transactions on Signal Processing, 2018
6 | Ron Levie*, Federico Monti*, Xavier Bresson, Michael M. Bronstein
7 |
8 | https://arxiv.org/abs/1705.07664
9 |
10 | The repository contains a sparse implementation of the network used to solve the MNIST digit classification problem described in the paper. Rational spectral filters are approximated with the Jacobi method to provide an efficient solution.
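
For reference, the filters applied by the network are Cayley polynomials of the zoomed Laplacian: with learned coefficients c_0, ..., c_r and learned spectral zoom h,

    g_{c,h}(Δ) x = c_0 x + 2 Re( Σ_{j=1..r} c_j (hΔ - iI)^j (hΔ + iI)^{-j} x ).

The inverse powers (hΔ + iI)^{-j} are never formed explicitly: each application is approximated with a fixed number of Jacobi iterations, so only sparse matrix-vector products are needed (see the paper for details).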
11 |
12 | ## When shall I use CayleyNet?
13 |
14 | CayleyNet is a Graph CNN with spectral zoom properties, able to operate effectively on signals defined over graphs. Thanks to these spectral properties, CayleyNet is well suited to a variety of domains (e.g. citation networks, community graphs, user/item similarity graphs). Variants of the architecture implemented here have achieved state-of-the-art performance on vertex classification, community detection and matrix completion tasks.
15 |
16 | ## Useful links
17 |
18 | inf.usi.ch/phd/monti
19 | geometricdeeplearning.com
20 |
--------------------------------------------------------------------------------
/graph.py:
--------------------------------------------------------------------------------
1 | import sklearn.metrics
2 | import sklearn.neighbors
3 | import matplotlib.pyplot as plt
4 | import scipy.sparse
5 | import scipy.sparse.linalg
6 | import scipy.spatial.distance
7 | import numpy as np
8 |
9 |
10 | def grid(m, dtype=np.float32):
11 | """Return the embedding of a grid graph."""
12 | M = m**2
13 | x = np.linspace(0, 1, m, dtype=dtype)
14 | y = np.linspace(0, 1, m, dtype=dtype)
15 | xx, yy = np.meshgrid(x, y)
16 | z = np.empty((M, 2), dtype)
17 | z[:, 0] = xx.reshape(M)
18 | z[:, 1] = yy.reshape(M)
19 | return z
20 |
21 |
22 | def distance_scipy_spatial(z, k=4, metric='euclidean'):
23 | """Compute exact pairwise distances."""
24 | d = scipy.spatial.distance.pdist(z, metric)
25 | d = scipy.spatial.distance.squareform(d)
26 | # k-NN graph.
27 | idx = np.argsort(d)[:, 1:k+1]
28 | d.sort()
29 | d = d[:, 1:k+1]
30 | return d, idx
31 |
32 |
33 | def distance_sklearn_metrics(z, k=4, metric='euclidean'):
34 | """Compute exact pairwise distances."""
35 | d = sklearn.metrics.pairwise.pairwise_distances(
36 | z, metric=metric, n_jobs=-2)
37 | # k-NN graph.
38 | idx = np.argsort(d)[:, 1:k+1]
39 | d.sort()
40 | d = d[:, 1:k+1]
41 | return d, idx
42 |
43 |
44 | def distance_lshforest(z, k=4, metric='cosine'):
45 | """Return an approximation of the k-nearest cosine distances."""
46 |     assert metric == 'cosine'
47 | lshf = sklearn.neighbors.LSHForest()
48 | lshf.fit(z)
49 | dist, idx = lshf.kneighbors(z, n_neighbors=k+1)
50 | assert dist.min() < 1e-10
51 | dist[dist < 0] = 0
52 | return dist, idx
53 |
54 | # TODO: other ANNs s.a. NMSLIB, EFANNA, FLANN, Annoy, sklearn neighbors, PANN
55 |
56 |
57 | def adjacency(dist, idx):
58 | """Return the adjacency matrix of a kNN graph."""
59 | M, k = dist.shape
60 |     assert (M, k) == idx.shape
61 | assert dist.min() >= 0
62 |
63 | # Weights.
64 | sigma2 = np.mean(dist[:, -1])**2
65 | dist = np.exp(- dist**2 / sigma2)
66 |
67 | # Weight matrix.
68 | I = np.arange(0, M).repeat(k)
69 | J = idx.reshape(M*k)
70 | V = dist.reshape(M*k)
71 | W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M))
72 |
73 | # No self-connections.
74 | W.setdiag(0)
75 |
76 | # Non-directed graph.
77 | bigger = W.T > W
78 | W = W - W.multiply(bigger) + W.T.multiply(bigger)
79 |
80 | assert W.nnz % 2 == 0
81 | assert np.abs(W - W.T).mean() < 1e-10
82 | assert type(W) is scipy.sparse.csr.csr_matrix
83 | return W
84 |
85 |
86 | def replace_random_edges(A, noise_level):
87 | """Replace randomly chosen edges by random edges."""
88 | M, M = A.shape
89 | n = int(noise_level * A.nnz // 2)
90 |
91 | indices = np.random.permutation(A.nnz//2)[:n]
92 | rows = np.random.randint(0, M, n)
93 | cols = np.random.randint(0, M, n)
94 | vals = np.random.uniform(0, 1, n)
95 | assert len(indices) == len(rows) == len(cols) == len(vals)
96 |
97 | A_coo = scipy.sparse.triu(A, format='coo')
98 | assert A_coo.nnz == A.nnz // 2
99 | assert A_coo.nnz >= n
100 | A = A.tolil()
101 |
102 | for idx, row, col, val in zip(indices, rows, cols, vals):
103 | old_row = A_coo.row[idx]
104 | old_col = A_coo.col[idx]
105 |
106 | A[old_row, old_col] = 0
107 | A[old_col, old_row] = 0
108 | A[row, col] = 1
109 | A[col, row] = 1
110 |
111 | A.setdiag(0)
112 | A = A.tocsr()
113 | A.eliminate_zeros()
114 | return A
115 |
116 |
117 | def laplacian(W, normalized=True):
118 | """Return the Laplacian of the weigth matrix."""
119 |
120 | # Degree matrix.
121 | d = W.sum(axis=0)
122 |
123 | # Laplacian matrix.
124 | if not normalized:
125 | D = scipy.sparse.diags(d.A.squeeze(), 0)
126 | L = D - W
127 | else:
128 | d += np.spacing(np.array(0, W.dtype))
129 | d = 1 / np.sqrt(d)
130 | D = scipy.sparse.diags(d.A.squeeze(), 0)
131 | I = scipy.sparse.identity(d.size, dtype=W.dtype)
132 | L = I - D * W * D
133 |
134 | # assert np.abs(L - L.T).mean() < 1e-9
135 | assert type(L) is scipy.sparse.csr.csr_matrix
136 | return L
137 |
138 |
139 | def lmax(L, normalized=True):
140 | """Upper-bound on the spectrum."""
141 | if normalized:
142 | return 2
143 | else:
144 | return scipy.sparse.linalg.eigsh(
145 | L, k=1, which='LM', return_eigenvectors=False)[0]
146 |
147 |
148 | def fourier(L, algo='eigh', k=1):
149 | """Return the Fourier basis, i.e. the EVD of the Laplacian."""
150 |
151 | def sort(lamb, U):
152 | idx = lamb.argsort()
153 | return lamb[idx], U[:, idx]
154 |
155 |     if algo == 'eig':
156 |         lamb, U = np.linalg.eig(L.toarray())
157 |         lamb, U = sort(lamb, U)
158 |     elif algo == 'eigh':
159 |         lamb, U = np.linalg.eigh(L.toarray())
160 |     elif algo == 'eigs':
161 |         lamb, U = scipy.sparse.linalg.eigs(L, k=k, which='SM')
162 |         lamb, U = sort(lamb, U)
163 |     elif algo == 'eigsh':
164 |         lamb, U = scipy.sparse.linalg.eigsh(L, k=k, which='SM')
165 |
166 | return lamb, U
167 |
168 |
169 | def plot_spectrum(L, algo='eig', ymin = 0):
170 | """Plot the spectrum of a list of multi-scale Laplacians L."""
171 | # Algo is eig to be sure to get all eigenvalues.
172 | plt.figure(figsize=(17, 5))
173 | for i, lap in enumerate(L):
174 | lamb, U = fourier(lap, algo)
175 | step = 2**i
176 | x = range(step//2, L[0].shape[0], step)
177 | lb = 'L_{} spectrum in [{:1.2e}, {:1.2e}]'.format(i, lamb[0], lamb[-1])
178 | plt.plot(x, lamb, '.', label=lb)
179 | plt.legend(loc='best')
180 | plt.xlim(0, L[0].shape[0])
181 | plt.ylim(ymin=ymin)
182 |
183 | plt.ylabel('Value')
184 | plt.xlabel('Eigenvalue ID')
185 |
186 |
187 | def lanczos(L, X, K):
188 | """
189 | Given the graph Laplacian and a data matrix, return a data matrix which can
190 | be multiplied by the filter coefficients to filter X using the Lanczos
191 | polynomial approximation.
192 | """
193 | M, N = X.shape
194 | assert L.dtype == X.dtype
195 |
196 | def basis(L, X, K):
197 | """
198 | Lanczos algorithm which computes the orthogonal matrix V and the
199 | tri-diagonal matrix H.
200 | """
201 | a = np.empty((K, N), L.dtype)
202 | b = np.zeros((K, N), L.dtype)
203 | V = np.empty((K, M, N), L.dtype)
204 | V[0, ...] = X / np.linalg.norm(X, axis=0)
205 | for k in range(K-1):
206 | W = L.dot(V[k, ...])
207 | a[k, :] = np.sum(W * V[k, ...], axis=0)
208 | W = W - a[k, :] * V[k, ...] - (
209 | b[k, :] * V[k-1, ...] if k > 0 else 0)
210 | b[k+1, :] = np.linalg.norm(W, axis=0)
211 | V[k+1, ...] = W / b[k+1, :]
212 | a[K-1, :] = np.sum(L.dot(V[K-1, ...]) * V[K-1, ...], axis=0)
213 | return V, a, b
214 |
215 | def diag_H(a, b, K):
216 | """Diagonalize the tri-diagonal H matrix."""
217 | H = np.zeros((K*K, N), a.dtype)
218 | H[:K**2:K+1, :] = a
219 | H[1:(K-1)*K:K+1, :] = b[1:, :]
220 | H.shape = (K, K, N)
221 | Q = np.linalg.eigh(H.T, UPLO='L')[1]
222 | Q = np.swapaxes(Q, 1, 2).T
223 | return Q
224 |
225 | V, a, b = basis(L, X, K)
226 | Q = diag_H(a, b, K)
227 | Xt = np.empty((K, M, N), L.dtype)
228 | for n in range(N):
229 | Xt[..., n] = Q[..., n].T.dot(V[..., n])
230 | Xt *= Q[0, :, np.newaxis, :]
231 | Xt *= np.linalg.norm(X, axis=0)
232 | return Xt # Q[0, ...]
233 |
234 |
235 | def rescale_L(L, lmax=2):
236 | """Rescale the Laplacian eigenvalues in [-1,1]."""
237 | M, M = L.shape
238 | I = scipy.sparse.identity(M, format='csr', dtype=L.dtype)
239 | L /= lmax / 2
240 | L -= I
241 | return L
242 |
243 |
244 | def chebyshev(L, X, K):
245 | """Return T_k X where T_k are the Chebyshev polynomials of order up to K.
246 | Complexity is O(KMN)."""
247 | M, N = X.shape
248 | assert L.dtype == X.dtype
249 |
250 | # L = rescale_L(L, lmax)
251 | # Xt = T @ X: MxM @ MxN.
252 | Xt = np.empty((K, M, N), L.dtype)
253 | # Xt_0 = T_0 X = I X = X.
254 | Xt[0, ...] = X
255 | # Xt_1 = T_1 X = L X.
256 | if K > 1:
257 | Xt[1, ...] = L.dot(X)
258 | # Xt_k = 2 L Xt_k-1 - Xt_k-2.
259 | for k in range(2, K):
260 | Xt[k, ...] = 2 * L.dot(Xt[k-1, ...]) - Xt[k-2, ...]
261 | return Xt
262 |
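
# Minimal usage sketch: build the 28x28 MNIST grid graph and its normalized,
# rescaled Laplacian, mirroring grid_graph() in CayleyNet.ipynb (k=8 neighbours
# and m=28 are the notebook's choices, not defaults of this module).
if __name__ == '__main__':
    z = grid(28)                                      # 784 x 2 vertex coordinates
    dist, idx = distance_sklearn_metrics(z, k=8, metric='euclidean')
    A = adjacency(dist, idx)                          # sparse, symmetric kNN weight matrix
    L = laplacian(A, normalized=True)                 # I - D^{-1/2} W D^{-1/2}
    L = rescale_L(L, lmax=lmax(L))                    # spectrum mapped into [-1, 1]
    print(repr(L))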
--------------------------------------------------------------------------------
/coarsening.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.sparse
3 |
4 |
5 | def coarsen(A, levels, self_connections=False):
6 | """
7 | Coarsen a graph, represented by its adjacency matrix A, at multiple
8 | levels.
9 | """
10 | graphs, parents = metis(A, levels)
11 | perms = compute_perm(parents)
12 |
13 | for i, A in enumerate(graphs):
14 | M, M = A.shape
15 |
16 | if not self_connections:
17 | A = A.tocoo()
18 | A.setdiag(0)
19 |
20 | if i < levels:
21 | A = perm_adjacency(A, perms[i])
22 |
23 | A = A.tocsr()
24 | A.eliminate_zeros()
25 | graphs[i] = A
26 |
27 | Mnew, Mnew = A.shape
28 | print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added),'
29 |               ' |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))
30 |
31 | return graphs, perms[0] if levels > 0 else None
32 |
33 |
34 | def metis(W, levels, rid=None):
35 | """
36 | Coarsen a graph multiple times using the METIS algorithm.
37 |
38 | INPUT
39 | W: symmetric sparse weight (adjacency) matrix
40 | levels: the number of coarsened graphs
41 |
42 | OUTPUT
43 | graph[0]: original graph of size N_1
44 |     graph[1]: coarser graph of size N_2 < N_1
45 |     graph[levels]: coarsest graph of size N_levels < ... < N_2 < N_1
46 |     parents[i] is a vector of size N_i with entries ranging from 0 to N_{i+1}-1
47 | which indicate the parents in the coarser graph[i+1]
48 | nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i}
49 |
50 | NOTE
51 | if "graph" is a list of length k, then "parents" will be a list of length k-1
52 | """
53 |
54 | N, N = W.shape
55 | if rid is None:
56 | rid = np.random.permutation(range(N))
57 | parents = []
58 | degree = W.sum(axis=0) - W.diagonal()
59 | graphs = []
60 | graphs.append(W)
61 | #supernode_size = np.ones(N)
62 | #nd_sz = [supernode_size]
63 | #count = 0
64 |
65 | #while N > maxsize:
66 | for _ in range(levels):
67 |
68 | #count += 1
69 |
70 | # CHOOSE THE WEIGHTS FOR THE PAIRING
71 | # weights = ones(N,1) # metis weights
72 | weights = degree # graclus weights
73 | # weights = supernode_size # other possibility
74 | weights = np.array(weights).squeeze()
75 |
76 | # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR
77 | idx_row, idx_col, val = scipy.sparse.find(W)
78 | perm = np.argsort(idx_row)
79 | rr = idx_row[perm]
80 | cc = idx_col[perm]
81 | vv = val[perm]
82 | cluster_id = metis_one_level(rr,cc,vv,rid,weights) # rr is ordered
83 | parents.append(cluster_id)
84 |
85 | # TO DO
86 | # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE
87 | #supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) )
88 | #print(cluster_id)
89 | #print(supernode_size)
90 | #nd_sz{count+1}=supernode_size;
91 |
92 | # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH
93 | nrr = cluster_id[rr]
94 | ncc = cluster_id[cc]
95 | nvv = vv
96 | Nnew = cluster_id.max() + 1
97 | # CSR is more appropriate: row,val pairs appear multiple times
98 | W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)), shape=(Nnew,Nnew))
99 | W.eliminate_zeros()
100 | # Add new graph to the list of all coarsened graphs
101 | graphs.append(W)
102 | N, N = W.shape
103 |
104 | # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS)
105 | degree = W.sum(axis=0)
106 | #degree = W.sum(axis=0) - W.diagonal()
107 |
108 |         # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISITED AT THE NEXT PASS
109 | #[~, rid]=sort(ss); # arthur strategy
110 | #[~, rid]=sort(supernode_size); # thomas strategy
111 | #rid=randperm(N); # metis/graclus strategy
112 | ss = np.array(W.sum(axis=0)).squeeze()
113 | rid = np.argsort(ss)
114 |
115 | return graphs, parents
116 |
117 |
118 | # Coarsen a graph given by rr,cc,vv. rr is assumed to be ordered
119 | def metis_one_level(rr,cc,vv,rid,weights):
120 |
121 | nnz = rr.shape[0]
122 | N = rr[nnz-1] + 1
123 |
124 | marked = np.zeros(N, np.bool)
125 | rowstart = np.zeros(N, np.int32)
126 | rowlength = np.zeros(N, np.int32)
127 | cluster_id = np.zeros(N, np.int32)
128 |
129 | oldval = rr[0]
130 | count = 0
131 | clustercount = 0
132 |
133 | for ii in range(nnz):
134 | rowlength[count] = rowlength[count] + 1
135 | if rr[ii] > oldval:
136 | oldval = rr[ii]
137 | rowstart[count+1] = ii
138 | count = count + 1
139 |
140 | for ii in range(N):
141 | tid = rid[ii]
142 | if not marked[tid]:
143 | wmax = 0.0
144 | rs = rowstart[tid]
145 | marked[tid] = True
146 | bestneighbor = -1
147 | for jj in range(rowlength[tid]):
148 | nid = cc[rs+jj]
149 | if marked[nid]:
150 | tval = 0.0
151 | else:
152 | tval = vv[rs+jj] * (1.0/weights[tid] + 1.0/weights[nid])
153 | if tval > wmax:
154 | wmax = tval
155 | bestneighbor = nid
156 |
157 | cluster_id[tid] = clustercount
158 |
159 | if bestneighbor > -1:
160 | cluster_id[bestneighbor] = clustercount
161 | marked[bestneighbor] = True
162 |
163 | clustercount += 1
164 |
165 | return cluster_id
166 |
167 | def compute_perm(parents):
168 | """
169 | Return a list of indices to reorder the adjacency and data matrices so
170 | that the union of two neighbors from layer to layer forms a binary tree.
171 | """
172 |
173 | # Order of last layer is random (chosen by the clustering algorithm).
174 | indices = []
175 | if len(parents) > 0:
176 | M_last = max(parents[-1]) + 1
177 | indices.append(list(range(M_last)))
178 |
179 | for parent in parents[::-1]:
180 | #print('parent: {}'.format(parent))
181 |
182 | # Fake nodes go after real ones.
183 | pool_singeltons = len(parent)
184 |
185 | indices_layer = []
186 | for i in indices[-1]:
187 | indices_node = list(np.where(parent == i)[0])
188 | assert 0 <= len(indices_node) <= 2
189 | #print('indices_node: {}'.format(indices_node))
190 |
191 |             # Add a node to go with a singleton.
192 |             if len(indices_node) == 1:
193 |                 indices_node.append(pool_singeltons)
194 |                 pool_singeltons += 1
195 |                 #print('new singelton: {}'.format(indices_node))
196 |             # Add two nodes as children of a singleton in the parent.
197 |             elif len(indices_node) == 0:
198 | indices_node.append(pool_singeltons+0)
199 | indices_node.append(pool_singeltons+1)
200 | pool_singeltons += 2
201 | #print('singelton childrens: {}'.format(indices_node))
202 |
203 | indices_layer.extend(indices_node)
204 | indices.append(indices_layer)
205 |
206 | # Sanity checks.
207 | for i,indices_layer in enumerate(indices):
208 | M = M_last*2**i
209 | # Reduction by 2 at each layer (binary tree).
210 |         assert len(indices_layer) == M
211 |         # The new ordering does not omit any index.
212 | assert sorted(indices_layer) == list(range(M))
213 |
214 | return indices[::-1]
215 |
216 | assert (compute_perm([np.array([4,1,1,2,2,3,0,0,3]),np.array([2,1,0,1,0])])
217 | == [[3,4,0,9,1,2,5,8,6,7,10,11],[2,4,1,3,0,5],[0,1,2]])
218 |
219 | def perm_data(x, indices):
220 | """
221 | Permute data matrix, i.e. exchange node ids,
222 | so that binary unions form the clustering tree.
223 | """
224 | if indices is None:
225 | return x
226 |
227 | N, M = x.shape
228 | Mnew = len(indices)
229 | assert Mnew >= M
230 | xnew = np.empty((N, Mnew))
231 | for i,j in enumerate(indices):
232 | # Existing vertex, i.e. real data.
233 | if j < M:
234 | xnew[:,i] = x[:,j]
235 |         # Fake vertex because of singletons.
236 |         # They will stay 0 so that max pooling chooses the singleton.
237 | # Or -infty ?
238 | else:
239 | xnew[:,i] = np.zeros(N)
240 | return xnew
241 |
242 | def perm_adjacency(A, indices):
243 | """
244 | Permute adjacency matrix, i.e. exchange node ids,
245 | so that binary unions form the clustering tree.
246 | """
247 | if indices is None:
248 | return A
249 |
250 | M, M = A.shape
251 | Mnew = len(indices)
252 | assert Mnew >= M
253 | A = A.tocoo()
254 |
255 | # Add Mnew - M isolated vertices.
256 | if Mnew > M:
257 | rows = scipy.sparse.coo_matrix((Mnew-M, M), dtype=np.float32)
258 | cols = scipy.sparse.coo_matrix((Mnew, Mnew-M), dtype=np.float32)
259 | A = scipy.sparse.vstack([A, rows])
260 | A = scipy.sparse.hstack([A, cols])
261 |
262 | # Permute the rows and the columns.
263 | perm = np.argsort(indices)
264 | A.row = np.array(perm)[A.row]
265 | A.col = np.array(perm)[A.col]
266 |
267 | # assert np.abs(A - A.T).mean() < 1e-9
268 | assert type(A) is scipy.sparse.coo.coo_matrix
269 | return A
270 |
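
# Minimal usage sketch: coarsen the MNIST grid graph built with graph.py (as in
# CayleyNet.ipynb, where levels=4 and k=8 are the notebook's choices) and permute a
# data matrix so that plain 1-D max pooling follows the binary clustering tree.
if __name__ == '__main__':
    import graph
    z = graph.grid(28)
    dist, idx = graph.distance_sklearn_metrics(z, k=8, metric='euclidean')
    A = graph.adjacency(dist, idx)
    graphs, perm = coarsen(A, levels=4)     # graphs[0] is the original graph
    X = np.random.rand(10, A.shape[0])      # 10 dummy signals on the 784 vertices
    X_perm = perm_data(X, perm)             # columns reordered and padded with fake vertices
    print(X.shape, '->', X_perm.shape)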
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import gensim
2 | import sklearn, sklearn.datasets
3 | import sklearn.naive_bayes, sklearn.linear_model, sklearn.svm, sklearn.neighbors, sklearn.ensemble
4 | import matplotlib.pyplot as plt
5 | import scipy.sparse
6 | import numpy as np
7 | import time, re
8 |
9 |
10 | # Helpers to process text documents.
11 |
12 |
13 | class TextDataset(object):
14 | def clean_text(self, num='substitute'):
15 | # TODO: stemming, lemmatisation
16 | for i,doc in enumerate(self.documents):
17 | # Digits.
18 |             if num == 'spell':
19 | doc = doc.replace('0', ' zero ')
20 | doc = doc.replace('1', ' one ')
21 | doc = doc.replace('2', ' two ')
22 | doc = doc.replace('3', ' three ')
23 | doc = doc.replace('4', ' four ')
24 | doc = doc.replace('5', ' five ')
25 | doc = doc.replace('6', ' six ')
26 | doc = doc.replace('7', ' seven ')
27 | doc = doc.replace('8', ' eight ')
28 | doc = doc.replace('9', ' nine ')
29 |             elif num == 'substitute':
30 |                 # All numbers are equal. Useful for embedding (countable words) ?
31 |                 doc = re.sub('(\\d+)', ' NUM ', doc)
32 |             elif num == 'remove':
33 | # Numbers are uninformative (they are all over the place). Useful for bag-of-words ?
34 | # But maybe some kind of documents contain more numbers, e.g. finance.
35 | # Some documents are indeed full of numbers. At least in 20NEWS.
36 | doc = re.sub('[0-9]', ' ', doc)
37 | # Remove everything except a-z characters and single space.
38 | doc = doc.replace('$', ' dollar ')
39 | doc = doc.lower()
40 | doc = re.sub('[^a-z]', ' ', doc)
41 | doc = ' '.join(doc.split()) # same as doc = re.sub('\s{2,}', ' ', doc)
42 | self.documents[i] = doc
43 |
44 | def vectorize(self, **params):
45 | # TODO: count or tf-idf. Or in normalize ?
46 | vectorizer = sklearn.feature_extraction.text.CountVectorizer(**params)
47 | self.data = vectorizer.fit_transform(self.documents)
48 | self.vocab = vectorizer.get_feature_names()
49 | assert len(self.vocab) == self.data.shape[1]
50 |
51 | def data_info(self, show_classes=False):
52 | N, M = self.data.shape
53 | sparsity = self.data.nnz / N / M * 100
54 | print('N = {} documents, M = {} words, sparsity={:.4f}%'.format(N, M, sparsity))
55 | if show_classes:
56 | for i in range(len(self.class_names)):
57 | num = sum(self.labels == i)
58 | print(' {:5d} documents in class {:2d} ({})'.format(num, i, self.class_names[i]))
59 |
60 | def show_document(self, i):
61 | label = self.labels[i]
62 | name = self.class_names[label]
63 | try:
64 | text = self.documents[i]
65 | wc = len(text.split())
66 | except AttributeError:
67 | text = None
68 | wc = 'N/A'
69 | print('document {}: label {} --> {}, {} words'.format(i, label, name, wc))
70 | try:
71 | vector = self.data[i,:]
72 | for j in range(vector.shape[1]):
73 | if vector[0,j] != 0:
74 | print(' {:.2f} "{}" ({})'.format(vector[0,j], self.vocab[j], j))
75 | except AttributeError:
76 | pass
77 | return text
78 |
79 | def keep_documents(self, idx):
80 | """Keep the documents given by the index, discard the others."""
81 | self.documents = [self.documents[i] for i in idx]
82 | self.labels = self.labels[idx]
83 | self.data = self.data[idx,:]
84 |
85 | def keep_words(self, idx):
86 | """Keep the documents given by the index, discard the others."""
87 | self.data = self.data[:,idx]
88 | self.vocab = [self.vocab[i] for i in idx]
89 | try:
90 | self.embeddings = self.embeddings[idx,:]
91 | except AttributeError:
92 | pass
93 |
94 | def remove_short_documents(self, nwords, vocab='selected'):
95 | """Remove a document if it contains less than nwords."""
96 | if vocab is 'selected':
97 | # Word count with selected vocabulary.
98 | wc = self.data.sum(axis=1)
99 | wc = np.squeeze(np.asarray(wc))
100 | elif vocab is 'full':
101 | # Word count with full vocabulary.
102 | wc = np.empty(len(self.documents), dtype=np.int)
103 | for i,doc in enumerate(self.documents):
104 | wc[i] = len(doc.split())
105 | idx = np.argwhere(wc >= nwords).squeeze()
106 | self.keep_documents(idx)
107 | return wc
108 |
109 | def keep_top_words(self, M, Mprint=20):
110 | """Keep in the vocaluary the M words who appear most often."""
111 | freq = self.data.sum(axis=0)
112 | freq = np.squeeze(np.asarray(freq))
113 | idx = np.argsort(freq)[::-1]
114 | idx = idx[:M]
115 | self.keep_words(idx)
116 | print('most frequent words')
117 | for i in range(Mprint):
118 | print(' {:3d}: {:10s} {:6d} counts'.format(i, self.vocab[i], freq[idx][i]))
119 | return freq[idx]
120 |
121 | def normalize(self, norm='l1'):
122 | """Normalize data to unit length."""
123 | # TODO: TF-IDF.
124 | data = self.data.astype(np.float64)
125 | self.data = sklearn.preprocessing.normalize(data, axis=1, norm=norm)
126 |
127 | def embed(self, filename=None, size=100):
128 | """Embed the vocabulary using pre-trained vectors."""
129 | if filename:
130 | model = gensim.models.Word2Vec.load_word2vec_format(filename, binary=True)
131 | size = model.vector_size
132 | else:
133 | class Sentences(object):
134 | def __init__(self, documents):
135 | self.documents = documents
136 | def __iter__(self):
137 | for document in self.documents:
138 | yield document.split()
139 | model = gensim.models.Word2Vec(Sentences(self.documents), size)
140 | self.embeddings = np.empty((len(self.vocab), size))
141 | keep = []
142 | not_found = 0
143 | for i,word in enumerate(self.vocab):
144 | try:
145 | self.embeddings[i,:] = model[word]
146 | keep.append(i)
147 | except KeyError:
148 | not_found += 1
149 |         print('{} words not found in corpus'.format(not_found))
150 | self.keep_words(keep)
151 |
152 | class Text20News(TextDataset):
153 | def __init__(self, **params):
154 | dataset = sklearn.datasets.fetch_20newsgroups(**params)
155 | self.documents = dataset.data
156 | self.labels = dataset.target
157 | self.class_names = dataset.target_names
158 | assert max(self.labels) + 1 == len(self.class_names)
159 | N, C = len(self.documents), len(self.class_names)
160 | print('N = {} documents, C = {} classes'.format(N, C))
161 |
162 | class TextRCV1(TextDataset):
163 | def __init__(self, **params):
164 | dataset = sklearn.datasets.fetch_rcv1(**params)
165 | self.data = dataset.data
166 | self.target = dataset.target
167 | self.class_names = dataset.target_names
168 | assert len(self.class_names) == 103 # 103 categories according to LYRL2004
169 | N, C = self.target.shape
170 | assert C == len(self.class_names)
171 | print('N = {} documents, C = {} classes'.format(N, C))
172 |
173 | def remove_classes(self, keep):
174 | ## Construct a lookup table for labels.
175 | labels_row = []
176 | labels_col = []
177 | class_lookup = {}
178 | for i,name in enumerate(self.class_names):
179 | class_lookup[name] = i
180 | self.class_names = keep
181 |
182 | # Index of classes to keep.
183 |         idx_keep = np.empty(len(keep), dtype=int)
184 | for i,cat in enumerate(keep):
185 | idx_keep[i] = class_lookup[cat]
186 | self.target = self.target[:,idx_keep]
187 | assert self.target.shape[1] == len(keep)
188 |
189 | def show_doc_per_class(self, print_=False):
190 | """Number of documents per class."""
191 | docs_per_class = np.array(self.target.astype(np.uint64).sum(axis=0)).squeeze()
192 | print('categories ({} assignments in total)'.format(docs_per_class.sum()))
193 | if print_:
194 | for i,cat in enumerate(self.class_names):
195 | print(' {:5s}: {:6d} documents'.format(cat, docs_per_class[i]))
196 | plt.figure(figsize=(17,5))
197 | plt.plot(sorted(docs_per_class[::-1]),'.')
198 |
199 | def show_classes_per_doc(self):
200 | """Number of classes per document."""
201 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze()
202 | plt.figure(figsize=(17,5))
203 | plt.plot(sorted(classes_per_doc[::-1]),'.')
204 |
205 | def select_documents(self):
206 | classes_per_doc = np.array(self.target.sum(axis=1)).squeeze()
207 | self.target = self.target[classes_per_doc==1]
208 | self.data = self.data[classes_per_doc==1, :]
209 |
210 | # Convert labels from indicator form to single value.
211 | N, C = self.target.shape
212 | target = self.target.tocoo()
213 | self.labels = target.col
214 | assert self.labels.min() == 0
215 | assert self.labels.max() == C - 1
216 |
217 | # Bruna and Dropout used 2 * 201369 = 402738 documents. Probably the difference btw v1 and v2.
218 | #return classes_per_doc
219 |
220 | ### Helpers to quantify classifier's quality.
221 |
222 |
223 | def baseline(train_data, train_labels, test_data, test_labels, omit=[]):
224 | """Train various classifiers to get a baseline."""
225 | clf, train_accuracy, test_accuracy, train_f1, test_f1, exec_time = [], [], [], [], [], []
226 | clf.append(sklearn.neighbors.KNeighborsClassifier(n_neighbors=10))
227 | clf.append(sklearn.linear_model.LogisticRegression())
228 | clf.append(sklearn.naive_bayes.BernoulliNB(alpha=.01))
229 | clf.append(sklearn.ensemble.RandomForestClassifier())
230 | clf.append(sklearn.naive_bayes.MultinomialNB(alpha=.01))
231 | clf.append(sklearn.linear_model.RidgeClassifier())
232 | clf.append(sklearn.svm.LinearSVC())
233 | for i,c in enumerate(clf):
234 | if i not in omit:
235 | t_start = time.process_time()
236 | c.fit(train_data, train_labels)
237 | train_pred = c.predict(train_data)
238 | test_pred = c.predict(test_data)
239 | train_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(train_labels, train_pred)))
240 | test_accuracy.append('{:5.2f}'.format(100*sklearn.metrics.accuracy_score(test_labels, test_pred)))
241 | train_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(train_labels, train_pred, average='weighted')))
242 | test_f1.append('{:5.2f}'.format(100*sklearn.metrics.f1_score(test_labels, test_pred, average='weighted')))
243 | exec_time.append('{:5.2f}'.format(time.process_time() - t_start))
244 | print('Train accuracy: {}'.format(' '.join(train_accuracy)))
245 | print('Test accuracy: {}'.format(' '.join(test_accuracy)))
246 | print('Train F1 (weighted): {}'.format(' '.join(train_f1)))
247 | print('Test F1 (weighted): {}'.format(' '.join(test_f1)))
248 | print('Execution time: {}'.format(' '.join(exec_time)))
249 |
250 | def grid_search(params, grid_params, train_data, train_labels, val_data,
251 | val_labels, test_data, test_labels, model):
252 | """Explore the hyper-parameter space with an exhaustive grid search."""
253 | params = params.copy()
254 | train_accuracy, test_accuracy, train_f1, test_f1 = [], [], [], []
255 | grid = sklearn.grid_search.ParameterGrid(grid_params)
256 | print('grid search: {} combinations to evaluate'.format(len(grid)))
257 | for grid_params in grid:
258 | params.update(grid_params)
259 | name = '{}'.format(grid)
260 | print('\n\n {} \n\n'.format(grid_params))
261 | m = model(params)
262 | m.fit(train_data, train_labels, val_data, val_labels)
263 | string, accuracy, f1, loss = m.evaluate(train_data, train_labels)
264 | train_accuracy.append('{:5.2f}'.format(accuracy)); train_f1.append('{:5.2f}'.format(f1))
265 | print('train {}'.format(string))
266 | string, accuracy, f1, loss = m.evaluate(test_data, test_labels)
267 | test_accuracy.append('{:5.2f}'.format(accuracy)); test_f1.append('{:5.2f}'.format(f1))
268 | print('test {}'.format(string))
269 | print('\n\n')
270 | print('Train accuracy: {}'.format(' '.join(train_accuracy)))
271 | print('Test accuracy: {}'.format(' '.join(test_accuracy)))
272 | print('Train F1 (weighted): {}'.format(' '.join(train_f1)))
273 | print('Test F1 (weighted): {}'.format(' '.join(test_f1)))
274 | for i,grid_params in enumerate(grid):
275 | print('{} --> {} {} {} {}'.format(grid_params, train_accuracy[i], test_accuracy[i], train_f1[i], test_f1[i]))
276 |
277 |
278 | class model_perf(object):
279 |
280 | def __init__(s):
281 | s.names, s.params = set(), {}
282 | s.fit_accuracies, s.fit_losses, s.fit_time = {}, {}, {}
283 | s.train_accuracy, s.train_f1, s.train_loss = {}, {}, {}
284 | s.test_accuracy, s.test_f1, s.test_loss = {}, {}, {}
285 |
286 | def test(s, model, name, params, train_data, train_labels, val_data, val_labels, test_data, test_labels):
287 | s.params[name] = params
288 | s.fit_accuracies[name], s.fit_losses[name], s.fit_time[name] = \
289 | model.fit(train_data, train_labels, val_data, val_labels)
290 | string, s.train_accuracy[name], s.train_f1[name], s.train_loss[name] = \
291 | model.evaluate(train_data, train_labels)
292 | print('train {}'.format(string))
293 | string, s.test_accuracy[name], s.test_f1[name], s.test_loss[name] = \
294 | model.evaluate(test_data, test_labels)
295 | print('test {}'.format(string))
296 | s.names.add(name)
297 |
298 | def show(s, fontsize=None):
299 | if fontsize:
300 | plt.rc('pdf', fonttype=42)
301 | plt.rc('ps', fonttype=42)
302 | plt.rc('font', size=fontsize) # controls default text sizes
303 | plt.rc('axes', titlesize=fontsize) # fontsize of the axes title
304 |             plt.rc('axes', labelsize=fontsize)    # fontsize of the x and y labels
305 | plt.rc('xtick', labelsize=fontsize) # fontsize of the tick labels
306 | plt.rc('ytick', labelsize=fontsize) # fontsize of the tick labels
307 | plt.rc('legend', fontsize=fontsize) # legend fontsize
308 | plt.rc('figure', titlesize=fontsize) # size of the figure title
309 | print(' accuracy F1 loss time [ms] name')
310 | print('test train test train test train')
311 | for name in sorted(s.names):
312 | print('{:5.2f} {:5.2f} {:5.2f} {:5.2f} {:.2e} {:.2e} {:3.0f} {}'.format(
313 | s.test_accuracy[name], s.train_accuracy[name],
314 | s.test_f1[name], s.train_f1[name],
315 | s.test_loss[name], s.train_loss[name], s.fit_time[name]*1000, name))
316 |
317 | fig, ax = plt.subplots(1, 2, figsize=(15, 5))
318 | for name in sorted(s.names):
319 | steps = np.arange(len(s.fit_accuracies[name])) + 1
320 | steps *= s.params[name]['eval_frequency']
321 | ax[0].plot(steps, s.fit_accuracies[name], '.-', label=name)
322 | ax[1].plot(steps, s.fit_losses[name], '.-', label=name)
323 | ax[0].set_xlim(min(steps), max(steps))
324 | ax[1].set_xlim(min(steps), max(steps))
325 | ax[0].set_xlabel('step')
326 | ax[1].set_xlabel('step')
327 | ax[0].set_ylabel('validation accuracy')
328 | ax[1].set_ylabel('training loss')
329 | ax[0].legend(loc='lower right')
330 | ax[1].legend(loc='upper right')
331 | #fig.savefig('training.pdf')
332 |
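
# Minimal usage sketch of the text helpers (parameter values are illustrative only;
# downloading 20 Newsgroups via scikit-learn requires network access).
if __name__ == '__main__':
    train = Text20News(subset='train', remove=('headers', 'footers', 'quotes'))
    train.clean_text(num='substitute')
    train.vectorize(stop_words='english')
    train.remove_short_documents(nwords=20, vocab='selected')
    train.keep_top_words(1000)
    train.normalize(norm='l1')
    train.data_info(show_classes=True)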
--------------------------------------------------------------------------------
/CayleyNet.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": false,
8 | "deletable": true,
9 | "editable": true
10 | },
11 | "outputs": [],
12 | "source": [
13 | "import tensorflow as tf\n",
14 | "import time, shutil\n",
15 | "import numpy as np\n",
16 | "import os, collections, sklearn\n",
17 | "import joblib\n",
18 | "\n",
19 | "import graph, coarsening\n",
20 | "import scipy.sparse as sp\n",
21 | "\n",
22 | "import matplotlib.pyplot as plt\n",
23 | "%matplotlib inline"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "deletable": true,
30 | "editable": true
31 | },
32 | "source": [
33 | "# Graph definition and coarsening"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {
40 | "collapsed": true,
41 | "deletable": true,
42 | "editable": true
43 | },
44 | "outputs": [],
45 | "source": [
46 | "#Definition of some flags useful later in the code\n",
47 | "\n",
48 | "flags = tf.app.flags\n",
49 | "FLAGS = flags.FLAGS\n",
50 | "\n",
51 | "# Graphs.\n",
52 | "flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')\n",
53 | "flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')\n",
54 | "flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')\n",
55 | "flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')\n",
56 | "\n",
57 | "# Directories.\n",
58 | "flags.DEFINE_string('dir_data', 'data_mnist', 'Directory to store data.')"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {
65 | "collapsed": false,
66 | "deletable": true,
67 | "editable": true
68 | },
69 | "outputs": [],
70 | "source": [
71 | "#Here we proceed at computing the original grid where the images live and the various coarsening that are applied\n",
72 | "#for each level\n",
73 | "\n",
74 | "def grid_graph(m):\n",
75 | " z = graph.grid(m)\n",
76 | " dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric) \n",
77 | " #dist contains the distance of the 8 nearest neighbors for each node sorted in ascending order\n",
78 |     "    #idx contains the indexes of the 8 nearest neighbors for each node sorted in ascending order by distance\n",
79 | "\n",
80 | " A = graph.adjacency(dist, idx)\n",
81 | " return A\n",
82 | "\n",
83 | "def coarsen(A, levels):\n",
84 | " graphs, parents = coarsening.metis(A, levels) #Coarsen a graph multiple times using the METIS algorithm. \n",
85 | " #Everything starts with a random point and then decides how to \n",
86 | " #combine the points.\n",
87 | " #Construction is done a priori, so we have one graph\n",
88 | " #for all the samples!\n",
89 | " \n",
90 |     "    #graphs = list of sparse adjacency matrices (it contains in position \n",
91 |     "    # 0 the original graph)\n",
92 |     "    #parents = list of numpy arrays (every array in position i contains \n",
93 |     "    # the mapping from graph i to graph i+1, i.e. the idx of\n",
94 |     "    # node i in the coarsened graph) \n",
95 | " perms = coarsening.compute_perm(parents) #Return a list of indices to reorder the adjacency and data matrices so\n",
96 | " #that the union of two neighbors from layer to layer forms a binary tree.\n",
97 | " #Fake nodes are appended at the end of the current graph\n",
98 | " laplacians = []\n",
99 | " for i,A in enumerate(graphs):\n",
100 | " M, M = A.shape\n",
101 | "\n",
102 | " # We remove any possible self-connection.\n",
103 | " A = A.tocoo()\n",
104 | " A.setdiag(0)\n",
105 | "\n",
106 | " if i < levels: #if we have to pool the graph \n",
107 |     "            A = coarsening.perm_adjacency(A, perms[i]) #matrix A is here extended with the fake nodes\n",
108 |     "                                                       #in order to do an efficient pooling operation\n",
109 |     "                                                       #in tensorflow as if it were a 1D pooling\n",
110 | "\n",
111 | " A = A.tocsr()\n",
112 | " A.eliminate_zeros()\n",
113 | " Mnew, Mnew = A.shape\n",
114 | " print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))\n",
115 | "\n",
116 | " L = graph.laplacian(A, normalized=FLAGS.normalized_laplacian)\n",
117 | " laplacians.append(L)\n",
118 | " return laplacians, perms[0] if len(perms) > 0 else None\n",
119 | "\n",
120 | "t_start = time.time()\n",
121 | "\n",
122 | "np.random.seed(0)\n",
123 | "A = grid_graph(28)\n",
124 | "L, perm = coarsen(A, FLAGS.coarsening_levels)\n",
125 | "\n",
126 | "print('Execution time: {:.2f}s'.format(time.time() - t_start))\n",
127 | "\n",
128 | "graph.plot_spectrum(L)\n",
129 | "del A"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {
136 | "collapsed": false,
137 | "deletable": true,
138 | "editable": true
139 | },
140 | "outputs": [],
141 | "source": [
142 |     "#Shift the normalized Laplacian so its spectrum lies in [-1, 1]\n",
143 | "L_norm = []\n",
144 | "for k in range(len(L)):\n",
145 | " L_norm.append(L[k] - sp.eye(L[k].shape[0]))\n",
146 | "graph.plot_spectrum(L_norm, ymin=-1)"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {
152 | "deletable": true,
153 | "editable": true
154 | },
155 | "source": [
156 | "# Data loading"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "metadata": {
163 | "collapsed": false,
164 | "deletable": true,
165 | "editable": true
166 | },
167 | "outputs": [],
168 | "source": [
169 | "#loading of MNIST dataset\n",
170 | "\n",
171 | "from tensorflow.examples.tutorials.mnist import input_data\n",
172 | "mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False)\n",
173 | "\n",
174 | "train_data = mnist.train.images.astype(np.float32)\n",
175 | "val_data = mnist.validation.images.astype(np.float32) #the first 5K samples of the training dataset \n",
176 | " #are used for validation\n",
177 | "test_data = mnist.test.images.astype(np.float32)\n",
178 | "train_labels = mnist.train.labels\n",
179 | "val_labels = mnist.validation.labels\n",
180 | "test_labels = mnist.test.labels\n",
181 | "\n",
182 | "t_start = time.time()\n",
183 | "train_data = coarsening.perm_data(train_data, perm)\n",
184 | "val_data = coarsening.perm_data(val_data, perm)\n",
185 | "test_data = coarsening.perm_data(test_data, perm)\n",
186 | "print('Execution time: {:.2f}s'.format(time.time() - t_start))\n",
187 | "del perm"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "metadata": {
193 | "deletable": true,
194 | "editable": true
195 | },
196 | "source": [
197 | "# Model definition"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {
204 | "collapsed": true,
205 | "deletable": true,
206 | "editable": true
207 | },
208 | "outputs": [],
209 | "source": [
210 | "class CayleyNet:\n",
211 | " \"\"\"\n",
212 | " The neural network model.\n",
213 | " \"\"\"\n",
214 | " \n",
215 | " #Helper functions used for constructing the model\n",
216 | " def _weight_variable(self, shape, regularization=True, name=\"\"): \n",
217 | " \"\"\"Initializer for the weights\"\"\"\n",
218 | " \n",
219 | " initial = tf.truncated_normal_initializer(0, 0.1)\n",
220 | " var = tf.get_variable('weights'+name, shape, tf.float32, initializer=initial)\n",
221 | " if regularization: #append the loss of the current variable to the regularization term \n",
222 | " self.regularizers.append(tf.nn.l2_loss(var))\n",
223 | " return var\n",
224 | " \n",
225 | " def _bias_variable(self, shape, regularization=True):\n",
226 | " \"\"\"Initializer for the bias\"\"\"\n",
227 | " \n",
228 | " initial = tf.constant_initializer(0.1)\n",
229 | " var = tf.get_variable('bias', shape, tf.float32, initializer=initial)\n",
230 | " if regularization:\n",
231 | " self.regularizers.append(tf.nn.l2_loss(var))\n",
232 | " return var\n",
233 | " \n",
234 | " def _h_variable(self, shape, regularization=False, name=''):\n",
235 | " \"\"\"Initializer for the zoom parameter h\"\"\"\n",
236 | " \n",
237 | " initial = tf.random_uniform_initializer()\n",
238 | " var = tf.get_variable('h'+name, shape, tf.float32, initializer=initial)\n",
239 | " if regularization:\n",
240 | " self.regularizers.append(tf.nn.l2_loss(var))\n",
241 | " return var\n",
242 | "\n",
243 | " def frobenius_norm(self, tensor): \n",
244 |     "        \"\"\"Computes the Frobenius norm of a given tensor\"\"\"\n",
245 | " \n",
246 | " square_tensor = tf.square(tensor)\n",
247 | " tensor_sum = tf.reduce_sum(square_tensor)\n",
248 | " frobenius_norm = tf.sqrt(tensor_sum)\n",
249 | " return frobenius_norm\n",
250 | " \n",
251 | " def compute_sparse_D_inv_indices(self, M):\n",
252 | " \"\"\"Computes the indices required for constructing a sparse version of D^-1.\"\"\"\n",
253 | " \n",
254 | " idx_main_diag = np.tile(np.expand_dims(np.arange(0, 2*M),1), [1, 2])\n",
255 | " idx_diag_ur = np.concatenate([np.expand_dims(np.arange(0, M),1), np.expand_dims(np.arange(0, M)+M,1)], 1)\n",
256 | " idx_diag_ll = np.concatenate([np.expand_dims(np.arange(0, M)+M,1), np.expand_dims(np.arange(0, M),1)], 1)\n",
257 | " idx = np.concatenate([idx_main_diag, idx_diag_ur, idx_diag_ll], 0)\n",
258 | " return idx \n",
259 | " \n",
260 | " def compute_sparse_R_indices(self, L_off_diag, M):\n",
261 | " \"\"\"Computes the indices required for constructing a sparse version of R.\"\"\"\n",
262 | " \n",
263 | " idx_L = np.asarray(np.where(L_off_diag)).T\n",
264 | " idx_L_sh = idx_L + np.expand_dims(np.asarray([M,M]),0)\n",
265 | " idx = np.concatenate([idx_L, idx_L_sh])\n",
266 | " return idx\n",
267 | " \n",
268 | " def compute_sparse_numerator_projection_indices(self, L, M):\n",
269 | " \"\"\"Computes the indices required for constructing the numerator projection sparse matrix.\"\"\"\n",
270 | " \n",
271 | " idx_L = np.asarray(np.where(L)).T\n",
272 | " idx_L_sh = idx_L + np.expand_dims(np.asarray([M,M]),0)\n",
273 | " idx_diag_ur = np.concatenate([np.expand_dims(np.arange(0, M),1), np.expand_dims(np.arange(0, M)+M,1)], 1)\n",
274 | " idx_diag_ll = np.concatenate([np.expand_dims(np.arange(0, M)+M,1), np.expand_dims(np.arange(0, M),1)], 1)\n",
275 | " idx = np.concatenate([idx_L, idx_L_sh, idx_diag_ur, idx_diag_ll])\n",
276 | " return idx\n",
277 | " \n",
278 | " def cayleyConv(self, x, L_np, Fout, K): \n",
279 |     "        \"\"\"Applies a Cayley polynomial filter over the graph.\"\"\"\n",
280 | " \n",
281 |     "        M, Fin = x.get_shape()[1:] # M is the number of graph vertices (pixels), Fin the number of input features\n",
282 | " M, Fin = int(M), int(Fin)\n",
283 | " N = tf.shape(x)[0] # N is the number of images\n",
284 | " \n",
285 | " # Applies cayley transform by means of Jacobi method.\n",
286 | " diag_L_np = np.diag(L_np) # vector containing the diagonal of L\n",
287 | " L_off_diag_np = L_np - np.diag(diag_L_np) # off-diagonal entries of L \n",
288 | " \n",
289 | " list_x_pos_exp = [tf.cast(tf.expand_dims(x,0), 'complex64')] # 1 x N x M x F\n",
290 | " \n",
291 | " for iii in range(self.n_h): # for every zoom parameter we want to use (typically one).\n",
292 | " h = self._h_variable([1,1], regularization=False, name='_h%f' % iii)\n",
293 | " self.list_h.append(h)\n",
294 | " \n",
295 | " # Computes matrices required by Jacobi (https://en.wikipedia.org/wiki/Jacobi_method)\n",
296 | " \n",
297 |     "        # To make things more efficient we represent a complex vector of length M as a real vector of length 2*M,\n",
298 |     "        # where the first M values hold the real parts and the second M the imaginary parts.\n",
299 |     "        # All the matrices defined here follow this convention, so everything can be done with\n",
300 |     "        # real-valued TF sparse matrices instead of complex ones.\n",
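    "        # Concretely: a complex vector z = u + i*v is stored as [u; v] (length 2*M), and multiplying\n",
    "        # by a complex matrix B + i*C amounts to multiplying [u; v] by the real block matrix\n",
    "        # [[B, -C], [C, B]]; this is how D^-1, R and the numerator operator are assembled below.\n",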
301 | " \n",
302 | " # ************************** COMPUTES numerator projection **************************\n",
303 | " idx = self.compute_sparse_numerator_projection_indices(L_np, M)\n",
304 | " \n",
305 | " vals_L = tf.squeeze(h*L_np[np.where(L_np)])\n",
306 | " vals = tf.concat([vals_L, vals_L, tf.ones([M,]), -tf.ones([M,])], 0)\n",
307 | " \n",
308 | " cayley_op_neg_sp = tf.SparseTensor(idx, vals, [M*2, M*2])\n",
309 | " cayley_op_neg_sp = tf.sparse_reorder(cayley_op_neg_sp)\n",
310 | " \n",
311 | " # ************************** COMPUTES D **************************\n",
312 | " D_real = tf.squeeze(h*diag_L_np)\n",
313 | " D = tf.complex(D_real, tf.ones_like(D_real))\n",
314 | " D_inv = tf.pow(D, -tf.ones_like(D)) # vector of M elements <- diagonal of D^-1\n",
315 | " \n",
316 | " idx = self.compute_sparse_D_inv_indices(M)\n",
317 | " vals = tf.concat([tf.real(D_inv), tf.real(D_inv), -tf.imag(D_inv), tf.imag(D_inv)], 0)\n",
318 | " \n",
319 | " D_inv_ext_sp = tf.SparseTensor(idx, vals, [M*2, M*2])\n",
320 | " D_inv_ext_sp = tf.sparse_reorder(D_inv_ext_sp)\n",
321 | " \n",
322 | " # ************************** COMPUTES R **************************\n",
323 | " idx = self.compute_sparse_R_indices(L_off_diag_np, M)\n",
324 | " \n",
325 | " vals_L = tf.squeeze(h*L_off_diag_np[np.where(L_off_diag_np)])\n",
326 | " vals = tf.concat([vals_L, vals_L], 0)\n",
327 | " \n",
328 | " R_sp = tf.SparseTensor(idx, vals, [M*2, M*2])\n",
329 | " R_sp = tf.sparse_reorder(R_sp)\n",
330 | " \n",
331 | " # Applies Jacobi method\n",
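    "            # Sketch of the scheme: to apply (h*Delta + i*I)^{-1} to a vector b, split the matrix\n",
    "            # into its diagonal part D and off-diagonal part R and iterate\n",
    "            # y_{t+1} = D^{-1} b - D^{-1} R y_t, which only needs sparse matrix-vector products.\n",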
332 | " c_transform = tf.transpose(x, [1,0,2]) # shape = M, N, F\n",
333 | " c_transform = tf.reshape(c_transform, [M, -1]) # shape = M, N*F\n",
334 | " last_sol = tf.concat([c_transform, tf.zeros_like(c_transform)],0)\n",
335 | " for k in range(K): # for every order of our polynomial\n",
336 | " \n",
337 | " # Jacobi initialization\n",
338 | " b = tf.sparse_tensor_dense_matmul(cayley_op_neg_sp, last_sol) # shape = M, N*F\n",
339 | " a = tf.sparse_tensor_dense_matmul(D_inv_ext_sp, b) # shape = M, N*F\n",
340 | " \n",
341 | " # Jacobi iterations\n",
342 | " cond = lambda i, _: tf.less(i, self.num_jacobi_iter)\n",
343 | " body = lambda i, c_sol: [tf.add(i, 1), a - tf.sparse_tensor_dense_matmul(D_inv_ext_sp, \n",
344 | " tf.sparse_tensor_dense_matmul(R_sp, c_sol))]\n",
345 | " \n",
346 | " c_sol = tf.while_loop(cond, body, [0, a], parallel_iterations=1, swap_memory=True)\n",
347 | " c_sol = c_sol[-1]\n",
348 | " \n",
349 | " # Constructs and saves the final complex matrices\n",
350 | " c_sol_complex = tf.complex(c_sol[:M,:], c_sol[M:, :]) #M x N*F\n",
351 | " c_sol_reshaped = tf.reshape(c_sol_complex, [M, -1, Fin])\n",
352 | " c_sol_reshaped = tf.transpose(c_sol_reshaped, [1, 0, 2]) #N x M x F\n",
353 |     "                list_x_pos_exp.append(tf.expand_dims(c_sol_reshaped,0)) #1 x N x M x F\n",
354 | " \n",
355 | " last_sol = c_sol\n",
356 |     "        x_pos_exp = tf.concat(list_x_pos_exp, 0) # shape = n_h*K+1 x N x M x Fin\n",
357 |     "        x_pos_exp = tf.transpose(x_pos_exp, [1,2,0,3]) #N x M x n_h*K+1 x Fin\n",
358 |     "        x_pos_exp = tf.reshape(x_pos_exp, [N*M, -1]) #N*M x (n_h*K+1)*Fin\n",
359 | " \n",
360 |     "        real_conv_weights = self._weight_variable([Fin*(self.n_h*K+1), Fout], regularization=False, name='_real')\n",
361 |     "        imag_conv_weights = self._weight_variable([Fin*(self.n_h*K+1), Fout], regularization=False, name='_imag')\n",
362 | " \n",
363 | " W_pos_exp = tf.complex(real_conv_weights, -imag_conv_weights)\n",
364 | " \n",
365 | " x_pos_exp_filt = tf.matmul(x_pos_exp, W_pos_exp)\n",
366 | " \n",
367 | " x_filt = 2*tf.real(x_pos_exp_filt)\n",
368 | " return tf.reshape(x_filt, [N, M, Fout])\n",
369 | "\n",
370 | "\n",
371 |     "    def b1relu(self, x): #adds a bias and applies ReLU\n",
372 | " \"\"\"Bias and ReLU. One bias per filter.\"\"\"\n",
373 | " N, M, F = x.get_shape()\n",
374 | " b = self._bias_variable([1, 1, int(F)], regularization=False)\n",
375 | " return tf.nn.relu(x + b) #add the bias to the convolutive layer\n",
376 | "\n",
377 | "\n",
378 |     "    def mpool1(self, x, p): #efficient pooling made possible by the vertex reordering computed a priori\n",
379 | " \"\"\"Max pooling of size p. Should be a power of 2.\"\"\"\n",
380 | " if p > 1:\n",
381 | " x = tf.expand_dims(x, 3) # N x M x F x 1\n",
382 | " x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')\n",
383 | " return tf.squeeze(x, [3]) # N x M/p x F\n",
384 | " else:\n",
385 | " return x\n",
386 | " \n",
387 | "\n",
404 | " def fc(self, x, Mout, relu=True):\n",
405 | " \"\"\"Fully connected layer with Mout features.\"\"\"\n",
406 | " N, Min = x.get_shape()\n",
407 | " W = self._weight_variable([int(Min), Mout], regularization=True)\n",
408 | " b = self._bias_variable([Mout], regularization=True)\n",
409 | " x = tf.matmul(x, W) + b\n",
410 | " return tf.nn.relu(x) if relu else x\n",
411 | " \n",
412 | " #function used for extracting the result of our model\n",
413 | " def _inference(self, x, dropout): #definition of the model\n",
414 | " \n",
415 | " # Graph convolutional layers.\n",
416 | " x = tf.expand_dims(x, 2) # N x M x F=1\n",
417 | " j = 0\n",
418 | " self.list_h = list()\n",
419 | " for i in range(len(self.p)):\n",
420 | " with tf.variable_scope('cgconv{}'.format(i+1)):\n",
421 | " with tf.name_scope('filter'):\n",
422 | " x = self.cayleyConv(x, self.L_np[i*2], self.F[i], self.K[i])\n",
423 | " if (i==0):\n",
424 | " self.debug = x\n",
425 | " with tf.name_scope('bias_relu'):\n",
426 | " x = self.b1relu(tf.cast(tf.real(x), 'float32'))\n",
427 | " with tf.name_scope('pooling'):\n",
428 | " x = self.mpool1(x, self.p[i])\n",
429 | " \n",
430 | " j += int(np.log2(self.p[i])) if self.p[i] > 1 else 0\n",
431 | " \n",
432 | " # Fully connected hidden layers.\n",
433 | " _, M, F = x.get_shape()\n",
434 | " x = tf.reshape(x, [-1, int(M*F)]) # N x M\n",
435 | " for i,M in enumerate(self.M[:-1]): #apply a fully connected layer for each layer defined in M\n",
436 | " #(we discard the last value in M since it contains the number of classes we have\n",
437 | " #to predict)\n",
438 | " with tf.variable_scope('fc{}'.format(i+1)):\n",
439 | " x = self.fc(x, M)\n",
440 | " x = tf.nn.dropout(x, dropout)\n",
441 | " \n",
442 | " # Logits linear layer, i.e. softmax without normalization.\n",
443 | " with tf.variable_scope('logits'):\n",
444 | " x = self.fc(x, self.M[-1], relu=False)\n",
445 | " return x\n",
446 | " \n",
447 | " def __init__(self, p, K, F, M, M_0, batch_size, num_jacobi_iter, L,\n",
448 | " decay_steps, decay_rate, learning_rate=1e-4, momentum=0.9, regularization=5e-4, clip_norm=1e1,\n",
449 | " idx_gpu = '/gpu:0'):\n",
450 | " self.regularizers = list() #list of regularization l2 loss for multiple variables\n",
451 | " self.n_h = 1\n",
452 | " self.num_jacobi_iter = num_jacobi_iter\n",
453 | " self.p = p #dimensions of the pooling layers\n",
454 | " self.K = K #List of polynomial orders, i.e. filter sizes or number of hops\n",
455 | " self.F = F #Number of features of convolutional layers\n",
456 | " \n",
457 | " self.M = M #Number of neurons in fully connected layers\n",
458 | " \n",
459 | " self.M_0 = M_0 #number of elements in the first graph \n",
460 | " \n",
461 | " self.batch_size = batch_size\n",
462 | " \n",
463 | " #definition of some learning parameters\n",
464 | " self.decay_steps = decay_steps\n",
465 | " self.decay_rate = decay_rate\n",
466 | " self.learning_rate = learning_rate\n",
467 | " self.regularization = regularization\n",
468 | " \n",
469 | " with tf.Graph().as_default() as g:\n",
470 | " self.graph = g\n",
471 | " tf.set_random_seed(0)\n",
472 | " with tf.device(idx_gpu):\n",
473 | " #definition of placeholders\n",
474 | " self.L_np = [c_L.toarray().astype('float32') for c_L in L]\n",
475 | " self.ph_data = tf.placeholder(tf.float32, (self.batch_size, M_0), 'data')\n",
476 | " self.ph_labels = tf.placeholder(tf.int32, (self.batch_size), 'labels')\n",
477 | " self.ph_dropout = tf.placeholder(tf.float32, (), 'dropout')\n",
478 | " \n",
479 | " #Model construction\n",
480 | " self.logits = self._inference(self.ph_data, self.ph_dropout)\n",
481 | " \n",
482 | " #Definition of the loss function\n",
483 | " with tf.name_scope('loss'):\n",
484 | " self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits, labels=self.ph_labels)\n",
485 | " self.cross_entropy = tf.reduce_mean(self.cross_entropy)\n",
486 | " with tf.name_scope('regularization'):\n",
487 | " self.regularization *= tf.add_n(self.regularizers)\n",
488 | " self.loss = self.cross_entropy + self.regularization\n",
489 | " \n",
490 | " #Solver Definition\n",
491 | " with tf.name_scope('training'):\n",
492 | " # Learning rate.\n",
493 | " global_step = tf.Variable(0, name='global_step', trainable=False) #used for counting how many iterations we have done\n",
494 | " if decay_rate != 1: #applies an exponential decay of the lr wrt the number of iterations done\n",
495 | " learning_rate = tf.train.exponential_decay(\n",
496 | " learning_rate, global_step, decay_steps, decay_rate, staircase=True)\n",
497 | " # Optimizer.\n",
498 | " if momentum == 0:\n",
499 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n",
500 | " else: #applies momentum for increasing the robustness of the gradient \n",
501 | " optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)\n",
502 | " #grads = optimizer.compute_gradients(self.loss)\n",
503 | " tvars = tf.trainable_variables()\n",
504 | " #grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clip_norm)\n",
505 | " grads, variables = zip(*optimizer.compute_gradients(self.loss))\n",
506 | " grads, _ = tf.clip_by_global_norm(grads, clip_norm)\n",
507 | " self.op_gradients = optimizer.apply_gradients(zip(grads, variables), \n",
508 | " global_step=global_step)\n",
509 | " \n",
510 | " #Computation of the norm gradients (useful for debugging)\n",
511 | " self.var_grad = tf.gradients(self.loss, tf.trainable_variables())\n",
512 | " self.norm_grad = self.frobenius_norm(tf.concat([tf.reshape(g, [-1]) for g in self.var_grad], 0))\n",
513 | "\n",
514 | " #Extraction of the predictions and computation of accuracy\n",
515 | " self.predictions = tf.cast(tf.argmax(self.logits, dimension=1), tf.int32)\n",
516 | " self.accuracy = 100 * tf.contrib.metrics.accuracy(self.predictions, self.ph_labels)\n",
517 | " \n",
518 | " # Create a session for running Ops on the Graph.\n",
519 | " config = tf.ConfigProto(allow_soft_placement = True)\n",
520 | " config.gpu_options.allow_growth = True\n",
521 | " self.session = tf.Session(config=config)\n",
522 | "\n",
523 | " # Run the Op to initialize the variables.\n",
524 | " init = tf.global_variables_initializer()\n",
525 | " self.session.run(init)"
526 | ]
527 | },
528 | {
529 | "cell_type": "markdown",
530 | "metadata": {
531 | "deletable": true,
532 | "editable": true
533 | },
534 | "source": [
535 | "# Training & testing"
536 | ]
537 | },
538 | {
539 | "cell_type": "code",
540 | "execution_count": null,
541 | "metadata": {
542 | "collapsed": true,
543 | "deletable": true,
544 | "editable": true
545 | },
546 | "outputs": [],
547 | "source": [
548 | "#Convolutional parameters\n",
549 | "p = [4, 4] # Dimensions of the pooling layers\n",
550 | "K = [12, 12] # List of polynomial orders, i.e. filter sizes or number of hops\n",
551 | "F = [32, 64] # Number of features of convolutional layers\n",
552 | "\n",
553 | "#FC parameters\n",
554 | "C = max(train_labels) + 1 # Number of classes we have\n",
555 | "M = [512, C] # Number of neurons in fully connected layers\n",
556 | "\n",
557 | "#Solver parameters\n",
558 | "batch_size = 100\n",
559 |     "decay_steps = train_data.shape[0] / batch_size # decay the learning rate once per epoch (one epoch = samples / batch_size steps)\n",
560 | "decay_rate = 0.95\n",
561 | "learning_rate = 0.01\n",
562 | "momentum = 0.9\n",
563 | "regularization = 5e-4\n",
564 | "\n",
565 |     "# Keep probabilities for the dropout layers (1.0 keeps every unit, i.e. no dropout at validation/test time)\n",
566 | "dropout_training = 0.5\n",
567 | "dropout_val_test = 1.0\n",
568 | "\n",
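568 |     "# Number of Jacobi iterations used to approximate the linear solves inside each Cayley filter (more iterations: better approximation, higher cost)\n",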
569 | "num_jacobi_iter = 10"
570 | ]
571 | },
572 | {
573 | "cell_type": "code",
574 | "execution_count": null,
575 | "metadata": {
576 | "collapsed": false,
577 | "deletable": true,
578 | "editable": true
579 | },
580 | "outputs": [],
581 | "source": [
582 |     "# Construction of the learning object (model, solver and session)\n",
583 | "M_0 = L[0].shape[0] # number of elements in the first graph\n",
584 | "learning_obj = CayleyNet(p, K, F, M, M_0, batch_size, num_jacobi_iter, L,\n",
585 | " decay_steps, decay_rate,\n",
586 | " learning_rate=learning_rate, regularization=regularization,\n",
587 | " momentum=momentum)#, clip_norm=100)\n",
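587 |     "# clip_norm is left at its constructor default (1e1); the commented-out argument above shows how to override it\n",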
588 | "\n",
589 | "# definition of overall number of training iterations and validation frequency\n",
590 | "num_iter_val = 600\n",
591 | "num_total_iter_training = 21000\n",
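591 |     "# with batch_size = 100, 21000 iterations amount to roughly 38 passes over the ~55k training images; validation runs every num_iter_val iterations\n",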
592 | "\n",
593 | "num_iter = 0\n",
594 | "\n",
595 | "list_training_loss = list()\n",
596 | "list_training_norm_grad = list()\n",
597 | "list_val_accuracy = list()"
598 | ]
599 | },
600 | {
601 | "cell_type": "code",
602 | "execution_count": null,
603 | "metadata": {
604 | "collapsed": false,
605 | "deletable": true,
606 | "editable": true,
607 | "scrolled": true
608 | },
609 | "outputs": [],
610 | "source": [
611 | "#training and validation\n",
612 |     "indices = collections.deque() # queue holding a random permutation of the training indices\n",
613 | "for k in range(num_iter, num_total_iter_training):\n",
614 | "\n",
615 | " #Construction of the training batch\n",
616 |     "    if len(indices) < batch_size: # make sure every sample is used once before any sample is drawn a second time\n",
617 | " indices.extend(np.random.permutation(train_data.shape[0])) #reinitialize the queue of indices\n",
618 | " idx = [indices.popleft() for i in range(batch_size)] #extract the current batch of samples\n",
619 | "\n",
620 | " #data extraction\n",
621 | " batch_data, batch_labels = train_data[idx,:], train_labels[idx] \n",
622 | "\n",
623 | " feed_dict = {learning_obj.ph_data: batch_data, \n",
624 | " learning_obj.ph_labels: batch_labels, \n",
625 | " learning_obj.ph_dropout: dropout_training}\n",
626 | "\n",
627 | " #Training\n",
628 | " tic = time.time()\n",
629 | " _, current_training_loss, norm_grad = learning_obj.session.run([learning_obj.op_gradients, \n",
630 | " learning_obj.loss, \n",
631 | " learning_obj.norm_grad], feed_dict = feed_dict) \n",
632 | " training_time = time.time() - tic\n",
633 | "\n",
634 | " list_training_loss.append(current_training_loss)\n",
635 | " list_training_norm_grad.append(norm_grad)\n",
636 | " if (np.mod(num_iter, num_iter_val)==0): #validation\n",
637 | " msg = \"[TRN] iter = %03i, cost = %3.2e, |grad| = %.2e (%3.2es)\" \\\n",
638 | " % (num_iter, list_training_loss[-1], list_training_norm_grad[-1], training_time)\n",
639 | " print msg\n",
640 | "\n",
641 | " #Validation Code\n",
642 | " tic = time.time()\n",
643 | " val_accuracy = 0\n",
644 | " for begin in range(0, val_data.shape[0], batch_size):\n",
645 | " end = begin + batch_size\n",
646 | " end = min([end, val_data.shape[0]])\n",
647 | "\n",
648 | " #data extraction\n",
649 | " batch_data = np.zeros((end-begin, val_data.shape[1]))\n",
650 | " batch_data = val_data[begin:end,:]\n",
651 | " batch_labels = np.zeros(batch_size)\n",
652 | " batch_labels[:end-begin] = val_labels[begin:end]\n",
653 | "\n",
654 | " feed_dict = {learning_obj.ph_data: batch_data, \n",
655 | " learning_obj.ph_labels: batch_labels,\n",
656 | " learning_obj.ph_dropout: dropout_val_test}\n",
657 | "\n",
658 | " batch_accuracy = learning_obj.session.run(learning_obj.accuracy, feed_dict)\n",
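658 |     "            # accumulate accuracy weighted by the batch size; after the loop the sum is divided by the number of validation samples\n",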
659 | " val_accuracy += batch_accuracy*batch_data.shape[0]\n",
660 | " val_accuracy = val_accuracy/val_data.shape[0]\n",
661 | "\n",
662 | " val_time = time.time() - tic\n",
663 | " msg = \"[VAL] iter = %03i, acc = %4.2f (%3.2es)\" % (num_iter, val_accuracy, val_time)\n",
664 | " print msg\n",
665 | " num_iter += 1"
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "execution_count": null,
671 | "metadata": {
672 | "collapsed": false,
673 | "deletable": true,
674 | "editable": true
675 | },
676 | "outputs": [],
677 | "source": [
678 | "#Test code\n",
679 | "tic = time.time()\n",
680 | "test_accuracy = 0\n",
681 | "for begin in range(0, test_data.shape[0], batch_size):\n",
682 | " end = begin + batch_size\n",
683 | " end = min([end, test_data.shape[0]])\n",
684 | "\n",
685 | " batch_data = np.zeros((end-begin, test_data.shape[1]))\n",
686 | " batch_data = test_data[begin:end,:]\n",
687 | "\n",
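687 |     "    # feeding a keep probability of 1 disables dropout at test time\n",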
688 | " feed_dict = {learning_obj.ph_data: batch_data, learning_obj.ph_dropout: 1}\n",
689 | "\n",
690 | " batch_labels = np.zeros(batch_size)\n",
691 | " batch_labels[:end-begin] = test_labels[begin:end]\n",
692 | " feed_dict[learning_obj.ph_labels] = batch_labels\n",
693 | "\n",
694 | " batch_accuracy = learning_obj.session.run(learning_obj.accuracy, feed_dict)\n",
695 | " test_accuracy += batch_accuracy*batch_data.shape[0]\n",
696 | "test_accuracy = test_accuracy/test_data.shape[0]\n",
697 | "test_time = time.time() - tic\n",
698 | "msg = \"[TST] iter = %03i, acc = %4.2f (%3.2es)\" % (num_iter, test_accuracy, test_time)\n",
699 | "print msg"
700 | ]
701 | },
702 | {
703 | "cell_type": "code",
704 | "execution_count": null,
705 | "metadata": {
706 | "collapsed": true,
707 | "deletable": true,
708 | "editable": true
709 | },
710 | "outputs": [],
711 | "source": []
712 | }
713 | ],
714 | "metadata": {
715 | "kernelspec": {
716 | "display_name": "Python 2",
717 | "language": "python",
718 | "name": "python2"
719 | },
720 | "language_info": {
721 | "codemirror_mode": {
722 | "name": "ipython",
723 | "version": 2
724 | },
725 | "file_extension": ".py",
726 | "mimetype": "text/x-python",
727 | "name": "python",
728 | "nbconvert_exporter": "python",
729 | "pygments_lexer": "ipython2",
730 | "version": "2.7.6"
731 | }
732 | },
733 | "nbformat": 4,
734 | "nbformat_minor": 1
735 | }
736 |
--------------------------------------------------------------------------------