├── README.md ├── test_fastPFP.py ├── create_dataset.py └── fastPFP.py /README.md: -------------------------------------------------------------------------------- 1 | fastPFP 2 | ======= 3 | 4 | Implementation of the fastPFP approximate subgraph matching algorithm, as described in http://arxiv.org/abs/1207.1114 5 | -------------------------------------------------------------------------------- /test_fastPFP.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from create_dataset import create_dataset_artificial 3 | import matplotlib.pyplot as plt 4 | from fastPFP import fastPFP_faster, loss 5 | 6 | 7 | if __name__ == '__main__': 8 | 9 | np.random.seed(0) 10 | 11 | size1 = 35 12 | size2 = 30 13 | 14 | same = True 15 | 16 | print("Simple 2D example of fastPFP.") 17 | print("Same: %s" % same) 18 | A, B, X1, X2 = create_dataset_artificial(size1, size2, same) 19 | 20 | C = X1 21 | D = X2 22 | lam = 1.0 23 | 24 | print("fastPFP:") 25 | X = fastPFP_faster(A, B, C=C, D=D, lam=lam, alpha=0.5, 26 | threshold1=1.0e-4, threshold2=1.0e-4) 27 | P = (X == X.max(1)[:, None]) 28 | loss_X = loss(A, B, X) 29 | loss_P = loss(A, B, P) 30 | print("Loss(X) = %s" % loss_X) 31 | print("Loss(P) = %s" % loss_P) 32 | 33 | print("") 34 | print("Plotting.") 35 | plt.figure() 36 | X2 = X2 + np.array([1.0, 0.5]) # Adding some constant displacement for visualization purpose. 37 | plt.plot(X1[:, 0], X1[:, 1], 'ro', markersize=10) 38 | plt.plot(X2[:, 0], X2[:, 1], '*b', markersize=10) 39 | mapping12 = P.argmax(1) 40 | if size2 >= size1: 41 | for i in range(size1): 42 | # plt.plot([X1[i,0], X2[mapping12[i], 0]], [X1[i,1], X2[mapping12[i], 1]], 'r-') 43 | temp = X2[mapping12[i]] - X1[i] 44 | plt.arrow(X1[i, 0], X1[i, 1], temp[0], temp[1], head_width=0.05, 45 | head_length=0.05, length_includes_head=True) 46 | 47 | else: 48 | mapping21 = P.argmax(0) 49 | for i in range(size2): 50 | temp = X1[mapping21[i]] - X2[i] 51 | plt.arrow(X2[i, 0], X2[i, 1], temp[0], temp[1], head_width=0.05, 52 | head_length=0.05, length_includes_head=True) 53 | 54 | plt.show() 55 | -------------------------------------------------------------------------------- /create_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.spatial import distance_matrix 3 | 4 | 5 | def create_dataset_artificial(size1, size2, same=True, sigma1=None, sigma2=None, verbose=False): 6 | """This function creates two adjacency matrices graphs whose 7 | respective number of nodes is size1 and size2, respectively. 8 | 9 | The graphs refer to 2D clouds of point where the edges, i.e. the 10 | values of the adjacency matrices, are similarities between points 11 | defined as s(x1, x2) = exp(-d(x1,x2)**2 / sigma**2) where d() is 12 | the Euclidean distance and sigma is either provided by the user or 13 | defined as the median distance between the points. 14 | 15 | If 'same' is True, then the smaller cloud of points is a subset of 16 | the larger cloud, i.e. the corresponding graphs have a perfect 17 | subgraph match. 18 | """ 19 | print("Dateset creation.") 20 | if same: 21 | X = np.random.rand(max([size1, size2]), 2) 22 | X1 = X[:size1] 23 | X2 = X[:size2] 24 | dm = distance_matrix(X, X) 25 | dm1 = dm[:size1, :size1] 26 | dm2 = dm[:size2, :size2] 27 | sigma = np.median(dm[np.triu_indices(dm.shape[0], 1)]) 28 | if sigma1 is None: 29 | sigma1 = sigma 30 | 31 | if sigma2 is None: 32 | sigma2 = sigma 33 | 34 | else: 35 | X1 = np.random.rand(size1, 2) 36 | X2 = np.random.rand(size2, 2) 37 | dm1 = distance_matrix(X1, X1) 38 | dm2 = distance_matrix(X2, X2) 39 | if sigma1 is None: 40 | sigma1 = np.median(dm1[np.triu_indices(size1, 1)]) 41 | 42 | if sigma2 is None: 43 | sigma2 = np.median(dm2[np.triu_indices(size2, 1)]) 44 | 45 | 46 | if verbose: print("create_dataset_artificial: sigma1=%s , sigma2=%s" % (sigma1, sigma2)) 47 | A = np.exp(- dm1 * dm1 / (sigma1 ** 2)) 48 | B = np.exp(- dm2 * dm2 / (sigma2 ** 2)) 49 | 50 | return A, B, X1, X2 51 | -------------------------------------------------------------------------------- /fastPFP.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | from sys import float_info 4 | 5 | 6 | def loss(A, B, P, C=0.0, D=0.0, lam=0.0): 7 | """The subgraph matching loss for weighted undirected (and 8 | unlabeled) graphs. 9 | 10 | A and B are the adjacency matrices, P is a partial permutation 11 | matrix, C and D are k-dimensional node labels, and lam(bda) is the 12 | parameter to balance graph weights and node labels. 13 | 14 | """ 15 | return 0.5 * norm(A - P.dot(B.dot(P.T))) + \ 16 | lam * norm(C - P.dot(D)) 17 | 18 | 19 | def fastPFP(A, B, C=0.0, D=0.0, lam=0.0, alpha=0.5, threshold1=1.0e-6, 20 | threshold2=1.0e-6, X=None, Y=None, verbose=True, max_iter1=100, 21 | max_iter2=100): 22 | """The fastPFP algorithm for the subgraph matching problem, as 23 | proposed in the paper 'A Fast Projected Fixed-Point Algorithm for 24 | Large Graph Matching' by Yao Lu, Kaizhu Huang, Cheng-Lin Liu. 25 | 26 | See: http://arxiv.org/abs/1207.1114 27 | 28 | Note: in the paper A, B, C and D are called A, A', B and B'. 29 | """ 30 | size1 = A.shape[0] 31 | size2 = B.shape[0] 32 | one1 = np.ones((size1, 1)) 33 | one2 = np.ones((size2, 1)) 34 | if X is None: 35 | X = one1.dot(one2.T) / (size1 * size2) 36 | 37 | if Y is None: 38 | Y = np.zeros((size1, size1)) 39 | 40 | K = np.atleast_2d(C).dot(np.atleast_2d(D).T) 41 | 42 | float_max = float_info.max 43 | epsilon1 = epsilon2 = float_max 44 | iter1 = 0 45 | while epsilon1 > threshold1 and iter1 < max_iter1: 46 | Y[:size1, :size2] = A.dot(X.dot(B)) + lam * K 47 | epsilon2 = float_max 48 | iter2 = 0 49 | while epsilon2 > threshold2 and iter2 < max_iter2: 50 | tmp = np.eye(size1, size1) / size1 51 | tmp += (one1.T.dot(Y.dot(one1)) / (size1 * size1)) \ 52 | * (np.eye(size1, size1)) 53 | tmp -= Y / size1 54 | tmp = tmp.dot(one1.dot(one1.T)) 55 | Y_new = Y + tmp - one1.dot(one1.T.dot(Y)) / size1 56 | Y_new = (Y_new + np.abs(Y_new)) / 2.0 57 | epsilon2 = np.abs(Y_new - Y).max() 58 | Y = Y_new 59 | iter2 += 1 60 | 61 | if verbose: 62 | print("epsilon2 = %s" % epsilon2) 63 | 64 | X_new = (1.0 - alpha) * X + alpha * Y[:size1, :size2] 65 | X_new = X_new / X_new.max() 66 | epsilon1 = np.abs(X_new - X).max() 67 | X = X_new 68 | if verbose: 69 | print("epsilon1 = %s" % epsilon1) 70 | loss_X = loss(A, B, X, C, D) 71 | print("Loss(X) = %s" % loss_X) 72 | 73 | iter1 += 1 74 | 75 | return X 76 | 77 | 78 | def fastPFP_faster(A, B, C=0.0, D=0.0, lam=0.0, alpha=0.5, threshold1=1.0e-6, 79 | threshold2=1.0e-6, X=None, Y=None, verbose=True, 80 | max_iter1=100, max_iter2=100): 81 | """A faster and more efficient implementation of fastPFP(). 82 | """ 83 | size1 = A.shape[0] 84 | size2 = B.shape[0] 85 | if X is None: 86 | X = np.ones((size1, size2)) / (size1 * size2) 87 | 88 | if Y is None: 89 | Y = np.zeros((size1, size1)) 90 | 91 | K = np.atleast_2d(C).dot(np.atleast_2d(D).T) 92 | 93 | float_max = float_info.max 94 | epsilon1 = epsilon2 = float_max 95 | iter1 = 0 96 | while epsilon1 > threshold1 and iter1 < max_iter1: 97 | Y[:size1, :size2] = A.dot(X.dot(B)) + lam * K 98 | epsilon2 = float_max 99 | iter2 = 0 100 | while epsilon2 > threshold2 and iter2 < max_iter2: 101 | tmp = (1.0 + Y.sum() / size1 - Y.sum(1)) / size1 102 | Y_new = Y + tmp[:, None] - Y.sum(0) / size1 103 | Y_new = np.clip(Y_new, 0.0, float_max) 104 | epsilon2 = np.abs(Y_new - Y).max() 105 | Y = Y_new 106 | iter2 += 1 107 | 108 | if verbose: 109 | print("epsilon2 = %s" % epsilon2) 110 | 111 | X_new = (1.0 - alpha) * X + alpha * Y[:size1, :size2] 112 | X_new = X_new / X_new.max() 113 | epsilon1 = np.abs(X_new - X).max() 114 | X = X_new 115 | if verbose: 116 | print("epsilon1 = %s" % epsilon1) 117 | 118 | iter1 += 1 119 | 120 | return X 121 | 122 | 123 | def greedy_assignment(X): 124 | """A simple greedy algorithm for the assignment problem as 125 | proposed in the paper of fastPFP. It creates a proper partial 126 | permutation matrix (P) from the result (X) of the optimization 127 | algorithm fastPFP. 128 | """ 129 | XX = X.copy() 130 | min = XX.min() - 1.0 131 | P = np.zeros(X.shape) 132 | while (XX > min).any(): 133 | row, col = np.unravel_index(XX.argmax(), XX.shape) 134 | P[row, col] = 1.0 135 | XX[row, :] = min 136 | XX[:, col] = min 137 | 138 | return P 139 | --------------------------------------------------------------------------------