├── README.md
├── test_fastPFP.py
├── create_dataset.py
└── fastPFP.py


/README.md:
--------------------------------------------------------------------------------
1 | fastPFP
2 | =======
3 | 
4 | Implementation of the fastPFP approximate subgraph matching algorithm, as described in http://arxiv.org/abs/1207.1114
5 | 


--------------------------------------------------------------------------------
/test_fastPFP.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from create_dataset import create_dataset_artificial
 3 | import matplotlib.pyplot as plt
 4 | from fastPFP import fastPFP_faster, loss
 5 | 
 6 | 
 7 | if __name__ == '__main__':
 8 | 
 9 |     np.random.seed(0)
10 | 
11 |     size1 = 35
12 |     size2 = 30
13 | 
14 |     same = True
15 | 
16 |     print("Simple 2D example of fastPFP.")
17 |     print("Same: %s" % same)
18 |     A, B, X1, X2 = create_dataset_artificial(size1, size2, same)
19 | 
20 |     C = X1
21 |     D = X2
22 |     lam = 1.0
23 | 
24 |     print("fastPFP:")
25 |     X = fastPFP_faster(A, B, C=C, D=D, lam=lam, alpha=0.5,
26 |                        threshold1=1.0e-4, threshold2=1.0e-4)
27 |     P = (X == X.max(1)[:, None])
28 |     loss_X = loss(A, B, X)
29 |     loss_P = loss(A, B, P)
30 |     print("Loss(X) = %s" % loss_X)
31 |     print("Loss(P) = %s" % loss_P)
32 | 
33 |     print("")
34 |     print("Plotting.")
35 |     plt.figure()
36 |     X2 = X2 + np.array([1.0, 0.5])  # Adding some constant displacement for visualization purpose.
37 |     plt.plot(X1[:, 0], X1[:, 1], 'ro', markersize=10)
38 |     plt.plot(X2[:, 0], X2[:, 1], '*b', markersize=10)
39 |     mapping12 = P.argmax(1)
40 |     if size2 >= size1:
41 |         for i in range(size1):
42 |             # plt.plot([X1[i,0], X2[mapping12[i], 0]], [X1[i,1], X2[mapping12[i], 1]], 'r-')
43 |             temp = X2[mapping12[i]] - X1[i]
44 |             plt.arrow(X1[i, 0], X1[i, 1], temp[0], temp[1], head_width=0.05,
45 |                       head_length=0.05, length_includes_head=True)
46 | 
47 |     else:
48 |         mapping21 = P.argmax(0)
49 |         for i in range(size2):
50 |             temp = X1[mapping21[i]] - X2[i]
51 |             plt.arrow(X2[i, 0], X2[i, 1], temp[0], temp[1], head_width=0.05,
52 |                       head_length=0.05, length_includes_head=True)
53 | 
54 |     plt.show()
55 | 


--------------------------------------------------------------------------------
/create_dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy.spatial import distance_matrix
 3 | 
 4 | 
 5 | def create_dataset_artificial(size1, size2, same=True, sigma1=None, sigma2=None, verbose=False):
 6 |     """This function creates two adjacency matrices graphs whose
 7 |     respective number of nodes is size1 and size2, respectively.
 8 |     
 9 |     The graphs refer to 2D clouds of point where the edges, i.e. the
10 |     values of the adjacency matrices, are similarities between points
11 |     defined as s(x1, x2) = exp(-d(x1,x2)**2 / sigma**2) where d() is
12 |     the Euclidean distance and sigma is either provided by the user or
13 |     defined as the median distance between the points.
14 | 
15 |     If 'same' is True, then the smaller cloud of points is a subset of
16 |     the larger cloud, i.e. the corresponding graphs have a perfect
17 |     subgraph match.
18 |     """
19 |     print("Dateset creation.")
20 |     if same:
21 |         X = np.random.rand(max([size1, size2]), 2)
22 |         X1 = X[:size1]
23 |         X2 = X[:size2]
24 |         dm = distance_matrix(X, X)
25 |         dm1 = dm[:size1, :size1]
26 |         dm2 = dm[:size2, :size2]
27 |         sigma = np.median(dm[np.triu_indices(dm.shape[0], 1)])
28 |         if sigma1 is None:
29 |             sigma1 = sigma
30 | 
31 |         if sigma2 is None:
32 |             sigma2 = sigma
33 |             
34 |     else:
35 |         X1 = np.random.rand(size1, 2)
36 |         X2 = np.random.rand(size2, 2)
37 |         dm1 = distance_matrix(X1, X1)
38 |         dm2 = distance_matrix(X2, X2)
39 |         if sigma1 is None:
40 |             sigma1 = np.median(dm1[np.triu_indices(size1, 1)])
41 | 
42 |         if sigma2 is None:
43 |             sigma2 = np.median(dm2[np.triu_indices(size2, 1)])
44 | 
45 | 
46 |     if verbose: print("create_dataset_artificial: sigma1=%s , sigma2=%s" % (sigma1, sigma2))
47 |     A = np.exp(- dm1 * dm1 / (sigma1 ** 2))
48 |     B = np.exp(- dm2 * dm2 / (sigma2 ** 2))
49 | 
50 |     return A, B, X1, X2
51 | 


--------------------------------------------------------------------------------
/fastPFP.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from numpy.linalg import norm
  3 | from sys import float_info
  4 | 
  5 | 
  6 | def loss(A, B, P, C=0.0, D=0.0, lam=0.0):
  7 |     """The subgraph matching loss for weighted undirected (and
  8 |     unlabeled) graphs.
  9 | 
 10 |     A and B are the adjacency matrices, P is a partial permutation
 11 |     matrix, C and D are k-dimensional node labels, and lam(bda) is the
 12 |     parameter to balance graph weights and node labels.
 13 | 
 14 |     """
 15 |     return 0.5 * norm(A - P.dot(B.dot(P.T))) + \
 16 |         lam * norm(C - P.dot(D))
 17 | 
 18 | 
 19 | def fastPFP(A, B, C=0.0, D=0.0, lam=0.0, alpha=0.5, threshold1=1.0e-6,
 20 |             threshold2=1.0e-6, X=None, Y=None, verbose=True, max_iter1=100,
 21 |             max_iter2=100):
 22 |     """The fastPFP algorithm for the subgraph matching problem, as
 23 |     proposed in the paper 'A Fast Projected Fixed-Point Algorithm for
 24 |     Large Graph Matching' by Yao Lu, Kaizhu Huang, Cheng-Lin Liu.
 25 | 
 26 |     See: http://arxiv.org/abs/1207.1114
 27 | 
 28 |     Note: in the paper A, B, C and D are called A, A', B and B'.
 29 |     """
 30 |     size1 = A.shape[0]
 31 |     size2 = B.shape[0]
 32 |     one1 = np.ones((size1, 1))
 33 |     one2 = np.ones((size2, 1))
 34 |     if X is None:
 35 |         X = one1.dot(one2.T) / (size1 * size2)
 36 | 
 37 |     if Y is None:
 38 |         Y = np.zeros((size1, size1))
 39 | 
 40 |     K = np.atleast_2d(C).dot(np.atleast_2d(D).T)
 41 | 
 42 |     float_max = float_info.max
 43 |     epsilon1 = epsilon2 = float_max
 44 |     iter1 = 0
 45 |     while epsilon1 > threshold1 and iter1 < max_iter1:
 46 |         Y[:size1, :size2] = A.dot(X.dot(B)) + lam * K
 47 |         epsilon2 = float_max
 48 |         iter2 = 0
 49 |         while epsilon2 > threshold2 and iter2 < max_iter2:
 50 |             tmp = np.eye(size1, size1) / size1
 51 |             tmp += (one1.T.dot(Y.dot(one1)) / (size1 * size1)) \
 52 |                    * (np.eye(size1, size1))
 53 |             tmp -= Y / size1
 54 |             tmp = tmp.dot(one1.dot(one1.T))
 55 |             Y_new = Y + tmp - one1.dot(one1.T.dot(Y)) / size1
 56 |             Y_new = (Y_new + np.abs(Y_new)) / 2.0
 57 |             epsilon2 = np.abs(Y_new - Y).max()
 58 |             Y = Y_new
 59 |             iter2 += 1
 60 | 
 61 |         if verbose:
 62 |             print("epsilon2 = %s" % epsilon2)
 63 | 
 64 |         X_new = (1.0 - alpha) * X + alpha * Y[:size1, :size2]
 65 |         X_new = X_new / X_new.max()
 66 |         epsilon1 = np.abs(X_new - X).max()
 67 |         X = X_new
 68 |         if verbose:
 69 |             print("epsilon1 = %s" % epsilon1)
 70 |             loss_X = loss(A, B, X, C, D)
 71 |             print("Loss(X) = %s" % loss_X)
 72 | 
 73 |         iter1 += 1
 74 | 
 75 |     return X
 76 | 
 77 | 
 78 | def fastPFP_faster(A, B, C=0.0, D=0.0, lam=0.0, alpha=0.5, threshold1=1.0e-6,
 79 |                    threshold2=1.0e-6, X=None, Y=None, verbose=True,
 80 |                    max_iter1=100, max_iter2=100):
 81 |     """A faster and more efficient implementation of fastPFP().
 82 |     """
 83 |     size1 = A.shape[0]
 84 |     size2 = B.shape[0]
 85 |     if X is None:
 86 |         X = np.ones((size1, size2)) / (size1 * size2)
 87 | 
 88 |     if Y is None:
 89 |         Y = np.zeros((size1, size1))
 90 | 
 91 |     K = np.atleast_2d(C).dot(np.atleast_2d(D).T)
 92 | 
 93 |     float_max = float_info.max
 94 |     epsilon1 = epsilon2 = float_max
 95 |     iter1 = 0
 96 |     while epsilon1 > threshold1 and iter1 < max_iter1:
 97 |         Y[:size1, :size2] = A.dot(X.dot(B)) + lam * K
 98 |         epsilon2 = float_max
 99 |         iter2 = 0
100 |         while epsilon2 > threshold2 and iter2 < max_iter2:
101 |             tmp = (1.0 + Y.sum() / size1 - Y.sum(1)) / size1
102 |             Y_new = Y + tmp[:, None] - Y.sum(0) / size1
103 |             Y_new = np.clip(Y_new, 0.0, float_max)
104 |             epsilon2 = np.abs(Y_new - Y).max()
105 |             Y = Y_new
106 |             iter2 += 1
107 | 
108 |         if verbose:
109 |             print("epsilon2 = %s" % epsilon2)
110 | 
111 |         X_new = (1.0 - alpha) * X + alpha * Y[:size1, :size2]
112 |         X_new = X_new / X_new.max()
113 |         epsilon1 = np.abs(X_new - X).max()
114 |         X = X_new
115 |         if verbose:
116 |             print("epsilon1 = %s" % epsilon1)
117 | 
118 |         iter1 += 1
119 | 
120 |     return X
121 | 
122 | 
def greedy_assignment(X):
    """Greedy solution of the assignment problem, as proposed in the
    paper of fastPFP.

    Starting from the result (X) of the optimization algorithm
    fastPFP, repeatedly pick the largest entry still available, set
    the matching entry of P to 1 and discard the whole row and column
    of that entry. The result (P) is a proper partial permutation
    matrix with the same shape as X. X itself is left untouched.
    """
    remaining = X.copy()
    # Value strictly below every entry of X, marking discarded cells.
    sentinel = remaining.min() - 1.0
    P = np.zeros(X.shape)
    while np.any(remaining > sentinel):
        best = remaining.argmax()
        row, col = np.unravel_index(best, remaining.shape)
        P[row, col] = 1.0
        # This row and this column cannot be selected again.
        remaining[row, :] = sentinel
        remaining[:, col] = sentinel

    return P
139 | 


--------------------------------------------------------------------------------