├── figure_1.png
├── nips02-metric.pdf
├── Derivation of Newton.pdf
├── README.md
├── D_constraint.py
└── Newton.py

/figure_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolister-Ye/Distance-metric-learning/HEAD/figure_1.png
--------------------------------------------------------------------------------
/nips02-metric.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolister-Ye/Distance-metric-learning/HEAD/nips02-metric.pdf
--------------------------------------------------------------------------------
/Derivation of Newton.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolister-Ye/Distance-metric-learning/HEAD/Derivation of Newton.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Distance-metric-learning
This code implements the learning algorithm from the paper "Distance metric learning, with application to clustering with side-information" [Eric P. Xing et al., NIPS 2002]. Beyond the original material, the repository adds implementation notes that clarify the derivation of the formulas in the paper (see "Derivation of Newton.pdf"), along with further experimental results for the learned metric.

![alt text](https://github.com/JasonYee/Distance-metric-learning/blob/master/figure_1.png)

The figure above plots the objective function on artificial data for the case where A is diagonal. As the plot shows, the function is convex.
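For context, the algorithm learns a diagonal matrix A = diag(a) defining the metric d_A(x, y) = sqrt((x - y)' A (x - y)), which pulls similar pairs together while keeping dissimilar pairs apart. Below is a minimal sketch of how a learned `a` would be used; the helper name `metric_distance` is illustrative, not part of this repository:

```python
import numpy as np

def metric_distance(x, y, a):
    """Distance under the learned diagonal metric A = diag(a)."""
    diff = x - y
    return np.sqrt(np.dot(diff**2, a))  # equals sqrt(diff' A diff) when A is diagonal
```

After `Newton(X, S, D, C)` (see Newton.py) returns the vector `a`, it plugs directly into this function.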
--------------------------------------------------------------------------------
/D_constraint.py:
--------------------------------------------------------------------------------
"""
Compute the value, first derivative, and second derivative (Hessian) of
the dissimilarity constraint function

    g(A) = log( sum_{ij in D} sqrt(trace(d_ij * A * d_ij')) ),  d_ij = x_i - x_j,

where A is a diagonal matrix. By the chain rule,

    dg/dA = (1 / sum_{ij in D} sqrt(trace(d_ij * A * d_ij')))
            * 0.5 * sum_{ij in D} (d_ij' * d_ij) / sqrt(trace(d_ij * A * d_ij'))
"""
import numpy as np


def d_constraint(X, D, a):
    n, d = X.shape
    sum_dist = 0
    sum_deri1 = np.zeros(d)
    sum_deri2 = np.zeros((d, d))

    # Accumulate the distance and its derivatives over all dissimilar pairs.
    for i in range(n):
        for j in range(i+1, n):
            if D[i, j] == 1:
                d_ij = X[i] - X[j]
                dist_ij, deri1_d_ij, deri2_d_ij = distance1(a, d_ij)
                sum_dist += dist_ij
                sum_deri1 += deri1_d_ij
                sum_deri2 += deri2_d_ij
    fD, fD_1st_d, fD_2nd_d = gf(sum_dist, sum_deri1, sum_deri2)
    return [fD, fD_1st_d, fD_2nd_d]


def gf(sum_dist, sum_deri1, sum_deri2):
    # g = log(sum of pairwise distances), with its gradient and Hessian.
    fD = np.log(sum_dist)
    fD_1st_d = sum_deri1/sum_dist
    fD_2nd_d = sum_deri2/sum_dist - np.outer(sum_deri1, sum_deri1)/sum_dist**2
    return [fD, fD_1st_d, fD_2nd_d]


def distance1(a, d_ij):
    fudge = 0.000001
    dist_ij = np.sqrt(np.dot(d_ij**2, a))  # distance between X[i] and X[j]
    # If dist_ij == 0, shift the denominator by a small fudge factor to avoid dividing by zero.
    deri1_d_ij = 0.5*(d_ij**2)/(dist_ij + (dist_ij == 0)*fudge)
    deri2_d_ij = -0.25*np.outer(d_ij**2, d_ij**2)/(dist_ij**3 + (dist_ij == 0)*fudge)
    return [dist_ij, deri1_d_ij, deri2_d_ij]


"""
Checking code.
The result should be 1.039, [0.25, 0.25], [[-0.125, -0.125], [-0.125, -0.125]].

x = np.array([[1, 2], [3, 4]])
d = np.ones((2, 2)) - np.tril(np.ones((2, 2)))
a = np.array([1, 1])

re = d_constraint(x, d, a)
print(re)
"""
--------------------------------------------------------------------------------
/Newton.py:
--------------------------------------------------------------------------------
"""
Solve the constrained optimization problem using the Newton-Raphson method.
"""
import numpy as np
from D_constraint import d_constraint  # D_constraint.py sits next to this file


def Newton(X, S, D, C):
    n, d = X.shape
    a = np.ones(d)

    fudge = 0.000001
    threshold1 = 0.001
    reduction = 2

    # sum(d'Ad) = sum(trace(d'Ad)) = sum(trace(dd'A)) = trace(sum(dd')A)
    # For diagonal A this reduces to sum(d_ij'a) = (sum d_ij')a,
    # where d_ij = [(x_i1-x_j1)**2, ..., (x_in-x_jn)**2]'
    s_sum = np.zeros(d)
    d_sum = np.zeros(d)
    for i in range(n):
        for j in range(i+1, n):
            d_ij = X[i] - X[j]
            if S[i, j] == 1:
                s_sum += d_ij**2
            elif D[i, j] == 1:
                d_sum += d_ij**2

    tt = 1
    error = 1
    while error > threshold1:
        fd0, fd_1st_d, fd_2nd_d = d_constraint(X, D, a)
        obj_initial = s_sum.dot(a) - C*fd0
        fs_1st_d = s_sum                    # first derivative of the S constraint
        gradient = fs_1st_d - C*fd_1st_d    # gradient of the objective
        Hessian = -C*fd_2nd_d + fudge*np.eye(d)
        invHessian = np.linalg.inv(Hessian)
        step = np.dot(invHessian, gradient)

        # Newton-Raphson update, a[n+1] = a[n] - lambda*H^{-1}*gradient,
        # with a search over the step size lambda.
        lambda1 = 1
        t = 1
        a_previous = a.copy()  # fall back to the current a if no step improves the objective
        atemp = a - lambda1*step
        atemp = np.maximum(atemp, 0.000001)  # keep a positive

        fdd0 = d_constraint(X, D, atemp)
        obj = s_sum.dot(atemp) - C*fdd0[0]   # objective at atemp; compare against obj_initial
        obj_previous = obj * 1.1             # just to get the while loop started

        # Halve lambda while the objective keeps decreasing; keep the last improving step.
        while obj < obj_previous:
            obj_previous = obj
            a_previous = atemp
            lambda1 /= reduction
            atemp = a - lambda1*step
            atemp = np.maximum(atemp, 0.000001)
            fdd0 = d_constraint(X, D, atemp)
            obj = s_sum.dot(atemp) - C*fdd0[0]
            t += 1

        a = a_previous
        error = abs((obj_previous - obj_initial)/obj_previous)
        tt += 1
    return a

"""
Checking code.
"""

x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
d = np.zeros((4, 4))
d[0, 1] = 1
s = np.zeros((4, 4))
s[2, 3] = 1
a = np.array([1, 1])

re = Newton(x, s, d, 1)
print(re)

"""
Draw the objective function, obj_initial = s_sum.dot(a) - C*fd0.
"""

"""
import matplotlib.pyplot as plt
import numpy as np

s_sum = np.array([4, 4])
d_sum = np.array([4, 4])
a1 = np.linspace(0.0001, 0.4, 300)
x, y = np.meshgrid(a1, a1)
z = 4*(x + y) - 0.5*np.log(np.sqrt(4*(x + y)))
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in newer matplotlib
ax.plot_surface(x, y, z)
plt.show()
"""
--------------------------------------------------------------------------------
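As a sanity check on the analytic derivatives in D_constraint.py, a finite-difference comparison can be run against `d_constraint`. The snippet below is an illustrative addition, not part of the repository; it assumes D_constraint.py is importable from the working directory, and on the toy data both printed values for each component should be about 0.25:

```python
import numpy as np
from D_constraint import d_constraint

# Same toy data as the checking code inside D_constraint.py.
x = np.array([[1, 2], [3, 4]])
d = np.ones((2, 2)) - np.tril(np.ones((2, 2)))  # d[0, 1] = 1 marks the dissimilar pair
a = np.array([1.0, 1.0])
eps = 1e-6

fD, grad, _ = d_constraint(x, d, a)
for k in range(len(a)):
    a_plus = a.copy()
    a_plus[k] += eps
    numeric = (d_constraint(x, d, a_plus)[0] - fD) / eps  # forward difference
    print(f"component {k}: analytic {grad[k]:.4f}, numeric {numeric:.4f}")
```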