├── figure_1.png
├── nips02-metric.pdf
├── Derivation of Newton.pdf
├── README.md
├── D_constraint.py
└── Newton.py

/figure_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolister-Ye/Distance-metric-learning/HEAD/figure_1.png
--------------------------------------------------------------------------------
/nips02-metric.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolister-Ye/Distance-metric-learning/HEAD/nips02-metric.pdf
--------------------------------------------------------------------------------
/Derivation of Newton.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Coolister-Ye/Distance-metric-learning/HEAD/Derivation of Newton.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Distance-metric-learning
This code implements the learning algorithm from the paper "Distance metric learning, with application to clustering with side-information" [Eric P. Xing et al., NIPS 2002]. Beyond the original material, the repository adds implementation notes that clarify the derivation of the formulas in the paper (see "Derivation of Newton.pdf"), along with further experimental results for the learned metric.

![alt text](https://github.com/JasonYee/Distance-metric-learning/blob/master/figure_1.png)

The figure above plots the objective function on artificial data for the case where A is diagonal. As the plot shows, the function is convex.
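For context, the algorithm learns a diagonal matrix A = diag(a) defining the metric d_A(x, y) = sqrt((x - y)' A (x - y)), which pulls similar pairs together while keeping dissimilar pairs apart. Below is a minimal sketch of how a learned `a` would be used; the helper name `metric_distance` is illustrative, not part of this repository:

```python
import numpy as np

def metric_distance(x, y, a):
    """Distance under the learned diagonal metric A = diag(a)."""
    diff = x - y
    return np.sqrt(np.dot(diff**2, a))  # equals sqrt(diff' A diff) when A is diagonal
```

After `Newton(X, S, D, C)` (see Newton.py) returns the vector `a`, it plugs directly into this function.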
--------------------------------------------------------------------------------
/D_constraint.py:
--------------------------------------------------------------------------------
"""
Compute the value, first derivative, and second derivative (Hessian) of
the dissimilarity constraint function

    g(A) = log( sum_{ij in D} sqrt(trace(d_ij * A * d_ij')) ),  d_ij = x_i - x_j,

where A is a diagonal matrix. By the chain rule,

    dg/dA = (1 / sum_{ij in D} sqrt(trace(d_ij * A * d_ij')))
            * 0.5 * sum_{ij in D} (d_ij' * d_ij) / sqrt(trace(d_ij * A * d_ij'))
"""
import numpy as np


def d_constraint(X, D, a):
    n, d = X.shape
    sum_dist = 0
    sum_deri1 = np.zeros(d)
    sum_deri2 = np.zeros((d, d))

    # Accumulate the distance and its derivatives over all dissimilar pairs.
    for i in range(n):
        for j in range(i+1, n):
            if D[i, j] == 1:
                d_ij = X[i] - X[j]
                dist_ij, deri1_d_ij, deri2_d_ij = distance1(a, d_ij)
                sum_dist += dist_ij
                sum_deri1 += deri1_d_ij
                sum_deri2 += deri2_d_ij
    fD, fD_1st_d, fD_2nd_d = gf(sum_dist, sum_deri1, sum_deri2)
    return [fD, fD_1st_d, fD_2nd_d]


def gf(sum_dist, sum_deri1, sum_deri2):
    # g = log(sum of pairwise distances), with its gradient and Hessian.
    fD = np.log(sum_dist)
    fD_1st_d = sum_deri1/sum_dist
    fD_2nd_d = sum_deri2/sum_dist - np.outer(sum_deri1, sum_deri1)/sum_dist**2
    return [fD, fD_1st_d, fD_2nd_d]


def distance1(a, d_ij):
    fudge = 0.000001
    dist_ij = np.sqrt(np.dot(d_ij**2, a))  # distance between X[i] and X[j]
    # If dist_ij == 0, shift the denominator by a small fudge factor to avoid dividing by zero.
    deri1_d_ij = 0.5*(d_ij**2)/(dist_ij + (dist_ij == 0)*fudge)
    deri2_d_ij = -0.25*np.outer(d_ij**2, d_ij**2)/(dist_ij**3 + (dist_ij == 0)*fudge)
    return [dist_ij, deri1_d_ij, deri2_d_ij]


"""
Checking code.
The result should be 1.039, [0.25, 0.25], [[-0.125, -0.125], [-0.125, -0.125]].

x = np.array([[1, 2], [3, 4]])
d = np.ones((2, 2)) - np.tril(np.ones((2, 2)))
a = np.array([1, 1])

re = d_constraint(x, d, a)
print(re)
"""
--------------------------------------------------------------------------------
/Newton.py:
--------------------------------------------------------------------------------
"""
Solve the constrained optimization problem using the Newton-Raphson method.
"""
import numpy as np
from D_constraint import d_constraint  # D_constraint.py sits next to this file


def Newton(X, S, D, C):
    n, d = X.shape
    a = np.ones(d)

    fudge = 0.000001
    threshold1 = 0.001
    reduction = 2

    # sum(d'Ad) = sum(trace(d'Ad)) = sum(trace(dd'A)) = trace(sum(dd')A)
    # For diagonal A this reduces to sum(d_ij'a) = (sum d_ij')a,
    # where d_ij = [(x_i1-x_j1)**2, ..., (x_in-x_jn)**2]'
    s_sum = np.zeros(d)
    d_sum = np.zeros(d)
    for i in range(n):
        for j in range(i+1, n):
            d_ij = X[i] - X[j]
            if S[i, j] == 1:
                s_sum += d_ij**2
            elif D[i, j] == 1:
                d_sum += d_ij**2

    tt = 1
    error = 1
    while error > threshold1:
        fd0, fd_1st_d, fd_2nd_d = d_constraint(X, D, a)
        obj_initial = s_sum.dot(a) - C*fd0
        fs_1st_d = s_sum                    # first derivative of the S constraint
        gradient = fs_1st_d - C*fd_1st_d    # gradient of the objective
        Hessian = -C*fd_2nd_d + fudge*np.eye(d)
        invHessian = np.linalg.inv(Hessian)
        step = np.dot(invHessian, gradient)

        # Newton-Raphson update, a[n+1] = a[n] - lambda*H^{-1}*gradient,
        # with a search over the step size lambda.
        lambda1 = 1
        t = 1
        a_previous = a.copy()  # fall back to the current a if no step improves the objective
        atemp = a - lambda1*step
        atemp = np.maximum(atemp, 0.000001)  # keep a positive

        fdd0 = d_constraint(X, D, atemp)
        obj = s_sum.dot(atemp) - C*fdd0[0]   # objective at atemp; compare against obj_initial
        obj_previous = obj * 1.1             # just to get the while loop started

        # Halve lambda while the objective keeps decreasing; keep the last improving step.
        while obj < obj_previous:
            obj_previous = obj
            a_previous = atemp
            lambda1 /= reduction
            atemp = a - lambda1*step
            atemp = np.maximum(atemp, 0.000001)
            fdd0 = d_constraint(X, D, atemp)
            obj = s_sum.dot(atemp) - C*fdd0[0]
            t += 1

        a = a_previous
        error = abs((obj_previous - obj_initial)/obj_previous)
        tt += 1
    return a

"""
Checking code.
"""

x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
d = np.zeros((4, 4))
d[0, 1] = 1
s = np.zeros((4, 4))
s[2, 3] = 1
a = np.array([1, 1])

re = Newton(x, s, d, 1)
print(re)

"""
Draw the objective function, obj_initial = s_sum.dot(a) - C*fd0.
"""

"""
import matplotlib.pyplot as plt
import numpy as np

s_sum = np.array([4, 4])
d_sum = np.array([4, 4])
a1 = np.linspace(0.0001, 0.4, 300)
x, y = np.meshgrid(a1, a1)
z = 4*(x + y) - 0.5*np.log(np.sqrt(4*(x + y)))
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in newer matplotlib
ax.plot_surface(x, y, z)
plt.show()
"""
--------------------------------------------------------------------------------
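As a sanity check on the analytic derivatives in D_constraint.py, a finite-difference comparison can be run against `d_constraint`. The snippet below is an illustrative addition, not part of the repository; it assumes D_constraint.py is importable from the working directory, and on the toy data both printed values for each component should be about 0.25:

```python
import numpy as np
from D_constraint import d_constraint

# Same toy data as the checking code inside D_constraint.py.
x = np.array([[1, 2], [3, 4]])
d = np.ones((2, 2)) - np.tril(np.ones((2, 2)))  # d[0, 1] = 1 marks the dissimilar pair
a = np.array([1.0, 1.0])
eps = 1e-6

fD, grad, _ = d_constraint(x, d, a)
for k in range(len(a)):
    a_plus = a.copy()
    a_plus[k] += eps
    numeric = (d_constraint(x, d, a_plus)[0] - fD) / eps  # forward difference
    print(f"component {k}: analytic {grad[k]:.4f}, numeric {numeric:.4f}")
```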