├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── aaa.R ├── dca.R ├── gdmd.r ├── gdmf.r ├── itml.R ├── kdca.R ├── kmatrixGauss.R ├── lmnn.R ├── misc.R ├── rca.R └── sdml-package.R ├── README.md ├── man ├── GdmDiag.Rd ├── GdmFull.Rd ├── dca.Rd ├── kdca.Rd ├── kmatrixGauss.Rd ├── rca.Rd └── sdml-package.Rd ├── sdml.Rproj └── tests ├── testthat.R └── testthat └── test_helper_functions.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^cran-comments\.md$ 5 | ^CONDUCT\.md$ 6 | ^NEWS\.md$ 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: sdml 2 | Version: 1.0.0 3 | Date: 2012-01-08 4 | Title: Supervised Distance Metric Learning with R 5 | Description: The sdml package aims to implement the state-of-the-art 6 | algorithms for supervised distance metric learning. 7 | It includes global and local methods such as 8 | (Kernel) Relevant Component Analysis, 9 | (Kernel) Discriminative Component Analysis, 10 | (Kernel) Local Fisher Discriminant Analysis, etc. 11 | These distance metric learning methods are widely applied in 12 | feature extraction, dimensionality reduction, clustering, 13 | classification, information retrieval, 14 | and computer vision problems. 15 | Depends: 16 | MASS 17 | Imports: 18 | rARPACK, 19 | lfda 20 | Suggests: testthat 21 | Author: Tao Gao , Nan Xiao , Yuan Tang 22 | Maintainer: Yuan Tang 23 | License: MIT + file LICENSE 24 | URL: https://github.com/nanxstats/sdml 25 | BugReports: https://github.com/nanxstats/sdml/issues 26 | Encoding: UTF-8 27 | RoxygenNote: 6.1.1 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015 2 | COPYRIGHT HOLDER: Yuan Tang, Tao Gao, and Nan Xiao 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(dca) 4 | export(kdca) 5 | export(kmatrixGauss) 6 | export(rca) 7 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | Version 1.0.0 (2012-01-10) 2 | ------------------------------------------------------------------------------ 3 | 4 | * initial release 5 | 6 | NEW FEATURES 7 | 8 | * initial version of the Relevant Component Analysis (RCA) algorithm 9 | * initial version of the Kernel Relevant Component Analysis (KRCA) algorithm 10 | * initial version of the Local Fisher Discriminant Analysis (LFDA) algorithm 11 | * initial version of the Kernel Local Fisher Discriminant Analysis (KLFDA) algorithm 12 | * initial version of the Discriminative Component Analysis (DCA) algorithm 13 | * initial version of the Kernel Discriminative Component Analysis (KDCA) algorithm 14 | 15 | -------------------------------------------------------------------------------- /R/aaa.R: 
-------------------------------------------------------------------------------- 1 | 2 | #Avoid false positives in R CMD CHECK: 3 | utils::globalVariables(c("Class")) 4 | -------------------------------------------------------------------------------- /R/dca.R: -------------------------------------------------------------------------------- 1 | #' Discriminative Component Analysis 2 | #' 3 | #' Performs discriminative component analysis on the given data. 4 | #' 5 | #' Put DCA function details here. 6 | #' 7 | #' @param data \code{n * d} data matrix. \code{n} is the number of data points, 8 | #' \code{d} is the dimension of the data. 9 | #' Each data point is a row in the matrix. 10 | #' @param chunks length \code{n} vector describing the chunklets: 11 | #' \code{-1} in the \code{i} th place means point \code{i} 12 | #' doesn't belong to any chunklet; 13 | #' integer \code{j} in place \code{i} means point \code{i} 14 | #' belongs to chunklet j. 15 | #' The chunklets indexes should be 1:(number of chunklets). 16 | #' @param neglinks \code{s * s} symmetric matrix describing the negative relationship 17 | #' between all the \code{s} chunklets. 18 | #' For the element \eqn{neglinks_{ij}}: 19 | #' \eqn{neglinks_{ij} = 1} means chunklet \code{i} and chunklet {j} 20 | #' have negative constraint(s); 21 | #' \eqn{neglinks_{ij} = 0} means chunklet \code{i} and chunklet {j} 22 | #' don't have negative constraints 23 | #' or we don't have information about that. 24 | #' @param useD Integer. Optional. When not given, DCA is done in the 25 | #' original dimension and B is full rank. When useD is given, 26 | #' DCA is preceded by constraints based LDA which reduces the 27 | #' dimension to useD. B in this case is of rank useD. 28 | #' 29 | #' @return list of the DCA results: 30 | #' \item{B}{DCA suggested Mahalanobis matrix} 31 | #' \item{DCA}{DCA suggested transformation of the data. 32 | #' The dimension is (original data dimension) * (useD)} 33 | #' \item{newData}{DCA transformed data} 34 | #' 35 | #' For every two original data points (x1, x2) in newData (y1, y2): 36 | #' 37 | #' \eqn{(x2 - x1)' * B * (x2 - x1) = || (x2 - x1) * A ||^2 = || y2 - y1 ||^2} 38 | #' 39 | #' @keywords dca discriminant component transformation mahalanobis metric 40 | #' 41 | #' @aliases dca 42 | #' 43 | #' @note Put some note here. 44 | #' 45 | #' @author Nan Xiao <\url{https://nanx.me}> 46 | #' 47 | #' @seealso See \code{\link{kdca}} for the kernelized version of DCA. 48 | #' 49 | #' @export dca 50 | #' 51 | #' @references 52 | #' Steven C.H. Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2006). 53 | #' Learning Distance Metrics with Contextual Constraints for Image Retrieval. 54 | #' \emph{Proceedings IEEE Conference on Computer Vision and Pattern Recognition 55 | #' (CVPR2006)}. 
56 | #' 57 | #' @examples 58 | #' set.seed(123) 59 | #' require(MASS) # generate synthetic Gaussian data 60 | #' k = 100 # sample size of each class 61 | #' n = 3 # specify how many class 62 | #' N = k * n # total sample number 63 | #' x1 = mvrnorm(k, mu = c(-10, 6), matrix(c(10, 4, 4, 10), ncol = 2)) 64 | #' x2 = mvrnorm(k, mu = c(0, 0), matrix(c(10, 4, 4, 10), ncol = 2)) 65 | #' x3 = mvrnorm(k, mu = c(10, -6), matrix(c(10, 4, 4, 10), ncol = 2)) 66 | #' data = as.data.frame(rbind(x1, x2, x3)) 67 | 68 | #' # The fully labeled data set with 3 classes 69 | #' plot(data$V1, data$V2, bg = c("#E41A1C", "#377EB8", "#4DAF4A")[gl(n, k)], 70 | #' pch = c(rep(22, k), rep(21, k), rep(25, k))) 71 | #' Sys.sleep(3) 72 | 73 | #' # Same data unlabeled; clearly the classes' structure is less evident 74 | #' plot(x$V1, x$V2) 75 | #' Sys.sleep(3) 76 | #' 77 | #' chunk1 = sample(1:100, 5) 78 | #' chunk2 = sample(setdiff(1:100, chunk1), 5) 79 | #' chunk3 = sample(101:200, 5) 80 | #' chunk4 = sample(setdiff(101:200, chunk3), 5) 81 | #' chunk5 = sample(201:300, 5) 82 | #' chks = list(chunk1, chunk2, chunk3, chunk4, chunk5) 83 | 84 | #' chunks = rep(-1, 300) 85 | 86 | #' # positive samples in the chunks 87 | #' for (i in 1:5) { 88 | #' for (j in chks[[i]]) { 89 | #' chunks[j] = i 90 | #' } 91 | #' } 92 | #' 93 | #' # define the negative constrains between chunks 94 | #' neglinks = matrix(c( 95 | #' 0, 0, 1, 1, 1, 96 | #' 0, 0, 1, 1, 1, 97 | #' 1, 1, 0, 0, 0, 98 | #' 1, 1, 0, 0, 1, 99 | #' 1, 1, 1, 1, 0), 100 | #' ncol = 5, byrow = TRUE) 101 | #' 102 | #' dcaData = dca(data = data, chunks = chunks, neglinks = neglinks)$newData 103 | #' # plot DCA transformed data 104 | #' plot(dcaData[, 1], dcaData[, 2], bg = c("#E41A1C", "#377EB8", "#4DAF4A")[gl(n, k)], 105 | #' pch = c(rep(22, k), rep(21, k), rep(25, k)), 106 | #' xlim = c(-15, 15), ylim = c(-15, 15)) 107 | 108 | dca <- function(data, chunks, neglinks, useD = NULL) { 109 | 110 | data = t(as.matrix(data)) 111 | chunks = as.matrix(chunks) 112 | neglinks = as.matrix(neglinks) 113 | 114 | d = nrow(data) 115 | n = ncol(data) 116 | 117 | if(is.null(useD)) useD = d 118 | 119 | # 1. Compute means of chunks 120 | s = max(chunks) 121 | M = matrix(NA, ncol = s, nrow = d) 122 | 123 | for (i in 1:s) { 124 | inds = which(chunks == i) 125 | M[ , i] = as.matrix(rowMeans(data[ , inds])) 126 | } 127 | 128 | # 2. Compute Cb 129 | Cb = mat.or.vec(d, d) 130 | N_d = 0 131 | for (j in 1:s) { 132 | inds = which(neglinks[j, ] == 1) 133 | for (i in 1:length(inds)) { 134 | Cb = Cb + ((M[ , j] - M[ , inds[i]]) %*% t(M[ , j] - M[ , inds[i]])) 135 | } 136 | N_d = N_d + length(inds) 137 | } 138 | 139 | if (N_d == 0) { 140 | Cb = diag(d) 141 | } else { 142 | Cb = Cb/N_d 143 | } 144 | 145 | # 3. Compute Cw 146 | 147 | Cw = mat.or.vec(d, d) 148 | N_w = 0 149 | 150 | for (j in 1:s) { 151 | inds = which(chunks == j) 152 | for (i in 1:length(inds)) { 153 | Cw = Cw + ((data[ , inds[i]] - M[ , j]) %*% t(data[ , inds[i]] - M[ , j])) 154 | } 155 | N_w = N_w + length(inds) 156 | } 157 | 158 | Cw = Cw/N_w 159 | 160 | # 3. 
Diagonalize Cb 161 | 162 | eigTmp = eigen(Cb) 163 | eigVec = eigTmp$vectors 164 | eigVal = as.matrix(eigTmp$values) 165 | index = which(abs(eigVal) > 1e-9) # find Non-Zero EigVals 166 | 167 | if (useD < d) { 168 | R = eigVec[ , index[1:useD]] # R have already sorted eigenvalues 169 | } else { 170 | R = eigVec[ , index] # Each col of D is an eigenvector 171 | } 172 | 173 | Db = t(R) %*% Cb %*% as.matrix(R) 174 | Z = as.matrix(R) %*% ((Db) %^% (-0.5)) 175 | 176 | # Diagonalize t(Z) %*% Cw %*% Z 177 | Cz = t(Z) %*% Cw %*% as.matrix(Z) 178 | eigVal = eigen(Cz)$values 179 | if (length(eigVal) == 1) { 180 | Dw = as.matrix(eigVal) 181 | } else { 182 | Dw = diag(eigen(Cz)$values) 183 | } 184 | eigVec = eigen(Cz)$vectors 185 | 186 | DCA = (Dw %^% (-0.5)) %*% t(eigVec) %*% t(Z) 187 | B = t(DCA) %*% as.matrix(DCA) 188 | newData = t(DCA %*% data) 189 | 190 | return(list("B" = B, "DCA" = DCA, "newData" = newData)) 191 | } 192 | -------------------------------------------------------------------------------- /R/gdmd.r: -------------------------------------------------------------------------------- 1 | #' Global Distance Metric Learning 2 | #' 3 | #' Performs Global Distance Metric Learning (GDM) on the given data, learning a diagonal matrix. 4 | #' 5 | #' Put GdmDiag function details here. 6 | #' 7 | #' @param data \code{n * d} data matrix. \code{n} is the number of data points, 8 | #' \code{d} is the dimension of the data. 9 | #' Each data point is a row in the matrix. 10 | #' @param simi \code{n * 2} matrix describing the similar constrains. 11 | #' Each row of matrix is serial number of a similar pair in the original data. 12 | #' For example, pair(1, 3) represents the first observation is similar the 3th observation in the original data. 13 | #' @param dism \code{n * 2} matrix describing the dissimilar constrains as \code{simi}. 14 | #' Each row of matrix is serial number of a dissimilar pair in the original data. 15 | #' @param C0 numeric, the bound of similar constrains. 16 | #' @param threshold numeric, the threshold of stoping the learning iteration. 17 | #' 18 | #' @return list of the GdmDiag results: 19 | #' \item{newData}{GdmDiag transformed data} 20 | #' \item{diagonalA}{suggested Mahalanobis matrix} 21 | #' \item{dmlA}{matrix to transform data, square root of diagonalA } 22 | #' \item{error}{the precision of obtained distance metric by Newton-Raphson optimization } 23 | #' 24 | #' For every two original data points (x1, x2) in newData (y1, y2): 25 | #' 26 | #' \eqn{(x2 - x1)' * A * (x2 - x1) = || (x2 - x1) * B ||^2 = || y2 - y1 ||^2} 27 | #' 28 | #' @keywords GDM global distance metirc learning transformation mahalanobis metric 29 | #' 30 | #' @note Be sure to check whether the dimension of original data and constrains' format are valid for the function. 31 | #' 32 | #' @author Tao Gao <\url{http://www.gaotao.name}> 33 | #' 34 | #' @references 35 | #' Steven C.H. Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2003). 36 | #' Distance metric learning, with application to clustering with side-information. 37 | # in \emph{Proc. NIPS}. 
38 | #' 39 | 40 | #' @examples 41 | #' set.seed(602) 42 | #' library(MASS) 43 | #' library(scatterplot3d) 44 | #' 45 | #' # generate simulated Gaussian data 46 | #' k = 100 47 | #' m <- matrix(c(1, 0.5, 1, 0.5, 2, -1, 1, -1, 3), nrow =3, byrow = T) 48 | #' x1 <- mvrnorm(k, mu = c(1, 1, 1), Sigma = m) 49 | #' x2 <- mvrnorm(k, mu = c(-1, 0, 0), Sigma = m) 50 | #' data <- rbind(x1, x2) 51 | #' 52 | #' # define similar constrains 53 | #' simi <- rbind(t(combn(1:k, 2)), t(combn((k+1):(2*k), 2))) 54 | #' 55 | #' temp <- as.data.frame(t(simi)) 56 | #' tol <- as.data.frame(combn(1:(2*k), 2)) 57 | #' 58 | #' # define disimilar constrains 59 | #' dism <- t(as.matrix(tol[!tol %in% simi])) 60 | #' 61 | #' # transform data using GdmDiag 62 | #' result <- GdmDiag(data, simi, dism) 63 | #' newData <- result$newData 64 | #' # plot original data 65 | #' color <- gl(2, k, labels = c("red", "blue")) 66 | #' par(mfrow = c(2, 1), mar = rep(0, 4) + 0.1) 67 | #' scatterplot3d(data, color = color, cex.symbols = 0.6, 68 | #' xlim = range(data[, 1], newData[, 1]), 69 | #' ylim = range(data[, 2], newData[, 2]), 70 | #' zlim = range(data[, 3], newData[, 3]), 71 | #' main = "Original Data") 72 | #' # plot GdmDiag transformed data 73 | #' scatterplot3d(newData, color = color, cex.symbols = 0.6, 74 | #' xlim = range(data[, 1], newData[, 1]), 75 | #' ylim = range(data[, 2], newData[, 2]), 76 | #' zlim = range(data[, 3], newData[, 3]), 77 | #' main = "Transformed Data") 78 | 79 | GdmDiag <- function(data, simi, dism, C0 = 1, S1 = NULL, D1 = NULL, threshold = 0.001) { 80 | fudge = 0.000001 81 | reduction = 2 82 | data <- as.matrix(data) 83 | simi <- as.matrix(simi) 84 | dism <- as.matrix(dism) 85 | N <- dim(data)[1] 86 | d <- dim(data)[2] 87 | a <- matrix(rep(1, d), nrow = d)# initial diagonal A in the form of column vector 88 | # dij <- mat.or.vec(1, d) 89 | 90 | new.simi <- unique(t(apply(simi, 1, sort))) 91 | new.dism <- unique(t(apply(dism, 1, sort))) 92 | 93 | ######### contraints 94 | dist1.dism <- data[new.dism[, 1], ] - data[new.dism[, 2], ] 95 | dist.ij <- sqrt((dist1.dism^2) %*% a) 96 | sum.dist <- sum(dist.ij) 97 | temp <- cbind(dist1.dism^2, dist.ij) 98 | deri1.ij <-0.5 * temp[, 1:d]/(temp[, d + 1] + (temp[, d + 1] == 0) * fudge) 99 | sum.deri1 <- t(apply(deri1.ij, 2, sum)) 100 | deri2.ij <- t(apply(dist1.dism, 1, function(x) outer(x, x))) 101 | temp1 <- cbind(deri2.ij, dist.ij^3) 102 | deri2.ij <- -0.25 * temp1[, 1:(d^2)]/(temp1[, d^2 + 1] + (temp1[, d^2 + 1] == 0) * fudge) 103 | sum.deri2 <- matrix(apply(deri2.ij, 2, sum), ncol = d, byrow = TRUE) 104 | 105 | fD <- log(sum.dist) 106 | fD.1d <- sum.deri1/sum.dist 107 | fD.2d <- sum.deri2/sum.dist - crossprod(sum.deri1, sum.deri1)/(sum.dist^2) 108 | 109 | ####### objection is part of contraints 110 | # fD <- log(sum.dist) 111 | ###################################### 112 | dist1.dism <- data[new.dism[, 1], ] - data[new.dism[, 2], ] 113 | d.sum <- t(apply(dist1.dism^2, 2, sum)) 114 | dist1.simi <- data[new.simi[, 1], ] - data[new.simi[, 2], ] 115 | s.sum <- t(apply(dist1.simi^2, 2, sum)) 116 | 117 | # S1 <- mat.or.vec(N, N) 118 | # D1 <- mat.or.vec(N, N) 119 | 120 | # dism <- rbind(dism, dism[, c(2, 1)]) # 121 | # Dism <- rbind(Dism, Dism[, c(2, 1)]) 122 | # S1[dism] <- 1 123 | # D1[Dism] <- 1 124 | 125 | error <- 1 126 | while (error > threshold) { 127 | obj.initial <- as.numeric(s.sum %*% a) + C0 * fD 128 | fS.1d <- s.sum 129 | 130 | gradient <- fS.1d - C0 * fD.1d 131 | hessian <- -C0 * fD.2d + fudge * diag(1, d) 132 | invhessian <- solve(hessian) 133 | cstep <- 
invhessian %*% t(gradient)
134 |
135 |     lambda <- 1
136 |     atemp <- a - lambda * cstep
137 |     atemp[atemp < 0] <- 0
138 |
139 |     fDo <- log(sum(sqrt((dist1.dism^2) %*% atemp)))
140 |     obj <- as.numeric(s.sum %*% atemp) + C0 * fDo
141 |     obj.previous <- obj * 1.1
142 |
143 |     while (obj < obj.previous) {
144 |       lambda.previous <- lambda
145 |       obj.previous <- obj
146 |       a.previous = atemp
147 |       lambda <- lambda/reduction
148 |       atemp <- a - lambda * cstep
149 |       atemp[atemp < 0] <- 0
150 |       fDo1 <- log(sum(sqrt((dist1.dism^2) %*% atemp)))
151 |       obj <- as.numeric(s.sum %*% atemp) + C0 * fDo1
152 |     }
153 |     a <- a.previous
154 |     error <- abs((obj.previous - obj.initial)/obj.previous)
155 |   }
156 |   diagonalA <- diag(as.numeric(a))
157 |   dmlA <- sqrt(diagonalA)
158 |   newData <- data %*% dmlA
159 |
160 |   return(list("newData" = newData, "diagonalA" = diagonalA, "dmlA" = dmlA, "error" = error))
161 | }
162 |
--------------------------------------------------------------------------------
/R/gdmf.r:
--------------------------------------------------------------------------------
1 | #' Global Distance Metric Learning
2 | #'
3 | #' Performs Global Distance Metric Learning (GDM) on the given data, learning a full matrix.
4 | #'
5 | #' Put GdmFull function details here.
6 | #'
7 | #' @param data \code{n * d} data matrix. \code{n} is the number of data points,
8 | #'        \code{d} is the dimension of the data.
9 | #'        Each data point is a row in the matrix.
10 | #' @param simi \code{n * 2} matrix describing the similar constraints.
11 | #'        Each row of the matrix contains the serial numbers of a similar pair in the original data.
12 | #'        For example, the pair (1, 3) means that the first observation is similar to the 3rd observation in the original data.
13 | #' @param dism \code{n * 2} matrix describing the dissimilar constraints, in the same format as \code{simi}.
14 | #'        Each row of the matrix contains the serial numbers of a dissimilar pair in the original data.
15 | #' @param maxiter numeric, the maximum number of iterations.
16 | #'
17 | #' @return list of the GdmFull results:
18 | #' \item{newData}{GdmFull transformed data}
19 | #' \item{fullA}{suggested Mahalanobis matrix}
20 | #' \item{dmlA}{matrix to transform data, square root of fullA}
21 | #' \item{converged}{whether the iterative projection optimization converged or not}
22 | #'
23 | #' For every two original data points (x1, x2) in newData (y1, y2):
24 | #'
25 | #' \eqn{(x2 - x1)' * A * (x2 - x1) = || (x2 - x1) * B ||^2 = || y2 - y1 ||^2}
26 | #'
27 | #' @keywords GDM global distance metric learning transformation mahalanobis metric
28 | #'
29 | #' @note Be sure to check whether the dimension of the original data and the constraints' format are valid for the function.
30 | #'
31 | #' @author Tao Gao <\url{http://www.gaotao.name}>
32 | #'
33 | #' @references
34 | #' E.P. Xing, A.Y. Ng, M.I. Jordan and S. Russell (2003).
35 | #' Distance metric learning, with application to clustering with side-information.
36 | #' In \emph{Proc. NIPS}.
37 | #' 38 | 39 | #' @examples 40 | #' set.seed(123) 41 | #' library(MASS) 42 | #' library(scatterplot3d) 43 | #' 44 | #' # generate simulated Gaussian data 45 | #' k = 100 46 | #' m <- matrix(c(1, 0.5, 1, 0.5, 2, -1, 1, -1, 3), nrow =3, byrow = T) 47 | #' x1 <- mvrnorm(k, mu = c(1, 1, 1), Sigma = m) 48 | #' x2 <- mvrnorm(k, mu = c(-1, 0, 0), Sigma = m) 49 | #' data <- rbind(x1, x2) 50 | #' 51 | #' # define similar constrains 52 | #' simi <- rbind(t(combn(1:k, 2)), t(combn((k+1):(2*k), 2))) 53 | #' 54 | #' temp <- as.data.frame(t(simi)) 55 | #' tol <- as.data.frame(combn(1:(2*k), 2)) 56 | #' 57 | #' # define disimilar constrains 58 | #' dism <- t(as.matrix(tol[!tol %in% simi])) 59 | #' 60 | #' # transform data using GdmFull 61 | #' result <- GdmFull(data, simi, dism) 62 | #' newData <- result$newData 63 | #' # plot original data 64 | #' color <- gl(2, k, labels = c("red", "blue")) 65 | #' par(mfrow = c(2, 1), mar = rep(0, 4) + 0.1) 66 | #' scatterplot3d(data, color = color, cex.symbols = 0.6, 67 | #' xlim = range(data[, 1], newData[, 1]), 68 | #' ylim = range(data[, 2], newData[, 2]), 69 | #' zlim = range(data[, 3], newData[, 3]), 70 | #' main = "Original Data") 71 | #' # plot GdmFull transformed data 72 | #' scatterplot3d(newData, color = color, cex.symbols = 0.6, 73 | #' xlim = range(data[, 1], newData[, 1]), 74 | #' ylim = range(data[, 2], newData[, 2]), 75 | #' zlim = range(data[, 3], newData[, 3]), 76 | #' main = "Transformed Data") 77 | 78 | GdmFull <- function(data, simi, dism, maxiter = 100) { 79 | data <- as.matrix(data) 80 | N <- dim(data)[1] 81 | d <- dim(data)[2] 82 | new.simi <- unique(t(apply(simi, 1, sort))) 83 | new.dism <- unique(t(apply(dism, 1, sort))) 84 | 85 | A <- diag(1, d) * 0.1 86 | W <- mat.or.vec(d, d) 87 | dij <- mat.or.vec(1, d) 88 | 89 | # sphereMult = cov(data)^(-0.5); 90 | # spheredata = data %*% sphereMult 91 | 92 | dist1.simi <- data[new.simi[, 1], ] - data[new.simi[, 2], ] 93 | dist2.ij <- t(apply(dist1.simi, 1, function(x) outer(x, x))) 94 | W <- matrix(apply(dist2.ij, 2, sum), ncol = d, byrow = TRUE) 95 | 96 | 97 | w <- matrix(W, ncol = 1) 98 | t0 <- as.numeric(crossprod(w, matrix(A, ncol = 1))/100) 99 | 100 | IterProjection <- function(data, simi, dism, A, w, t0 , maxiter = 100) { 101 | data <- as.matrix(data) 102 | N = dim(data)[1] # number of examples 103 | d = dim(data)[2] # dimensionality of examples 104 | # S1 <- mat.or.vec(N, N) 105 | # D1 <- mat.or.vec(N, N) 106 | # simi <- rbind(simi, simi[, c(2, 1)]) 107 | # dism <- rbind(dism, dism[, c(2, 1)]) 108 | # S1[simi] <- 1 109 | # D1[dism] <- 1 110 | new.simi <- unique(t(apply(simi, 1, sort))) 111 | new.dism <- unique(t(apply(dism, 1, sort))) 112 | 113 | # error1=1e5 114 | threshold2 <- 0.01 # error-bound of main A-update iteration 115 | epsilon <- 0.01 # error-bound of iterative projection on C1 and C2 116 | maxcount <- 200 117 | 118 | w1 <- w/norm(w, "F") # make 'w' a unit vector 119 | t1 <- t0/norm(w, "F") 120 | 121 | count <- 1 122 | alpha <- 0.1 # initial step size along gradient 123 | 124 | GradProjection <- function(grad1, grad2, d) { 125 | g1 <- matrix(grad1, ncol = 1) 126 | g2 <- matrix(grad2, ncol = 1) 127 | 128 | g2 <- g2/norm(g2, "F") 129 | gtemp <- g1 - as.numeric(crossprod(g2, g1)) * g2 130 | gtemp <- gtemp/norm(gtemp, "F") 131 | grad.proj <- matrix(gtemp, d, d) 132 | return(grad.proj) 133 | } 134 | 135 | fS1 <- function(data, new.simi, A, N, d, fudge = 0.000001) { 136 | 137 | dist1.simi <- data[new.simi[, 1], ] - data[new.simi[, 2], ] 138 | dist2.ij <- t(apply(dist1.simi, 1, 
function(x) outer(x, x)))
139 |       fs.1d <- matrix(apply(dist2.ij, 2, sum), ncol = d, byrow = TRUE)
140 |       return(fs.1d)
141 |     }
142 |
143 |     fD1 <- function(data, new.dism, A, N, d, fudge = 0.000001) {
144 |       dist1.dism <- data[new.dism[, 1], ] - data[new.dism[, 2], ]
145 |       dist.ij <- numeric(dim(dist1.dism)[1])
146 |       for (i in 1:dim(dist1.dism)[1]) {
147 |         dist.ij[i] <- sqrt(t(dist1.dism[i, ]) %*% A %*% t(t(dist1.dism[i, ])))
148 |       }
149 |       sum.dist <- sum(dist.ij) + 0.000001
150 |       Mij <- t(apply(dist1.dism, 1, function(x) outer(x, x)))
151 |       temp <- cbind(Mij, t(t(dist.ij)))
152 |
153 |       deri.ij <- 0.5 * temp[, 1:(d^2)]/(temp[, d^2 + 1] + (temp[, d^2 + 1] == 0) * fudge)
154 |       sum.deri <- matrix(apply(deri.ij, 2, sum), ncol = d, byrow = TRUE)
155 |       fd.1d <- sum.deri/sum.dist
156 |       return(fd.1d)
157 |     }
158 |
159 |     fD <- function(data, new.dism, A, N, d) {
160 |       dist1.dism <- data[new.dism[, 1], ] - data[new.dism[, 2], ]
161 |       dist.ij <- numeric(dim(dist1.dism)[1])
162 |       for (i in 1:dim(dist1.dism)[1]) {
163 |         dist.ij[i] <- sqrt(t(dist1.dism[i, ]) %*% A %*% t(t(dist1.dism[i, ])))
164 |       }
165 |       fd <- sum(dist.ij) + 0.000001
166 |       fd <- log(fd)
167 |       return(fd)
168 |     }
169 |
170 |     grad1 <- fS1(data, new.simi, A, N, d)  # gradient of similarity constraint function
171 |     grad2 <- fD1(data, new.dism, A, N, d)  # gradient of dissimilarity constraint func.
172 |     M <- GradProjection(grad1, grad2, d)   # gradient of fD1 orthogonal to fS1
173 |
174 |
175 |     A.last <- A  # initial A
176 |     done <- 0
177 |     delta <- 0
178 |     converged <- 0
179 |     while (done == 0) {
180 |       projection.iters <- 0
181 |       satisfy <- 0
182 |
183 |       while (projection.iters < maxiter & satisfy == 0) {
184 |         A0 <- A
185 |         x0 <- matrix(A0, ncol = 1)
186 |         if (crossprod(w, x0) <= t0)
187 |           A <- A0
188 |         else {
189 |           x <- x0 + as.numeric(t1 - crossprod(w1, x0)) * w1
190 |           A <- matrix(x, d, d)
191 |         }
192 |
193 |         A <- (A + t(A))/2
194 |         vl <- eigen(A)
195 |         vl[[1]][vl[[1]] < 0] = 0
196 |         A <- vl[[2]] %*% diag(vl[[1]], d) %*% t(vl[[2]])
197 |
198 |         fDC2 <- crossprod(w, matrix(A, ncol = 1))
199 |         error1 <- as.numeric((fDC2 - t0)/t0)
200 |         projection.iters <- projection.iters + 1
201 |         satisfy <- as.numeric(ifelse(error1 > epsilon, 0, 1))
202 |       }
203 |
204 |       obj.previous <- fD(data, new.dism, A.last, N, d)
205 |       obj <- fD(data, new.dism, A, N, d)
206 |
207 |       if (obj > obj.previous & satisfy == 1) {
208 |         alpha <- alpha * 1.05
209 |         A.last <- A
210 |         grad2 <- fS1(data, new.simi, A, N, d)
211 |         grad1 <- fD1(data, new.dism, A, N, d)
212 |         M <- GradProjection(grad1, grad2, d)
213 |         A <- A + alpha * M
214 |       }
215 |       else {
216 |         alpha <- alpha/2
217 |         A <- A.last + alpha * M
218 |       }
219 |       delta <- norm(alpha * M, "F")/norm(A.last, "F")
220 |       count <- count + 1
221 |       done <- ifelse(delta < threshold2 | count == maxcount, 1, 0)
222 |     }
223 |     converged <- ifelse(delta > threshold2, 0, 1)
224 |     return(list("converged" = ifelse(converged == 1, "Yes", "No"), "fullA" = A))
225 |   }
226 |
227 |   iterproj <- IterProjection(data, simi, dism, A, w, t0)
228 |   eigenvalue <- eigen(iterproj$fullA)
229 |   dml <- eigenvalue[[2]] %*% sqrt(diag(eigenvalue[[1]], d))
230 |   newData <- data %*% dml
231 |   return(list("newData" = newData, "fullA" = iterproj[[2]], "dmlA" = dml, "converged" = iterproj[[1]]))
232 | }
233 |
--------------------------------------------------------------------------------
/R/itml.R:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nanxstats/sdml/5497f697a882136c83d397a476419fd4503a80bf/R/itml.R -------------------------------------------------------------------------------- /R/kdca.R: -------------------------------------------------------------------------------- 1 | #' Kernel Discriminative Component Analysis 2 | #' 3 | #' Performs kernel discriminative component analysis on the given data. 4 | #' 5 | #' Put KDCA function details here. 6 | #' 7 | #' @param k n x n kernel matrix. Result of the \code{\link{kmatrixGauss}} function. 8 | #' n is the number of samples. 9 | #' @param chunks \code{n * 1} vector describing the chunklets: 10 | #' \code{-1} in the \code{i} th place means that point \code{i} 11 | #' doesn\'t belong to any chunklet; 12 | #' integer \code{j} in place \code{i} means that point \code{i} 13 | #' belongs to chunklet j. 14 | #' The chunklets indexes should be 1:(number of chunklets). 15 | #' @param neglinks \code{s * s} matrix describing the negative relationship 16 | #' between all the \code{s} chunklets. 17 | #' For the element \eqn{neglinks_{ij}}: 18 | #' \eqn{neglinks_{ij} = 1} means chunklet \code{i} and chunklet {j} 19 | #' have negative constraint(s); 20 | #' \eqn{neglinks_{ij} = -1} means chunklet \code{i} and chunklet {j} 21 | #' don\'t have negative constraints 22 | #' or we don\'t have information about that. 23 | #' @param useD optional. When not given, DCA is done in the original dimension 24 | #' and B is full rank. When useD is given, DCA is preceded by 25 | #' constraints based LDA which reduces the dimension to useD. 26 | #' B in this case is of rank useD. 27 | #' 28 | #' @return list of the KDCA results: 29 | #' \item{B}{KDCA suggested Mahalanobis matrix} 30 | #' \item{DCA}{KDCA suggested transformation of the data. 31 | #' The dimension is (original data dimension) * (useD)} 32 | #' \item{newData}{KDCA transformed data} 33 | #' 34 | #' @keywords dca kdca discriminant component transformation mahalanobis metric 35 | #' 36 | #' @aliases kdca 37 | #' 38 | #' @note Put some note here. 39 | #' 40 | #' @author Nan Xiao <\url{https://nanx.me}> 41 | #' 42 | #' @seealso See \code{\link{kmatrixGauss}} for the Gaussian kernel computation, 43 | #' and \code{\link{dca}} for the linear version of DCA. 44 | #' 45 | #' @export kdca 46 | #' 47 | #' @references 48 | #' Steven C.H. Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2006). 49 | #' Learning Distance Metrics with Contextual Constraints for Image Retrieval. 50 | #' \emph{Proceedings IEEE Conference on Computer Vision and Pattern Recognition 51 | #' (CVPR2006)}. 52 | #' 53 | #' @examples 54 | #' kdca(NULL) 55 | 56 | kdca <- function(k, chunks, neglinks, useD) { 57 | 58 | NULL 59 | 60 | } 61 | 62 | -------------------------------------------------------------------------------- /R/kmatrixGauss.R: -------------------------------------------------------------------------------- 1 | #' Gaussian Kernel Computation for 2 | #' Kernel Local Fisher Discriminant Analysis 3 | #' 4 | #' Gaussian kernel computation for klfda. 5 | #' 6 | #' Put kmatrixGauss function details here. 7 | #' 8 | #' @param x n x d matrix of original samples. 9 | #' n is the number of samples. 10 | #' @param sigma dimensionality of reduced space. (default: 0.001) 11 | #' 12 | #' @return K n x n kernel matrix. 13 | #' n is the number of samples. 14 | #' 15 | #' @keywords klfda kernel local fisher discriminant 16 | #' transformation mahalanobis metric 17 | #' 18 | #' @aliases kmatrixGauss 19 | #' 20 | #' @note Put some note here. 
21 | #' 22 | #' @author Nan Xiao <\url{https://nanx.me}> 23 | #' 24 | #' @seealso See \code{klfda} for the computation of 25 | #' kernel local fisher discriminant analysis 26 | #' 27 | #' @export kmatrixGauss 28 | #' 29 | #' @references 30 | #' Sugiyama, M (2007). 31 | #' Dimensionality reduction of multimodal labeled data by 32 | #' local Fisher discriminant analysis. 33 | #' \emph{Journal of Machine Learning Research}, vol.\bold{8}, 1027--1061. 34 | #' 35 | #' Sugiyama, M (2006). 36 | #' Local Fisher discriminant analysis for supervised dimensionality reduction. 37 | #' In W. W. Cohen and A. Moore (Eds.), \emph{Proceedings of 23rd International 38 | #' Conference on Machine Learning (ICML2006)}, 905--912. 39 | #' 40 | #' @examples 41 | #' NULL 42 | 43 | kmatrixGauss <- function(x, sigma = 1) { 44 | x = t(as.matrix(x)) 45 | d = nrow(x) 46 | n = ncol(x) 47 | X2 = t(as.matrix(colSums(x^2))) 48 | distance2 = repmat(X2, n, 1) + repmat(t(X2), 1, n) - 2 * t(x) %*% x 49 | K = exp(-distance2/(2 * sigma^2)) # To be tested 50 | return(K) 51 | } 52 | 53 | -------------------------------------------------------------------------------- /R/lmnn.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nanxstats/sdml/5497f697a882136c83d397a476419fd4503a80bf/R/lmnn.R -------------------------------------------------------------------------------- /R/misc.R: -------------------------------------------------------------------------------- 1 | # repeat a matrix like the MATLAB grammar 2 | repmat <- function(A, N, M) { 3 | kronecker(matrix(1, N, M), A) 4 | } 5 | 6 | # negative one half matrix power operator 7 | "%^%" <- function(x, n) { 8 | with(eigen(as.matrix(x)), vectors %*% (values^n * t(vectors))) 9 | } 10 | 11 | -------------------------------------------------------------------------------- /R/rca.R: -------------------------------------------------------------------------------- 1 | #' Relevant Component Analysis 2 | #' 3 | #' Performs relevant component analysis on the given data. 4 | #' 5 | #' The RCA function takes a data set and a set of positive constraints 6 | #' as arguments and returns a linear transformation of the data space 7 | #' into better representation, alternatively, a Mahalanobis metric 8 | #' over the data space. 9 | #' 10 | #' Relevant component analysis consists of three steps: 11 | #' \enumerate{\item locate the test point 12 | #' \item compute the distances between the test points 13 | #' \item find \eqn{k} shortest distances and the bla} 14 | #' The new representation is known to be optimal in an information 15 | #' theoretic sense under a constraint of keeping equivalent data 16 | #' points close to each other. 17 | #' 18 | #' @param x matrix or data frame of original data. 19 | #' Each row is a feature vector of a data instance. 20 | #' @param chunks list of \code{k} numerical vectors. 21 | #' Each vector represents a chunklet, the elements 22 | #' in the vectors indicate where the samples locate 23 | #' in \code{x}. See examples for more information. 24 | #' 25 | #' @return list of the RCA results: 26 | #' \item{B}{The RCA suggested Mahalanobis matrix. 27 | #' Distances between data points x1, x2 should be 28 | #' computed by (x2 - x1)' * B * (x2 - x1)} 29 | #' \item{A}{The RCA suggested transformation of the data. 30 | #' The data should be transformed by A * data} 31 | #' \item{newX}{The data after the RCA transformation (A). 
32 | #' newData = A * data} 33 | #' 34 | #' The three returned argument are just different forms of the same output. 35 | #' If one is interested in a Mahalanobis metric over the original data space, 36 | #' the first argument is all she/he needs. If a transformation into another 37 | #' space (where one can use the Euclidean metric) is preferred, the second 38 | #' returned argument is sufficient. Using A and B is equivalent in the 39 | #' following sense: 40 | #' 41 | #' if y1 = A * x1, y2 = A * y2 then 42 | #' (x2 - x1)' * B * (x2 - x1) = (y2 - y1)' * (y2 - y1) 43 | #' 44 | #' @keywords rca transformation mahalanobis metric 45 | #' 46 | #' @aliases rca 47 | #' 48 | #' @note Note that any different sets of instances (chunklets), 49 | #' e.g. {1, 3, 7} and {4, 6}, might belong to the 50 | #' same class and might belong to different classes. 51 | #' 52 | #' @author Nan Xiao <\url{https://nanx.me}> 53 | #' 54 | #' @seealso See \code{\link{dca}} for exploiting negative constrains. 55 | #' 56 | #' @export rca 57 | #' 58 | #' @references 59 | #' Aharon Bar-Hillel, Tomer Hertz, Noam Shental, and Daphna Weinshall (2003). 60 | #' Learning Distance Functions using Equivalence Relations. 61 | #' \emph{Proceedings of 20th International Conference on 62 | #' Machine Learning (ICML2003)}. 63 | #' 64 | #' @examples 65 | #' set.seed(1234) 66 | #' require(MASS) # generate synthetic Gaussian data 67 | #' k = 100 # sample size of each class 68 | #' n = 3 # specify how many class 69 | #' N = k * n # total sample number 70 | #' x1 = mvrnorm(k, mu = c(-10, 6), matrix(c(10, 4, 4, 10), ncol = 2)) 71 | #' x2 = mvrnorm(k, mu = c(0, 0), matrix(c(10, 4, 4, 10), ncol = 2)) 72 | #' x3 = mvrnorm(k, mu = c(10, -6), matrix(c(10, 4, 4, 10), ncol = 2)) 73 | #' x = as.data.frame(rbind(x1, x2, x3)) 74 | #' x$V3 = gl(n, k) 75 | #' 76 | #' # The fully labeled data set with 3 classes 77 | #' plot(x$V1, x$V2, bg = c("#E41A1C", "#377EB8", "#4DAF4A")[x$V3], 78 | #' pch = c(rep(22, k), rep(21, k), rep(25, k))) 79 | #' Sys.sleep(3) 80 | #' 81 | #' # Same data unlabeled; clearly the classes' structure is less evident 82 | #' plot(x$V1, x$V2) 83 | #' Sys.sleep(3) 84 | #' 85 | #' chunk1 = sample(1:100, 5) 86 | #' chunk2 = sample(setdiff(1:100, chunk1), 5) 87 | #' chunk3 = sample(101:200, 5) 88 | #' chunk4 = sample(setdiff(101:200, chunk3), 5) 89 | #' chunk5 = sample(201:300, 5) 90 | #' chks = x[c(chunk1, chunk2, chunk3, chunk4, chunk5), ] 91 | #' chunks = list(chunk1, chunk2, chunk3, chunk4, chunk5) 92 | #' 93 | #' # The chunklets provided to the RCA algorithm 94 | #' plot(chks$V1, chks$V2, col = rep(c("#E41A1C", "#377EB8", 95 | #' "#4DAF4A", "#984EA3", "#FF7F00"), each = 5), 96 | #' pch = rep(0:4, each = 5), ylim = c(-15, 15)) 97 | #' Sys.sleep(3) 98 | #' 99 | #' # Whitening transformation applied to the chunklets 100 | #' chkTransformed = as.matrix(chks[ , 1:2]) %*% rca(x[ , 1:2], chunks)$A 101 | #' 102 | #' plot(chkTransformed[ , 1], chkTransformed[ , 2], col = rep(c( 103 | #' "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00"), each = 5), 104 | #' pch = rep(0:4, each = 5), ylim = c(-15, 15)) 105 | #' Sys.sleep(3) 106 | #' 107 | #' # The origin data after applying the RCA transformation 108 | #' plot(rca(x[ , 1:2], chunks)$newX[, 1], rca(x[ , 1:2], chunks)$newX[, 2], 109 | #' bg = c("#E41A1C", "#377EB8", "#4DAF4A")[gl(n, k)], 110 | #' pch = c(rep(22, k), rep(21, k), rep(25, k))) 111 | #' 112 | #' # The RCA suggested transformation of the data, dimensionality reduced 113 | #' rca(x[ , 1:2], chunks)$A 114 | #' 115 | #' # The RCA 
suggested Mahalanobis matrix
116 | #' rca(x[ , 1:2], chunks)$B
117 |
118 | rca <- function(x, chunks) {
119 |
120 |   chunkNum = length(chunks)
121 |   chunkDf = vector("list", chunkNum)
122 |   p = length(unlist(chunks))
123 |
124 |   for (i in 1:chunkNum) {
125 |     chunkDf[[i]] = as.matrix(x[chunks[[i]], ])
126 |   }
127 |
128 |   chunkMean = lapply(chunkDf, colMeans)
129 |
130 |   for (i in 1:chunkNum) {
131 |     chunkDf[[i]] = chunkDf[[i]] - chunkMean[[i]]
132 |   }
133 |
134 |   cData = do.call(rbind, chunkDf)  # compute the inner covariance matrix and normalize
135 |   innerCov = cov(cData) * ((nrow(cData) - 1) / nrow(cData))
136 |
137 |   for (i in 1:chunkNum) {
138 |     chunkDf[[i]] = t(chunkDf[[i]]) %*% chunkDf[[i]]
139 |   }
140 |
141 |   hatC = Reduce("+", chunkDf)/p  # Reduce() sums the matrices in the list
142 |
143 |   B = solve(hatC)  # raw Mahalanobis metric
144 |
145 |   A = diag(ncol(x))
146 |   A = A %*% (innerCov %^% (-0.5))  # whitening transformation matrix
147 |
148 |   newX = as.matrix(x) %*% A  # original data transformed
149 |
150 |   return(list("B" = B, "A" = A, "newX" = newX))
151 | }
152 |
153 |
--------------------------------------------------------------------------------
/R/sdml-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![License](http://img.shields.io/:license-mit-blue.svg?style=flat)](http://badges.mit-license.org)
2 |
3 | # sdml
4 |
5 | ## Note: This package has been moved to a [new package](https://github.com/terrytangyuan/dml), which will be maintained actively.
6 |
7 | ## Introduction
8 |
9 | Distance metrics are used widely in the machine learning literature.
10 | Traditionally, a metric is chosen a priori (Euclidean distance,
11 | L1 distance, etc.) or by cross-validation within a small class of functions
12 | (e.g. choosing the order of a polynomial kernel).
13 | With prior knowledge about the data, however, a more suitable distance metric
14 | can be learned with (semi-)supervised distance metric learning techniques.
15 | `sdml` is an R package that aims to implement state-of-the-art algorithms for
16 | supervised distance metric learning. These distance metric learning methods
17 | are widely applied in feature extraction, dimensionality reduction, clustering,
18 | classification, information retrieval, and computer vision problems.
19 |
20 | ## Algorithms
21 |
22 | Algorithms planned in the first development stage:
23 |
24 | * Supervised Global Distance Metric Learning:
25 |
26 |     * Relevant Component Analysis (RCA)
27 |     * Kernel Relevant Component Analysis (KRCA)
28 |     * Discriminative Component Analysis (DCA)
29 |     * Kernel Discriminative Component Analysis (KDCA)
30 |     * Global Distance Metric Learning by Convex Programming (GDMLCP)
31 |
32 | * Supervised Local Distance Metric Learning:
33 |
34 |     * Local Fisher Discriminant Analysis (LFDA)
35 |     * Kernel Local Fisher Discriminant Analysis (KLFDA)
36 |     * Information-Theoretic Metric Learning (ITML)
37 |     * Large Margin Nearest Neighbor Classifier (LMNN)
38 |     * Neighbourhood Components Analysis (NCA)
39 |     * Localized Distance Metric Learning (LDM)
40 |
41 | The algorithms and routines might be adjusted during development.
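
## Example

A minimal sketch of how the RCA interface described above can be used, based on the documented `rca(x, chunks)` signature and its documented return values (`B`, `A`, `newX`). The data and chunklet choices below are made up purely for illustration; see the function documentation (e.g. `?rca`, `?dca`) for complete worked examples.

```r
library(MASS)
library(sdml)

# Two Gaussian classes in two dimensions
set.seed(42)
x1 <- mvrnorm(50, mu = c(-5, 0), Sigma = diag(2))
x2 <- mvrnorm(50, mu = c(5, 0), Sigma = diag(2))
x <- rbind(x1, x2)

# Two chunklets of points known to be equivalent, one from each class
chunks <- list(sample(1:50, 5), sample(51:100, 5))

fit <- rca(x, chunks)
fit$B          # suggested Mahalanobis matrix
head(fit$newX) # data after the RCA (whitening) transformation
```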
42 | 43 | ## Contact 44 | 45 | Contact authors of this package: 46 | 47 | - Tao Gao 48 | - Nan Xiao 49 | - Yuan Tang 50 | -------------------------------------------------------------------------------- /man/GdmDiag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gdmd.r 3 | \name{GdmDiag} 4 | \alias{GdmDiag} 5 | \title{Global Distance Metric Learning} 6 | \usage{ 7 | GdmDiag(data, simi, dism, C0 = 1, S1 = NULL, D1 = NULL, 8 | threshold = 0.001) 9 | } 10 | \arguments{ 11 | \item{data}{\code{n * d} data matrix. \code{n} is the number of data points, 12 | \code{d} is the dimension of the data. 13 | Each data point is a row in the matrix.} 14 | 15 | \item{simi}{\code{n * 2} matrix describing the similar constrains. 16 | Each row of matrix is serial number of a similar pair in the original data. 17 | For example, pair(1, 3) represents the first observation is similar the 3th observation in the original data.} 18 | 19 | \item{dism}{\code{n * 2} matrix describing the dissimilar constrains as \code{simi}. 20 | Each row of matrix is serial number of a dissimilar pair in the original data.} 21 | 22 | \item{C0}{numeric, the bound of similar constrains.} 23 | 24 | \item{threshold}{numeric, the threshold of stoping the learning iteration.} 25 | } 26 | \value{ 27 | list of the GdmDiag results: 28 | \item{newData}{GdmDiag transformed data} 29 | \item{diagonalA}{suggested Mahalanobis matrix} 30 | \item{dmlA}{matrix to transform data, square root of diagonalA } 31 | \item{error}{the precision of obtained distance metric by Newton-Raphson optimization } 32 | 33 | For every two original data points (x1, x2) in newData (y1, y2): 34 | 35 | \eqn{(x2 - x1)' * A * (x2 - x1) = || (x2 - x1) * B ||^2 = || y2 - y1 ||^2} 36 | } 37 | \description{ 38 | Performs Global Distance Metric Learning (GDM) on the given data, learning a diagonal matrix. 39 | } 40 | \details{ 41 | Put GdmDiag function details here. 42 | } 43 | \note{ 44 | Be sure to check whether the dimension of original data and constrains' format are valid for the function. 45 | } 46 | \examples{ 47 | set.seed(602) 48 | library(MASS) 49 | library(scatterplot3d) 50 | 51 | # generate simulated Gaussian data 52 | k = 100 53 | m <- matrix(c(1, 0.5, 1, 0.5, 2, -1, 1, -1, 3), nrow =3, byrow = T) 54 | x1 <- mvrnorm(k, mu = c(1, 1, 1), Sigma = m) 55 | x2 <- mvrnorm(k, mu = c(-1, 0, 0), Sigma = m) 56 | data <- rbind(x1, x2) 57 | 58 | # define similar constrains 59 | simi <- rbind(t(combn(1:k, 2)), t(combn((k+1):(2*k), 2))) 60 | 61 | temp <- as.data.frame(t(simi)) 62 | tol <- as.data.frame(combn(1:(2*k), 2)) 63 | 64 | # define disimilar constrains 65 | dism <- t(as.matrix(tol[!tol \%in\% simi])) 66 | 67 | # transform data using GdmDiag 68 | result <- GdmDiag(data, simi, dism) 69 | newData <- result$newData 70 | # plot original data 71 | color <- gl(2, k, labels = c("red", "blue")) 72 | par(mfrow = c(2, 1), mar = rep(0, 4) + 0.1) 73 | scatterplot3d(data, color = color, cex.symbols = 0.6, 74 | xlim = range(data[, 1], newData[, 1]), 75 | ylim = range(data[, 2], newData[, 2]), 76 | zlim = range(data[, 3], newData[, 3]), 77 | main = "Original Data") 78 | # plot GdmDiag transformed data 79 | scatterplot3d(newData, color = color, cex.symbols = 0.6, 80 | xlim = range(data[, 1], newData[, 1]), 81 | ylim = range(data[, 2], newData[, 2]), 82 | zlim = range(data[, 3], newData[, 3]), 83 | main = "Transformed Data") 84 | } 85 | \references{ 86 | Steven C.H. 
Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2003). 87 | Distance metric learning, with application to clustering with side-information. 88 | } 89 | \author{ 90 | Tao Gao <\url{http://www.gaotao.name}> 91 | } 92 | \keyword{GDM} 93 | \keyword{distance} 94 | \keyword{global} 95 | \keyword{learning} 96 | \keyword{mahalanobis} 97 | \keyword{metirc} 98 | \keyword{metric} 99 | \keyword{transformation} 100 | -------------------------------------------------------------------------------- /man/GdmFull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gdmf.r 3 | \name{GdmFull} 4 | \alias{GdmFull} 5 | \title{Global Distance Metric Learning} 6 | \usage{ 7 | GdmFull(data, simi, dism, maxiter = 100) 8 | } 9 | \arguments{ 10 | \item{data}{\code{n * d} data matrix. \code{n} is the number of data points, 11 | \code{d} is the dimension of the data. 12 | Each data point is a row in the matrix.} 13 | 14 | \item{simi}{\code{n * 2} matrix describing the similar constrains. 15 | Each row of matrix is serial number of a similar pair in the original data. 16 | For example, pair(1, 3) represents the first observation is similar the 3th observation in the original data.} 17 | 18 | \item{dism}{\code{n * 2} matrix describing the dissimilar constrains as \code{simi}. 19 | Each row of matrix is serial number of a dissimilar pair in the original data.} 20 | 21 | \item{maxiter}{numeric, the number of iteration.} 22 | } 23 | \value{ 24 | list of the GdmDiag results: 25 | \item{newData}{GdmDiag transformed data} 26 | \item{fullA}{suggested Mahalanobis matrix} 27 | \item{dmlA}{matrix to transform data, square root of diagonalA } 28 | \item{converged}{whether the iteration-projection optimization is converged or not} 29 | 30 | For every two original data points (x1, x2) in newData (y1, y2): 31 | 32 | \eqn{(x2 - x1)' * A * (x2 - x1) = || (x2 - x1) * B ||^2 = || y2 - y1 ||^2} 33 | } 34 | \description{ 35 | Performs Global Distance Metric Learning (GDM) on the given data, learning a full matrix. 36 | } 37 | \details{ 38 | Put GdmFull function details here. 39 | } 40 | \note{ 41 | Be sure to check whether the dimension of original data and constrains' format are valid for the function. 
42 | } 43 | \examples{ 44 | set.seed(123) 45 | library(MASS) 46 | library(scatterplot3d) 47 | 48 | # generate simulated Gaussian data 49 | k = 100 50 | m <- matrix(c(1, 0.5, 1, 0.5, 2, -1, 1, -1, 3), nrow =3, byrow = T) 51 | x1 <- mvrnorm(k, mu = c(1, 1, 1), Sigma = m) 52 | x2 <- mvrnorm(k, mu = c(-1, 0, 0), Sigma = m) 53 | data <- rbind(x1, x2) 54 | 55 | # define similar constrains 56 | simi <- rbind(t(combn(1:k, 2)), t(combn((k+1):(2*k), 2))) 57 | 58 | temp <- as.data.frame(t(simi)) 59 | tol <- as.data.frame(combn(1:(2*k), 2)) 60 | 61 | # define disimilar constrains 62 | dism <- t(as.matrix(tol[!tol \%in\% simi])) 63 | 64 | # transform data using GdmFull 65 | result <- GdmFull(data, simi, dism) 66 | newData <- result$newData 67 | # plot original data 68 | color <- gl(2, k, labels = c("red", "blue")) 69 | par(mfrow = c(2, 1), mar = rep(0, 4) + 0.1) 70 | scatterplot3d(data, color = color, cex.symbols = 0.6, 71 | xlim = range(data[, 1], newData[, 1]), 72 | ylim = range(data[, 2], newData[, 2]), 73 | zlim = range(data[, 3], newData[, 3]), 74 | main = "Original Data") 75 | # plot GdmFull transformed data 76 | scatterplot3d(newData, color = color, cex.symbols = 0.6, 77 | xlim = range(data[, 1], newData[, 1]), 78 | ylim = range(data[, 2], newData[, 2]), 79 | zlim = range(data[, 3], newData[, 3]), 80 | main = "Transformed Data") 81 | } 82 | \references{ 83 | Steven C.H. Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2003). 84 | Distance metric learning, with application to clustering with side-information. 85 | } 86 | \author{ 87 | Tao Gao <\url{http://www.gaotao.name}> 88 | } 89 | \keyword{GDM} 90 | \keyword{distance} 91 | \keyword{global} 92 | \keyword{learning} 93 | \keyword{mahalanobis} 94 | \keyword{metirc} 95 | \keyword{metric} 96 | \keyword{transformation} 97 | -------------------------------------------------------------------------------- /man/dca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dca.R 3 | \name{dca} 4 | \alias{dca} 5 | \title{Discriminative Component Analysis} 6 | \usage{ 7 | dca(data, chunks, neglinks, useD = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{\code{n * d} data matrix. \code{n} is the number of data points, 11 | \code{d} is the dimension of the data. 12 | Each data point is a row in the matrix.} 13 | 14 | \item{chunks}{length \code{n} vector describing the chunklets: 15 | \code{-1} in the \code{i} th place means point \code{i} 16 | doesn't belong to any chunklet; 17 | integer \code{j} in place \code{i} means point \code{i} 18 | belongs to chunklet j. 19 | The chunklets indexes should be 1:(number of chunklets).} 20 | 21 | \item{neglinks}{\code{s * s} symmetric matrix describing the negative relationship 22 | between all the \code{s} chunklets. 23 | For the element \eqn{neglinks_{ij}}: 24 | \eqn{neglinks_{ij} = 1} means chunklet \code{i} and chunklet {j} 25 | have negative constraint(s); 26 | \eqn{neglinks_{ij} = 0} means chunklet \code{i} and chunklet {j} 27 | don't have negative constraints 28 | or we don't have information about that.} 29 | 30 | \item{useD}{Integer. Optional. When not given, DCA is done in the 31 | original dimension and B is full rank. When useD is given, 32 | DCA is preceded by constraints based LDA which reduces the 33 | dimension to useD. B in this case is of rank useD.} 34 | } 35 | \value{ 36 | list of the DCA results: 37 | \item{B}{DCA suggested Mahalanobis matrix} 38 | \item{DCA}{DCA suggested transformation of the data. 
39 | The dimension is (original data dimension) * (useD)} 40 | \item{newData}{DCA transformed data} 41 | 42 | For every two original data points (x1, x2) in newData (y1, y2): 43 | 44 | \eqn{(x2 - x1)' * B * (x2 - x1) = || (x2 - x1) * A ||^2 = || y2 - y1 ||^2} 45 | } 46 | \description{ 47 | Performs discriminative component analysis on the given data. 48 | } 49 | \details{ 50 | Put DCA function details here. 51 | } 52 | \note{ 53 | Put some note here. 54 | } 55 | \examples{ 56 | set.seed(123) 57 | require(MASS) # generate synthetic Gaussian data 58 | k = 100 # sample size of each class 59 | n = 3 # specify how many class 60 | N = k * n # total sample number 61 | x1 = mvrnorm(k, mu = c(-10, 6), matrix(c(10, 4, 4, 10), ncol = 2)) 62 | x2 = mvrnorm(k, mu = c(0, 0), matrix(c(10, 4, 4, 10), ncol = 2)) 63 | x3 = mvrnorm(k, mu = c(10, -6), matrix(c(10, 4, 4, 10), ncol = 2)) 64 | data = as.data.frame(rbind(x1, x2, x3)) 65 | # The fully labeled data set with 3 classes 66 | plot(data$V1, data$V2, bg = c("#E41A1C", "#377EB8", "#4DAF4A")[gl(n, k)], 67 | pch = c(rep(22, k), rep(21, k), rep(25, k))) 68 | Sys.sleep(3) 69 | # Same data unlabeled; clearly the classes' structure is less evident 70 | plot(x$V1, x$V2) 71 | Sys.sleep(3) 72 | 73 | chunk1 = sample(1:100, 5) 74 | chunk2 = sample(setdiff(1:100, chunk1), 5) 75 | chunk3 = sample(101:200, 5) 76 | chunk4 = sample(setdiff(101:200, chunk3), 5) 77 | chunk5 = sample(201:300, 5) 78 | chks = list(chunk1, chunk2, chunk3, chunk4, chunk5) 79 | chunks = rep(-1, 300) 80 | # positive samples in the chunks 81 | for (i in 1:5) { 82 | for (j in chks[[i]]) { 83 | chunks[j] = i 84 | } 85 | } 86 | 87 | # define the negative constrains between chunks 88 | neglinks = matrix(c( 89 | 0, 0, 1, 1, 1, 90 | 0, 0, 1, 1, 1, 91 | 1, 1, 0, 0, 0, 92 | 1, 1, 0, 0, 1, 93 | 1, 1, 1, 1, 0), 94 | ncol = 5, byrow = TRUE) 95 | 96 | dcaData = dca(data = data, chunks = chunks, neglinks = neglinks)$newData 97 | # plot DCA transformed data 98 | plot(dcaData[, 1], dcaData[, 2], bg = c("#E41A1C", "#377EB8", "#4DAF4A")[gl(n, k)], 99 | pch = c(rep(22, k), rep(21, k), rep(25, k)), 100 | xlim = c(-15, 15), ylim = c(-15, 15)) 101 | } 102 | \references{ 103 | Steven C.H. Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2006). 104 | Learning Distance Metrics with Contextual Constraints for Image Retrieval. 105 | \emph{Proceedings IEEE Conference on Computer Vision and Pattern Recognition 106 | (CVPR2006)}. 107 | } 108 | \seealso{ 109 | See \code{\link{kdca}} for the kernelized version of DCA. 110 | } 111 | \author{ 112 | Nan Xiao <\url{https://nanx.me}> 113 | } 114 | \keyword{component} 115 | \keyword{dca} 116 | \keyword{discriminant} 117 | \keyword{mahalanobis} 118 | \keyword{metric} 119 | \keyword{transformation} 120 | -------------------------------------------------------------------------------- /man/kdca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kdca.R 3 | \name{kdca} 4 | \alias{kdca} 5 | \title{Kernel Discriminative Component Analysis} 6 | \usage{ 7 | kdca(k, chunks, neglinks, useD) 8 | } 9 | \arguments{ 10 | \item{k}{n x n kernel matrix. Result of the \code{\link{kmatrixGauss}} function. 
11 | n is the number of samples.} 12 | 13 | \item{chunks}{\code{n * 1} vector describing the chunklets: 14 | \code{-1} in the \code{i} th place means that point \code{i} 15 | doesn\'t belong to any chunklet; 16 | integer \code{j} in place \code{i} means that point \code{i} 17 | belongs to chunklet j. 18 | The chunklets indexes should be 1:(number of chunklets).} 19 | 20 | \item{neglinks}{\code{s * s} matrix describing the negative relationship 21 | between all the \code{s} chunklets. 22 | For the element \eqn{neglinks_{ij}}: 23 | \eqn{neglinks_{ij} = 1} means chunklet \code{i} and chunklet {j} 24 | have negative constraint(s); 25 | \eqn{neglinks_{ij} = -1} means chunklet \code{i} and chunklet {j} 26 | don\'t have negative constraints 27 | or we don\'t have information about that.} 28 | 29 | \item{useD}{optional. When not given, DCA is done in the original dimension 30 | and B is full rank. When useD is given, DCA is preceded by 31 | constraints based LDA which reduces the dimension to useD. 32 | B in this case is of rank useD.} 33 | } 34 | \value{ 35 | list of the KDCA results: 36 | \item{B}{KDCA suggested Mahalanobis matrix} 37 | \item{DCA}{KDCA suggested transformation of the data. 38 | The dimension is (original data dimension) * (useD)} 39 | \item{newData}{KDCA transformed data} 40 | } 41 | \description{ 42 | Performs kernel discriminative component analysis on the given data. 43 | } 44 | \details{ 45 | Put KDCA function details here. 46 | } 47 | \note{ 48 | Put some note here. 49 | } 50 | \examples{ 51 | kdca(NULL) 52 | } 53 | \references{ 54 | Steven C.H. Hoi, W. Liu, M.R. Lyu and W.Y. Ma (2006). 55 | Learning Distance Metrics with Contextual Constraints for Image Retrieval. 56 | \emph{Proceedings IEEE Conference on Computer Vision and Pattern Recognition 57 | (CVPR2006)}. 58 | } 59 | \seealso{ 60 | See \code{\link{kmatrixGauss}} for the Gaussian kernel computation, 61 | and \code{\link{dca}} for the linear version of DCA. 62 | } 63 | \author{ 64 | Nan Xiao <\url{https://nanx.me}> 65 | } 66 | \keyword{component} 67 | \keyword{dca} 68 | \keyword{discriminant} 69 | \keyword{kdca} 70 | \keyword{mahalanobis} 71 | \keyword{metric} 72 | \keyword{transformation} 73 | -------------------------------------------------------------------------------- /man/kmatrixGauss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kmatrixGauss.R 3 | \name{kmatrixGauss} 4 | \alias{kmatrixGauss} 5 | \title{Gaussian Kernel Computation for 6 | Kernel Local Fisher Discriminant Analysis} 7 | \usage{ 8 | kmatrixGauss(x, sigma = 1) 9 | } 10 | \arguments{ 11 | \item{x}{n x d matrix of original samples. 12 | n is the number of samples.} 13 | 14 | \item{sigma}{dimensionality of reduced space. (default: 0.001)} 15 | } 16 | \value{ 17 | K n x n kernel matrix. 18 | n is the number of samples. 19 | } 20 | \description{ 21 | Gaussian kernel computation for klfda. 22 | } 23 | \details{ 24 | Put kmatrixGauss function details here. 25 | } 26 | \note{ 27 | Put some note here. 28 | } 29 | \examples{ 30 | NULL 31 | } 32 | \references{ 33 | Sugiyama, M (2007). 34 | Dimensionality reduction of multimodal labeled data by 35 | local Fisher discriminant analysis. 36 | \emph{Journal of Machine Learning Research}, vol.\bold{8}, 1027--1061. 37 | 38 | Sugiyama, M (2006). 39 | Local Fisher discriminant analysis for supervised dimensionality reduction. 40 | In W. W. Cohen and A. 
Moore (Eds.), \emph{Proceedings of 23rd International 41 | Conference on Machine Learning (ICML2006)}, 905--912. 42 | } 43 | \seealso{ 44 | See \code{klfda} for the computation of 45 | kernel local fisher discriminant analysis 46 | } 47 | \author{ 48 | Nan Xiao <\url{https://nanx.me}> 49 | } 50 | \keyword{discriminant} 51 | \keyword{fisher} 52 | \keyword{kernel} 53 | \keyword{klfda} 54 | \keyword{local} 55 | \keyword{mahalanobis} 56 | \keyword{metric} 57 | \keyword{transformation} 58 | -------------------------------------------------------------------------------- /man/rca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rca.R 3 | \name{rca} 4 | \alias{rca} 5 | \title{Relevant Component Analysis} 6 | \usage{ 7 | rca(x, chunks) 8 | } 9 | \arguments{ 10 | \item{x}{matrix or data frame of original data. 11 | Each row is a feature vector of a data instance.} 12 | 13 | \item{chunks}{list of \code{k} numerical vectors. 14 | Each vector represents a chunklet, the elements 15 | in the vectors indicate where the samples locate 16 | in \code{x}. See examples for more information.} 17 | } 18 | \value{ 19 | list of the RCA results: 20 | \item{B}{The RCA suggested Mahalanobis matrix. 21 | Distances between data points x1, x2 should be 22 | computed by (x2 - x1)' * B * (x2 - x1)} 23 | \item{A}{The RCA suggested transformation of the data. 24 | The data should be transformed by A * data} 25 | \item{newX}{The data after the RCA transformation (A). 26 | newData = A * data} 27 | 28 | The three returned argument are just different forms of the same output. 29 | If one is interested in a Mahalanobis metric over the original data space, 30 | the first argument is all she/he needs. If a transformation into another 31 | space (where one can use the Euclidean metric) is preferred, the second 32 | returned argument is sufficient. Using A and B is equivalent in the 33 | following sense: 34 | 35 | if y1 = A * x1, y2 = A * y2 then 36 | (x2 - x1)' * B * (x2 - x1) = (y2 - y1)' * (y2 - y1) 37 | } 38 | \description{ 39 | Performs relevant component analysis on the given data. 40 | } 41 | \details{ 42 | The RCA function takes a data set and a set of positive constraints 43 | as arguments and returns a linear transformation of the data space 44 | into better representation, alternatively, a Mahalanobis metric 45 | over the data space. 46 | 47 | Relevant component analysis consists of three steps: 48 | \enumerate{\item locate the test point 49 | \item compute the distances between the test points 50 | \item find \eqn{k} shortest distances and the bla} 51 | The new representation is known to be optimal in an information 52 | theoretic sense under a constraint of keeping equivalent data 53 | points close to each other. 54 | } 55 | \note{ 56 | Note that any different sets of instances (chunklets), 57 | e.g. {1, 3, 7} and {4, 6}, might belong to the 58 | same class and might belong to different classes. 
59 | } 60 | \examples{ 61 | set.seed(1234) 62 | require(MASS) # for mvrnorm(), used to generate synthetic Gaussian data 63 | k = 100 # sample size of each class 64 | n = 3 # number of classes 65 | N = k * n # total number of samples 66 | x1 = mvrnorm(k, mu = c(-10, 6), matrix(c(10, 4, 4, 10), ncol = 2)) 67 | x2 = mvrnorm(k, mu = c(0, 0), matrix(c(10, 4, 4, 10), ncol = 2)) 68 | x3 = mvrnorm(k, mu = c(10, -6), matrix(c(10, 4, 4, 10), ncol = 2)) 69 | x = as.data.frame(rbind(x1, x2, x3)) 70 | x$V3 = gl(n, k) 71 | 72 | # The fully labeled data set with 3 classes 73 | plot(x$V1, x$V2, bg = c("#E41A1C", "#377EB8", "#4DAF4A")[x$V3], 74 | pch = c(rep(22, k), rep(21, k), rep(25, k))) 75 | Sys.sleep(3) 76 | 77 | # Same data unlabeled; clearly the classes' structure is less evident 78 | plot(x$V1, x$V2) 79 | Sys.sleep(3) 80 | 81 | chunk1 = sample(1:100, 5) 82 | chunk2 = sample(setdiff(1:100, chunk1), 5) 83 | chunk3 = sample(101:200, 5) 84 | chunk4 = sample(setdiff(101:200, chunk3), 5) 85 | chunk5 = sample(201:300, 5) 86 | chks = x[c(chunk1, chunk2, chunk3, chunk4, chunk5), ] 87 | chunks = list(chunk1, chunk2, chunk3, chunk4, chunk5) 88 | 89 | # The chunklets provided to the RCA algorithm 90 | plot(chks$V1, chks$V2, col = rep(c("#E41A1C", "#377EB8", 91 | "#4DAF4A", "#984EA3", "#FF7F00"), each = 5), 92 | pch = rep(0:4, each = 5), ylim = c(-15, 15)) 93 | Sys.sleep(3) 94 | 95 | # Whitening transformation applied to the chunklets 96 | chkTransformed = as.matrix(chks[ , 1:2]) \%*\% rca(x[ , 1:2], chunks)$A 97 | 98 | plot(chkTransformed[ , 1], chkTransformed[ , 2], col = rep(c( 99 | "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00"), each = 5), 100 | pch = rep(0:4, each = 5), ylim = c(-15, 15)) 101 | Sys.sleep(3) 102 | 103 | # The original data after applying the RCA transformation 104 | plot(rca(x[ , 1:2], chunks)$newX[, 1], rca(x[ , 1:2], chunks)$newX[, 2], 105 | bg = c("#E41A1C", "#377EB8", "#4DAF4A")[gl(n, k)], 106 | pch = c(rep(22, k), rep(21, k), rep(25, k))) 107 | 108 | # The RCA suggested transformation of the data 109 | rca(x[ , 1:2], chunks)$A 110 | 111 | # The RCA suggested Mahalanobis matrix 112 | rca(x[ , 1:2], chunks)$B 113 | } 114 | \references{ 115 | Aharon Bar-Hillel, Tomer Hertz, Noam Shental, and Daphna Weinshall (2003). 116 | Learning Distance Functions using Equivalence Relations. 117 | \emph{Proceedings of 20th International Conference on 118 | Machine Learning (ICML2003)}. 119 | } 120 | \seealso{ 121 | See \code{\link{dca}} for exploiting negative constraints. 122 | } 123 | \author{ 124 | Nan Xiao <\url{https://nanx.me}> 125 | } 126 | \keyword{mahalanobis} 127 | \keyword{metric} 128 | \keyword{rca} 129 | \keyword{transformation} 130 | -------------------------------------------------------------------------------- /man/sdml-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sdml-package.R 3 | \docType{package} 4 | \name{sdml-package} 5 | \alias{sdml} 6 | \alias{sdml-package} 7 | \title{sdml: Supervised Distance Metric Learning with R} 8 | \description{ 9 | The sdml package aims to implement the state-of-the-art 10 | algorithms for supervised distance metric learning. 11 | It includes global and local methods such as 12 | (Kernel) Relevant Component Analysis, 13 | (Kernel) Discriminative Component Analysis, 14 | (Kernel) Local Fisher Discriminant Analysis, etc.
15 | These distance metric learning methods are widely applied in 16 | feature extraction, dimensionality reduction, clustering, 17 | classification, information retrieval, 18 | and computer vision problems. 19 | } 20 | \seealso{ 21 | Useful links: 22 | \itemize{ 23 | \item \url{https://github.com/nanxstats/sdml} 24 | \item Report bugs at \url{https://github.com/nanxstats/sdml/issues} 25 | } 26 | 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /sdml.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(sdml) 3 | 4 | test_check("sdml") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_helper_functions.R: -------------------------------------------------------------------------------- 1 | context('helper functions') 2 | 3 | test_that('package set up successfully', { 4 | expect_error(sum(1, 2), NA) 5 | }) 6 | --------------------------------------------------------------------------------
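The man/kmatrixGauss.Rd entry above documents kmatrixGauss(x, sigma) as returning the n x n Gaussian kernel matrix of the samples in x. The short sketch below is not part of the package sources; it assumes the interface documented above, that the sdml package is installed, and that the function follows the usual Gaussian kernel definition, in which case the returned matrix should be symmetric with ones on the diagonal.

library(sdml)   # kmatrixGauss(); assumes the package has been installed

set.seed(1)
x <- matrix(rnorm(20 * 3), nrow = 20)   # 20 samples in 3 dimensions
K <- kmatrixGauss(x, sigma = 1)         # Gaussian kernel with width 1

dim(K)                            # expected: 20 x 20
isSymmetric(K)                    # a Gaussian kernel matrix is symmetric
all.equal(diag(K), rep(1, 20))    # exp(0) = 1 on the diagonal, if uncentered

Such a kernel matrix is what the kernelized methods in the package (e.g. kdca, klfda) operate on in place of the raw data matrix.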
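The man/rca.Rd entry above states that the returned Mahalanobis matrix B and transformation A are two forms of the same output: squared distances under B equal squared Euclidean distances after mapping with A. The following minimal sketch checks that relationship numerically; it is not part of the package sources and assumes only the rca(x, chunks) interface documented above, the row-vector convention (newX = x %*% A) used in its examples, and that the sdml and MASS packages are available.

library(MASS)   # mvrnorm() for synthetic Gaussian data
library(sdml)   # rca(); assumes the package has been installed

set.seed(42)
# two Gaussian clusters; the chunklets below mark a few known-equivalent points
x <- rbind(mvrnorm(50, mu = c(0, 0), Sigma = diag(2)),
           mvrnorm(50, mu = c(5, 5), Sigma = diag(2)))
chunks <- list(1:5, 51:55)          # one small chunklet per cluster
res <- rca(x, chunks)               # returns B, A, and newX per man/rca.Rd

d <- x[2, ] - x[1, ]                     # difference of two sample points
mahal <- drop(t(d) %*% res$B %*% d)      # (x2 - x1)' B (x2 - x1)
eucl  <- sum((t(d) %*% res$A)^2)         # || (x2 - x1) A ||^2 = || y2 - y1 ||^2
all.equal(mahal, eucl)                   # should be TRUE up to numerical error

If the two quantities agree, A can be used to map data into a space where plain Euclidean tools apply, while B can be plugged directly into any Mahalanobis-based distance computation.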