├── .gitattributes ├── .gitfat ├── .gitignore ├── README.md ├── diffusionMaps.py └── test_DiffusionMap.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.mat filter=fat -crlf 2 | *.fig filter=fat -crlf 3 | *.png filter=fat -crlf 4 | *.gif filter=fat -crlf 5 | *.jpg filter=fat -crlf 6 | *.pdf filter=fat -crlf 7 | *.eps filter=fat -crlf 8 | *.mexa64 filter=fat -crlf 9 | *.zip filter=fat -crlf 10 | *.tar filter=fat -crlf 11 | *.jar filter=fat -crlf 12 | *.old filter=fat -crlf 13 | *.backup filter=fat -crlf 14 | *.amt_tree filter=fat -crlf 15 | *.avi filter=fat -crlf 16 | *.mp4 filter=fat -crlf 17 | *.xoj filter=fat -crlf 18 | *.csv filter=fat -crlf 19 | *.dll filter=fat -crlf 20 | *.shelf filter=fat -crlf 21 | *.xls filter=fat -crlf 22 | *.xlsx filter=fat -crlf 23 | -------------------------------------------------------------------------------- /.gitfat: -------------------------------------------------------------------------------- 1 | [rsync] 2 | remote = your.remote-host.org:/share/fat-store 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #java class 2 | *.class 3 | 4 | #matlab autosaves 5 | *~ 6 | 7 | #svn remnants 8 | .svn 9 | 10 | # some latex stuff 11 | *.aux 12 | *.lof 13 | *.log 14 | *.toc 15 | *.fls 16 | *.out 17 | #bib files 18 | *.bbl 19 | *.bcf 20 | *.blg 21 | *-blx.aux 22 | *-blx.bib 23 | *.run.xml 24 | 25 | #python compiled files 26 | *.pyc 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyDiffusionMaps 2 | Diffusion maps for python 3 | 4 | see 5 | - [Diffusion maps for high-dimensional single-cell analysis of differentiation data.](http://bioinformatics.oxfordjournals.org/content/31/18/2989) 6 | - [Geometric diffusions as a tool 
import logging
import warnings

import numpy as np
import scipy.spatial.distance
from scipy.sparse import csr_matrix, issparse, diags
from scipy.sparse.linalg import eigsh
from scipy.linalg import eigh

logging.basicConfig(level=logging.INFO)  # for DEVEL, just use full logging


"""
[1] Haghverdi, L., Buettner, F. & Theis, F. J.
    Diffusion maps for high-dimensional single-cell analysis of differentiation data.
    Bioinformatics 31, 2989–2998 (2015).
[2] Laleh Haghverdi, Maren Büttner, F. Alexander Wolf, Florian Buettner, Fabian J. Theis
    Diffusion pseudotime robustly reconstructs lineage branching
"""


def _check_Z_for_division(Z, eps):
    """Check that adding ``eps`` to ``Z`` cannot noticeably distort ``1 / Z``.

    Exact zeros are allowed (they are what ``eps`` protects against); every
    other entry must be larger in magnitude than ``10 * eps``.

    :param Z: array of normalization constants (row/column sums)
    :param eps: the small constant that will be added to ``Z`` before dividing
    :raises AssertionError: if a nonzero entry of ``Z`` is not > ``10 * eps``
    """
    Z = np.asarray(Z)
    nonzero = Z[Z != 0]
    # explicit raise instead of ``assert`` so the check survives ``python -O``
    if not np.all(np.abs(nonzero) > eps * 10):
        raise AssertionError('values to small. might introduce some error since close to zero division')


def _max_abs(mat):
    """Return the largest absolute entry of a dense or sparse matrix (0.0 if it has no entries)."""
    if issparse(mat):
        if mat.nnz == 0:
            return 0.0
        return float(abs(mat).max())
    mat = np.asarray(mat)
    if mat.size == 0:
        return 0.0
    return float(np.max(np.abs(mat)))


def _assert_symmetric(mat, atol):
    """Raise AssertionError unless ``max |mat - mat.T| <= atol`` (never densifies sparse input)."""
    asymmetry = _max_abs(mat - mat.T)
    # written as ``not (<=)`` so that a NaN discrepancy also fails the check
    if not asymmetry <= atol:
        raise AssertionError(
            "kernel matrix is not symmetric: max |K - K.T| = %e exceeds atol = %e" % (asymmetry, atol)
        )


def _density_normalize(kernelMat, symmetrize=False):
    """
    Turn a symmetric kernel matrix into a (density-normalized) transition matrix.

    1. density normalization: Eq (4-5) of [1] or Eq 3,4 in [2]
           W_xy = K(x,y) / (Z(x) Z(y))
       (the Coifman anisotropy normalization with alpha=1, mitigating the
       effect of sampling density)

    2. row normalization, Eq (5,6) in [1] / [2]: every row is divided by its
       sum, which makes the result a transition matrix. This is asymmetric!

    3. optional: instead of step 2, scale rows and columns by the inverse
       square root of the row/column sums (see [2] Suppl. Eq 7). Only
       implemented for sparse input.

    Matrix products on sparse operands use ``@`` so the code does not depend
    on whether ``*`` means matrix or elementwise product for the sparse type.

    :param kernelMat: symmetric kernel matrix, ``np.ndarray`` or scipy sparse
    :param symmetrize: if True, return the symmetrized transition matrix,
                       otherwise the classic non-symmetric transition matrix
    :return: normalized matrix; sparse if the input was sparse, dense otherwise.
             The input is not modified.
    :raises AssertionError: if ``kernelMat`` is not symmetric (atol 1e-10) or a
                            normalization constant is too close to zero
    :raises NotImplementedError: for dense input with ``symmetrize=True``
    """
    eps = 1e-100

    # the method only works on symmetric matrices (relies on Z being the same along rows and cols)
    atol_symmetric = 1e-10  # TODO loose tolerance
    _assert_symmetric(kernelMat, atol_symmetric)

    sparse_input = issparse(kernelMat)

    # --- step 1: P_xy / (Z(x) Z(y)); rescale each row and each column by 1/Z ---
    # row and column sums are identical because the matrix is symmetric
    Z = np.asarray(kernelMat.sum(0)).ravel()  # sparse sums come back 2-D, flatten them
    _check_Z_for_division(Z, eps)
    invZ = 1.0 / (Z + eps)  # eps guards against division by zero; all-zero rows stay zero
    if sparse_input:
        # left-multiplying by a diagonal matrix scales rows, right-multiplying scales columns
        scaling = diags(invZ, offsets=0)
        P_tilde = scaling @ kernelMat @ scaling
    else:
        # broadcasting: invZ scales columns, invZ as a column vector scales rows
        P_tilde = kernelMat * invZ * invZ.reshape(-1, 1)

    # P_tilde has to be symmetric here; only complain loudly if it is not
    asymmetry = _max_abs(P_tilde - P_tilde.T)
    report = logging.warning if asymmetry > atol_symmetric else logging.debug
    report("max discrepancy of Ptilde symmetry: %e", asymmetry)

    # --- step 2: Eq (5,6) of [1], normalize by the row sums of P_tilde ---
    row_sum = np.asarray(P_tilde.sum(1)).ravel()
    _check_Z_for_division(row_sum, eps)

    if not sparse_input:
        if symmetrize:  # Eq 7 of [2] suppl.
            raise NotImplementedError("not clear how symmetric is implemented. ask maren")
        # the column-vector shape makes the scaling act on rows, not columns
        return P_tilde * (1.0 / (row_sum + eps)).reshape(-1, 1)

    if symmetrize:  # Eq 7 of [2] suppl.
        logging.warning("not clear how the symmetric version is implemented")
        col_sum = np.asarray(P_tilde.sum(0)).ravel()
        _check_Z_for_division(col_sum, eps)
        sqrt_scale_row = np.sqrt(1.0 / (row_sum + eps))
        sqrt_scale_col = np.sqrt(1.0 / (col_sum + eps))
        logging.debug("max discrepancy of row/colsum: %e", _max_abs(row_sum - col_sum))
        return diags(sqrt_scale_row, offsets=0) @ P_tilde @ diags(sqrt_scale_col, offsets=0)

    return diags(1.0 / (row_sum + eps), offsets=0) @ P_tilde


def _calc_dpt(T):
    """
    Compute the accumulated transition matrix and the diffusion pseudotime distances.

    All but one eigenpair of ``T`` are computed with ``eigsh`` (it cannot
    return the complete spectrum; with its default settings the eigenvalue of
    smallest magnitude is the one left out). The eigenpair with the largest
    eigenvalue (the stationary state) is then dropped, and each remaining
    eigenvector is weighted by ``lambda / (1 - lambda)``.

    :param T: symmetric sparse transition matrix
    :return: tuple ``(M, dpt)`` with ``M = V diag(lambda/(1-lambda)) V^T`` and
             ``dpt[a, b]`` the Euclidean distance between rows a and b of the
             weighted eigenvector matrix (i.e. sqrt of dpt^2)
    :raises AssertionError: if ``T`` is not sparse
    """
    if not issparse(T):
        raise AssertionError("T should be sparse")
    n_vectors = T.shape[0] - 1  # eigsh can only compute all but one eigenpair

    logging.info("Calculating full eigenvalue decomposition")
    lambdas, V = eigsh(T, k=n_vectors)

    # do not rely on the order eigsh returns: sort ascending, then drop the
    # largest eigenvalue, which belongs to the stationary state
    order = np.argsort(lambdas)
    lambdas, V = lambdas[order][:-1], V[:, order][:, :-1]

    # computed only for the kept eigenvalues, so lambda == 1 never divides by zero
    prefactor = lambdas / (1 - lambdas)
    weighted = V * prefactor  # scales column i by prefactor[i]

    M = weighted @ V.T

    logging.info("calculating dpt matrix")
    if weighted.shape[1] == 0:
        dpt_matrix = np.zeros((V.shape[0], V.shape[0]))
    else:
        # dpt^2[a, b] = sum_i prefactor_i^2 (V[a, i] - V[b, i])^2, i.e. the
        # squared Euclidean distance between rows of the weighted eigenvectors
        dpt_matrix = scipy.spatial.distance.cdist(weighted, weighted)

    warnings.warn('changed to return sqrt(dtp2)')
    return M, dpt_matrix
from sklearn.base import BaseEstimator
from sklearn.neighbors import NearestNeighbors


class DiffusionMap(BaseEstimator):
    """
    Diffusion maps for dimension reduction along the lines of [1].

    The kernel matrix is approximated with a k-nearest-neighbour graph, so it
    is sparse: only entries between a sample and its neighbours are nonzero.
    """

    def __init__(self, sigma, embedding_dim, k=100):
        """
        :param sigma: diffusion kernel width; 0 selects a per-sample (local) width
        :param embedding_dim: how many dimensions to reduce to
        :param k: kNN parameter (kNN is used when calculating the kernel matrix).
                  the larger the more accurate, but the more RAM needed
        """
        self.sigma = sigma
        self.embedding_dim = embedding_dim
        self.k = k
        self.local_sigma = None  # set by _get_kernel_matrix when sigma == 0

        # NN is the most expensive calculation, cache it together with the
        # inputs it was computed for
        self._cached_nn_distances = None
        self._cached_nn_indices = None
        self._cached_nn_data = None
        self._cached_nn_k = None

    def fit_transform(self, X, density_normalize=True, symmetrize=False):
        """
        Estimate the diffusion map embedding.

        :param X: data matrix (samples x features)
        :param density_normalize: whether to apply density normalization to the kernel matrix
        :param symmetrize: forwarded to the density normalization
        :return: tuple ``(V, lambdas)``: eigenvectors (samples x embedding_dim)
                 and eigenvalues, both ordered by descending eigenvalue
        """
        # the kernel matrix (called $P_xy$ in [1]) is built on a nearest-neighbour graph
        k = min(self.k, X.shape[0])  # cannot ask for more neighbours than samples

        logging.info("Calculating kernel matrix")
        kernelMat = self._get_kernel_matrix(X, k)

        # set the diagonal to 0: no diffusion onto itself
        # TODO not sure if this is to be done BEFORE or after normalization
        kernelMat.setdiag(np.zeros(X.shape[0]))

        if density_normalize:
            logging.info("density normalization")
            kernelMat = _density_normalize(kernelMat, symmetrize=symmetrize)

        # also store the kernel matrix (mostly for debugging)
        self.kernelMat = kernelMat

        logging.info("Calculating eigenvalue decomposition")
        # TODO Warning: the kernel matrix is not symmetric after density
        # normalization with symmetrize=False, while eigsh assumes symmetry
        lambdas, V = eigsh(kernelMat, k=self.embedding_dim)

        # eigsh returns the k largest eigenvalues in ascending order: put the largest first
        ix = lambdas.argsort()[::-1]

        # TODO could think about getting rid of the first EV, which has only density info
        return V[:, ix], lambdas[ix]

    def _get_NN(self, dataMatrix, k):
        """
        Calculate the distances to the k nearest neighbours of every sample.

        The result is cached and reused only when called again with the same
        array object and the same ``k``. In-place changes to that array are
        not detected.

        :param dataMatrix: matrix containing one sample per row
        :param k: number of nearest neighbours
        :return: ``(distances, indices)`` as returned by ``NearestNeighbors.kneighbors``
        """
        cache_valid = (
            self._cached_nn_distances is not None
            and self._cached_nn_indices is not None
            and getattr(self, "_cached_nn_data", None) is dataMatrix
            and getattr(self, "_cached_nn_k", None) == k
        )
        if not cache_valid:
            nbrs = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(dataMatrix)
            distances, indices = nbrs.kneighbors(dataMatrix)

            # cache for later, remembering what the cache belongs to
            self._cached_nn_distances = distances
            self._cached_nn_indices = indices
            self._cached_nn_data = dataMatrix
            self._cached_nn_k = k

        return self._cached_nn_distances, self._cached_nn_indices

    def _get_kernel_matrix(self, X, k):
        """
        Return the kernel matrix for the samples in X using a Gaussian kernel
        and a kNN approximation, called K(x,y) in [2].

        - all entries are zero, except between nearest neighbours
        - the matrix is symmetrized (the kNN relation is not necessarily symmetric)

        if self.sigma != 0 a single sigma is applied to all datapoints.
        if self.sigma == 0 a sigma is estimated for each datapoint from the
        median squared distance to its nearest neighbours.

        :param X: data matrix NxF, where N=number of samples, F=number of features
        :param k: number of nearest neighbours to consider in kNN
        :return: symmetric sparse matrix of NxN
        """
        distances, indices = self._get_NN(X, k=k)  # both of shape (N, k)

        if self.sigma != 0:
            logging.info("calculating kernel matrix with global sigma %f" % self.sigma)
            weights = np.exp(-(0.5 / self.sigma**2) * distances**2)
        else:
            logging.info("calculating kernel matrix with local sigma")
            local_sigma_squared = np.median(distances**2, axis=1).reshape(-1, 1)  # (N, 1)
            # numerical stability, also dropout leads to 0 distance of different datapoints
            local_sigma_squared = local_sigma_squared + 1e-15
            self.local_sigma = np.sqrt(local_sigma_squared)

            # every (sample, neighbour) pair combines the sigmas of both points
            sigma_neighbour = self.local_sigma[indices, 0]  # (N, k)
            squared_sum = local_sigma_squared + local_sigma_squared[indices, 0]  # (N, k)
            prefactor = np.sqrt(2 * self.local_sigma * sigma_neighbour / squared_sum)
            weights = prefactor * np.exp(-distances**2 / (2 * squared_sum))

        # CSR layout: row i owns entries i*k .. (i+1)*k of the flattened arrays
        N = X.shape[0]
        indptr = np.arange(0, (N + 1) * k, k)
        K = csr_matrix((weights.ravel(), indices.ravel(), indptr), shape=(N, N))

        # due to the kNN approximation K is not necessarily symmetric (if x is
        # a kNN of y, y does not have to be a kNN of x). The elementwise
        # maximum with the transpose fills in the missing entries and is
        # exactly symmetric by construction, so no dense check is needed.
        return K.maximum(K.T)


if __name__ == '__main__':
    import matplotlib.pyplot as plt
    # fetch_mldata was removed from scikit-learn; fetch_openml is its replacement
    from sklearn.datasets import fetch_openml

    # testing with MNIST
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    X, y = mnist.data.astype(np.float32), mnist.target.astype(int)
    ix_perm = np.random.permutation(X.shape[0])  # shuffle the data
    X, y = X[ix_perm, :], y[ix_perm]

    X, y = X[:1000, :], y[:1000]

    X /= 255

    df = DiffusionMap(sigma=5, embedding_dim=10)
    V, lam = df.fit_transform(X, density_normalize=False)

    plt.scatter(V[:, 0], V[:, 1], c=y)
    plt.show()
import pytest
import numpy as np
from diffusionMaps import DiffusionMap, _density_normalize
from scipy.sparse import csr_matrix, issparse
import scipy.sparse


@pytest.fixture
def mixtureNormals():
    """Two 3-d Gaussian blobs of 50 samples each, centred at 0 and 3 (seeded)."""
    rng = np.random.RandomState(0)
    samples = 50
    dims = 3
    X1 = rng.normal(0, 1, size=(samples, dims))
    X2 = rng.normal(3, 1, size=(samples, dims))
    return np.vstack([X1, X2])


def create_sym_matrix(n, seed=0):
    """Dense symmetric n x n matrix ``q q^T`` built from seeded uniform noise."""
    q = np.random.RandomState(seed).rand(n, n)
    return np.dot(q, q.T)


def create_sparse_sym_matrix(n, density=0.1, seed=0):
    """Sparse symmetric n x n matrix; ``density`` is roughly the fraction of nonzero entries."""
    rng = np.random.RandomState(seed)
    q = rng.rand(n, n)
    sym = np.dot(q, q.T)
    ix = rng.binomial(1, density / 2, size=(n, n))
    ix = np.maximum(ix, ix.T)  # make the selection of entries symmetric as well
    return csr_matrix(sym * ix)


def test_DiffusionMap_fit_transform_output_dimensions(mixtureNormals):
    X = mixtureNormals

    embDim = 2
    df = DiffusionMap(sigma=1, embedding_dim=embDim)
    X_embed, lam = df.fit_transform(X)

    assert X_embed.shape == (X.shape[0], embDim), "returns wrong dimensionally"
    assert lam.shape[0] == X_embed.shape[1], "must return as many eigenvalues as embedded dimensions"


def test_DiffusionMap_nearestNeighbour_number_of_neighbours(mixtureNormals):
    X = mixtureNormals
    df = DiffusionMap(sigma=1, embedding_dim=2)

    kNN = 4
    distances, indices = df._get_NN(X, k=kNN)

    assert distances.shape == (X.shape[0], kNN)
    assert indices.shape == (X.shape[0], kNN)


def test_DiffusionMap_get_kernel_matrix_number_of_neighbours(mixtureNormals):
    """We would like to test for the exact number of neighbours, but due to
    the symmetrizing a row can hold more than kNN entries."""
    X = mixtureNormals
    df = DiffusionMap(sigma=1, embedding_dim=2)

    kNN = 4
    K = df._get_kernel_matrix(X, k=kNN)
    assert K.shape == (X.shape[0], X.shape[0])

    nonzero_elements_per_row = np.sum(K.toarray() != 0, 1)
    # the number of nonzero elements must be kNN or larger (due to the symmetrizing)
    assert np.all(nonzero_elements_per_row >= kNN)


def test_DiffusionMap_get_kernel_matrix_symmetry(mixtureNormals):
    "make sure the kernel matrix is symmetric"
    X = mixtureNormals
    df = DiffusionMap(sigma=1, embedding_dim=2)
    K = df._get_kernel_matrix(X, k=2)

    Q = (K - K.T).toarray()  # np.all doesnt work on sparse matrices
    assert np.all(Q == 0), 'returned kernel matrix is not symmetric'


def test__get_kernel_matrix_sparse(mixtureNormals):
    df = DiffusionMap(sigma=1, embedding_dim=2)
    K = df._get_kernel_matrix(mixtureNormals, k=10)
    assert issparse(K)


def test__density_normalize__sparse():
    "must return sparse if we put in sparse"
    K = csr_matrix([[0, 1], [1, 1]])
    assert issparse(_density_normalize(K)), 'returned matrix is not sparse after normalization'


def test__density_normalize__rowsum():
    "rows must sum to one after the density normalization"
    K = create_sparse_sym_matrix(100, density=0.1)
    K_norm = _density_normalize(K)
    np.testing.assert_allclose(K_norm.toarray().sum(1), 1)


def test__density_normalize__not_sparse_rowsum():
    "rows must sum to one after the density normalization"
    K = create_sym_matrix(100)
    K_norm = _density_normalize(K)
    np.testing.assert_allclose(K_norm.sum(1), 1)


def test__density_normalize__not_sparse_symmetrize():
    "the symmetrized version is not implemented for dense input"
    K = create_sym_matrix(100)
    with pytest.raises(NotImplementedError):
        _density_normalize(K, symmetrize=True)


def test__density_normalize__sparse_symmetrize():
    "check the symmetrized version of the transition matrix"
    K = create_sparse_sym_matrix(100, 0.1)
    Tsym = _density_normalize(K, symmetrize=True).toarray()
    # check symmetry
    np.testing.assert_allclose(Tsym, Tsym.T, err_msg="Tsym is not symmetric")
    # D^-1/2 P D^-1/2 is similar to the row-stochastic D^-1 P, so it shares
    # its leading eigenvalue of 1 (its rows themselves do not sum to 1)
    np.testing.assert_allclose(np.linalg.eigvalsh(Tsym).max(), 1, atol=1e-8,
                               err_msg="leading eigenvalue <> 1")


def test__density_normalize__not_sparse():
    K = create_sym_matrix(2)
    K_norm = _density_normalize(K)
    assert isinstance(K_norm, np.ndarray), 'must return full matrix if we put in a full matrix'


def test_density_normalize_same_result_sparse_nonsparse():
    # test different sparsities, as sometimes entire rows/cols become zero
    for seed, d in enumerate([0.0001, 0.01, 0.5]):
        K_sparse = create_sparse_sym_matrix(5, density=d, seed=seed)
        K_full = K_sparse.toarray()

        n_sparse = _density_normalize(K_sparse).toarray()
        n_full = _density_normalize(K_full)

        np.testing.assert_allclose(n_sparse, n_full)


def test_DiffusionMap_fit_transform_eigenvalue_ordering(mixtureNormals):
    "must return the largest first"
    X = mixtureNormals

    df = DiffusionMap(sigma=1, embedding_dim=2)
    X_embed, lam = df.fit_transform(X)
    assert lam[0] > lam[1]