├── .gitattributes ├── codes ├── LDA │ ├── LDA.png │ └── LDA.py ├── LE │ ├── LE.png │ ├── LE_1.png │ └── LE.py ├── LLE │ ├── LLE.png │ └── LLE.py ├── LPP │ ├── LPP.png │ └── LPP.py ├── PCA │ ├── PCA.png │ ├── KPCA.png │ ├── KPCA.py │ └── PCA.py ├── MDS │ ├── MDS_1.png │ ├── MDS_2.png │ ├── MDS.py │ └── MDS_tensorflow.py ├── T-SNE │ ├── T-SNE.png │ ├── TSNE_tensorflow.py │ ├── TSNE.py │ └── TSNE_tensorflow.ipynb ├── ISOMAP │ ├── Isomap.png │ └── ISOMAP.py ├── AutoEncoder │ ├── AutoEncoder.png │ └── AutoEncoder.py ├── SVD │ └── SVD.py └── ICA │ └── ICA.py ├── .gitignore ├── README.md └── LICENSE /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=python 2 | -------------------------------------------------------------------------------- /codes/LDA/LDA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/LDA/LDA.png -------------------------------------------------------------------------------- /codes/LE/LE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/LE/LE.png -------------------------------------------------------------------------------- /codes/LE/LE_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/LE/LE_1.png -------------------------------------------------------------------------------- /codes/LLE/LLE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/LLE/LLE.png -------------------------------------------------------------------------------- /codes/LPP/LPP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/LPP/LPP.png -------------------------------------------------------------------------------- /codes/PCA/PCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/PCA/PCA.png -------------------------------------------------------------------------------- /codes/MDS/MDS_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/MDS/MDS_1.png -------------------------------------------------------------------------------- /codes/MDS/MDS_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/MDS/MDS_2.png -------------------------------------------------------------------------------- /codes/PCA/KPCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/PCA/KPCA.png -------------------------------------------------------------------------------- /codes/T-SNE/T-SNE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/T-SNE/T-SNE.png -------------------------------------------------------------------------------- /codes/ISOMAP/Isomap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/ISOMAP/Isomap.png -------------------------------------------------------------------------------- /codes/AutoEncoder/AutoEncoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heucoder/dimensionality_reduction_alo_codes/HEAD/codes/AutoEncoder/AutoEncoder.png -------------------------------------------------------------------------------- /codes/SVD/SVD.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | ''' 4 | author: heucoder 5 | email: 812860165@qq.com 6 | date: 2019.6.13 7 | ''' 8 | 9 | import numpy as np 10 | from sklearn.datasets import load_iris 11 | 12 | def svd(data): 13 | ''' 14 | :param data: 15 | :return: U, Sigma, VT 16 | ''' 17 | 18 | # mean 19 | N, D = data.shape 20 | data = data - np.mean(data, axis=0) 21 | 22 | # V 23 | Veig_val, Veig_vector = np.linalg.eigh(np.dot(data.T, data)) 24 | VT = Veig_vector[:, np.argsort(-abs(Veig_val))].T 25 | 26 | # U 27 | Ueig_val, Ueig_vector = np.linalg.eigh(np.dot(data, data.T)) 28 | U = Ueig_vector[:, np.argsort(-abs(Ueig_val))] 29 | 30 | # Sigma 31 | Sigma = np.zeros((N, D)) 32 | for i in range(D): 33 | Sigma[i, i] = np.dot(data, VT[i])[i]/U[i,i] 34 | 35 | return U, Sigma, VT 36 | 37 | if __name__ == '__main__': 38 | iris = load_iris() 39 | X = iris.data 40 | Y = iris.target 41 | U, Sigma, VT = svd(X) 42 | 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /codes/MDS/MDS.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from sklearn.datasets import load_iris 4 | from sklearn.manifold import MDS 5 | import matplotlib.pyplot as plt 6 | 7 | ''' 8 | author: heucoder 9 | email: 812860165@qq.com 10 | date: 2019.6.13 11 | ''' 12 | 13 | def cal_pairwise_dist(x): 14 | '''计算pairwise 距离, x是matrix 15 | (a-b)^2 = a^2 + b^2 - 2*a*b 16 | ''' 17 | sum_x = np.sum(np.square(x), 1) 18 | dist = np.add(np.add(-2 * np.dot(x, x.T), sum_x).T, sum_x) 19 | #返回任意两个点之间距离的平方 20 | return dist 21 | 22 | 23 | def my_mds(data, n_dims): 24 | ''' 25 | 26 | :param data: (n_samples, n_features) 27 | :param n_dims: target n_dims 28 | :return: (n_samples, n_dims) 29 | ''' 30 | 31 | n, d = data.shape 32 | dist = cal_pairwise_dist(data) 33 | dist[dist < 0 ] = 0 34 | T1 = np.ones((n,n))*np.sum(dist)/n**2 35 | T2 = np.sum(dist, axis = 1, keepdims=True)/n 36 | T3 = np.sum(dist, axis = 0, keepdims=True)/n 37 | 38 | B = -(T1 - T2 - T3 + dist)/2 39 | 40 | eig_val, eig_vector = np.linalg.eig(B) 41 | index_ = np.argsort(-eig_val)[:n_dims] 42 | picked_eig_val = eig_val[index_].real 43 | picked_eig_vector = eig_vector[:, index_] 44 | # print(picked_eig_vector.shape, picked_eig_val.shape) 45 | return picked_eig_vector*picked_eig_val**(0.5) 46 | 47 | if __name__ == '__main__': 48 | iris = load_iris() 49 | data = iris.data 50 | Y = iris.target 51 | data_1 = my_mds(data, 2) 52 | 53 | data_2 = MDS(n_components=2).fit_transform(data) 54 | 55 | plt.figure(figsize=(8, 4)) 56 | plt.subplot(121) 57 | plt.title("my_MDS") 58 | plt.scatter(data_1[:, 0], data_1[:, 1], c=Y) 59 | 60 | plt.subplot(122) 61 | plt.title("sklearn_MDS") 62 | plt.scatter(data_2[:, 0], data_2[:, 1], c=Y) 63 | plt.savefig("MDS_1.png") 64 | plt.show() -------------------------------------------------------------------------------- /codes/PCA/KPCA.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | # 实现KPCA 3 | 4 | from sklearn.datasets import load_iris 5 | from sklearn.decomposition import KernelPCA 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from scipy.spatial.distance import pdist, squareform 9 | 10 | ''' 11 | author: heucoder 12 | email: 812860165@qq.com 13 | date: 2019.6.13 14 | ''' 15 | 16 | 17 | def sigmoid(x, coef = 0.25): 18 | x = np.dot(x, x.T) 19 | return np.tanh(coef*x+1) 20 | 21 | def linear(x): 22 | x = np.dot(x, x.T) 23 | return x 24 | 25 | def rbf(x, gamma = 15): 26 | sq_dists = pdist(x, 'sqeuclidean') 27 | mat_sq_dists = squareform(sq_dists) 28 | return np.exp(-gamma*mat_sq_dists) 29 | 30 | def kpca(data, n_dims=2, kernel = rbf): 31 | ''' 32 | 33 | :param data: (n_samples, n_features) 34 | :param n_dims: target n_dims 35 | :param kernel: kernel functions 36 | :return: (n_samples, n_dims) 37 | ''' 38 | 39 | K = kernel(data) 40 | # 41 | N = K.shape[0] 42 | one_n = np.ones((N, N)) / N 43 | K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n) 44 | # 45 | eig_values, eig_vector = np.linalg.eig(K) 46 | idx = eig_values.argsort()[::-1] 47 | eigval = eig_values[idx][:n_dims] 48 | eigvector = eig_vector[:, idx][:, :n_dims] 49 | print(eigval) 50 | eigval = eigval**(1/2) 51 | vi = eigvector/eigval.reshape(-1,n_dims) 52 | data_n = np.dot(K, vi) 53 | return data_n 54 | 55 | 56 | if __name__ == "__main__": 57 | data = load_iris().data 58 | Y = load_iris().target 59 | data_1 = kpca(data, kernel=rbf) 60 | 61 | 62 | sklearn_kpca = KernelPCA(n_components=2, kernel="rbf", gamma=15) 63 | data_2 = sklearn_kpca.fit_transform(data) 64 | 65 | plt.figure(figsize=(8,4)) 66 | plt.subplot(121) 67 | plt.title("my_KPCA") 68 | plt.scatter(data_1[:, 0], data_1[:, 1], c = Y) 69 | 70 | plt.subplot(122) 71 | plt.title("sklearn_KPCA") 72 | plt.scatter(data_2[:, 0], data_2[:, 1], c = Y) 73 | plt.show() 74 | -------------------------------------------------------------------------------- /codes/LDA/LDA.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import numpy as np 3 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 4 | from sklearn.datasets import load_iris 5 | import matplotlib.pyplot as plt 6 | 7 | ''' 8 | author: heucoder 9 | email: 812860165@qq.com 10 | date: 2019.6.13 11 | ''' 12 | 13 | 14 | def lda(data, target, n_dim): 15 | ''' 16 | :param data: (n_samples, n_features) 17 | :param target: data class 18 | :param n_dim: target dimension 19 | :return: (n_samples, n_dims) 20 | ''' 21 | 22 | clusters = np.unique(target) 23 | 24 | if n_dim > len(clusters)-1: 25 | print("K is too much") 26 | print("please input again") 27 | exit(0) 28 | 29 | #within_class scatter matrix 30 | Sw = np.zeros((data.shape[1],data.shape[1])) 31 | for i in clusters: 32 | datai = data[target == i] 33 | datai = datai-datai.mean(0) 34 | Swi = np.mat(datai).T*np.mat(datai) 35 | Sw += Swi 36 | 37 | #between_class scatter matrix 38 | SB = np.zeros((data.shape[1],data.shape[1])) 39 | u = data.mean(0) #所有样本的平均值 40 | for i in clusters: 41 | Ni = data[target == i].shape[0] 42 | ui = data[target == i].mean(0) #某个类别的平均值 43 | SBi = Ni*np.mat(ui - u).T*np.mat(ui - u) 44 | SB += SBi 45 | S = np.linalg.inv(Sw)*SB 46 | eigVals,eigVects = np.linalg.eig(S) #求特征值,特征向量 47 | eigValInd = np.argsort(eigVals) 48 | eigValInd = eigValInd[:(-n_dim-1):-1] 49 | w = eigVects[:,eigValInd] 50 | data_ndim = np.dot(data, w) 51 | 52 | return data_ndim 53 | 54 | if __name__ == '__main__': 55 | iris = load_iris() 56 | X = iris.data 57 | Y = iris.target 58 | data_1 = lda(X, Y, 2) 59 | 60 | data_2 = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, Y) 61 | 62 | 63 | plt.figure(figsize=(8,4)) 64 | plt.subplot(121) 65 | plt.title("my_LDA") 66 | plt.scatter(data_1[:, 0], data_1[:, 1], c = Y) 67 | 68 | plt.subplot(122) 69 | plt.title("sklearn_LDA") 70 | plt.scatter(data_2[:, 0], data_2[:, 1], c = Y) 71 | plt.savefig("LDA.png") 72 | plt.show() -------------------------------------------------------------------------------- /codes/ICA/ICA.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | # 代码来源:https://blog.csdn.net/lizhe_dashuju/article/details/50263339 3 | 4 | 5 | # FastICA 6 | import math 7 | import random 8 | import matplotlib.pyplot as plt 9 | from numpy import * 10 | 11 | n_components = 2 12 | 13 | def f1(x, period = 4): 14 | return 0.5*(x-math.floor(x/period)*period) 15 | 16 | def create_data(): 17 | #data number 18 | n = 500 19 | #data time 20 | T = [0.1*xi for xi in range(0, n)] 21 | #source 22 | S = array([[sin(xi) for xi in T], [f1(xi) for xi in T]], float32) 23 | #mix matrix 24 | A = array([[0.8, 0.2], [-0.3, -0.7]], float32) 25 | return T, S, dot(A, S) 26 | 27 | def whiten(X): 28 | #zero mean 29 | X_mean = X.mean(axis=-1) 30 | X -= X_mean[:, newaxis] 31 | #whiten 32 | A = dot(X, X.transpose()) 33 | D , E = linalg.eig(A) 34 | D2 = linalg.inv(array([[D[0], 0.0], [0.0, D[1]]], float32)) 35 | D2[0,0] = sqrt(D2[0,0]); D2[1,1] = sqrt(D2[1,1]) 36 | V = dot(D2, E.transpose()) 37 | return dot(V, X), V 38 | 39 | def _logcosh(x, fun_args=None, alpha = 1): 40 | gx = tanh(alpha * x, x); g_x = gx ** 2; g_x -= 1.; g_x *= -alpha 41 | return gx, g_x.mean(axis=-1) 42 | 43 | def do_decorrelation(W): 44 | #black magic 45 | s, u = linalg.eigh(dot(W, W.T)) 46 | return dot(dot(u * (1. / sqrt(s)), u.T), W) 47 | 48 | def do_fastica(X): 49 | n, m = X.shape; p = float(m); g = _logcosh 50 | #black magic 51 | X *= sqrt(X.shape[1]) 52 | #create w 53 | W = ones((n,n), float32) 54 | for i in range(n): 55 | for j in range(i): 56 | W[i,j] = random.random() 57 | 58 | #compute W 59 | maxIter = 200 60 | for ii in range(maxIter): 61 | gwtx, g_wtx = g(dot(W, X)) 62 | W1 = do_decorrelation(dot(gwtx, X.T) / p - g_wtx[:, newaxis] * W) 63 | lim = max( abs(abs(diag(dot(W1, W.T))) - 1) ) 64 | W = W1 65 | if lim < 0.0001: 66 | break 67 | return W 68 | 69 | def show_data(T, S): 70 | plt.plot(T, [S[0,i] for i in range(S.shape[1])], marker="*") 71 | plt.plot(T, [S[1,i] for i in range(S.shape[1])], marker="o") 72 | plt.show() 73 | 74 | def main(): 75 | T, S, D = create_data() 76 | Dwhiten, K = whiten(D) 77 | W = do_fastica(Dwhiten) 78 | #Sr: reconstructed source 79 | Sr = dot(dot(W, K), D) 80 | show_data(T, D) 81 | show_data(T, S) 82 | show_data(T, Sr) 83 | -------------------------------------------------------------------------------- /codes/PCA/PCA.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from sklearn.datasets import load_iris 4 | from sklearn.decomposition import PCA 5 | import matplotlib.pyplot as plt 6 | 7 | ''' 8 | author: heucoder 9 | email: 812860165@qq.com 10 | date: 2019.6.13 11 | ''' 12 | 13 | def pca(data, n_dim): 14 | ''' 15 | 16 | pca is O(D^3) 17 | :param data: (n_samples, n_features(D)) 18 | :param n_dim: target dimensions 19 | :return: (n_samples, n_dim) 20 | ''' 21 | data = data - np.mean(data, axis = 0, keepdims = True) 22 | 23 | cov = np.dot(data.T, data) 24 | 25 | eig_values, eig_vector = np.linalg.eig(cov) 26 | # print(eig_values) 27 | indexs_ = np.argsort(-eig_values)[:n_dim] 28 | picked_eig_values = eig_values[indexs_] 29 | picked_eig_vector = eig_vector[:, indexs_] 30 | data_ndim = np.dot(data, picked_eig_vector) 31 | return data_ndim 32 | 33 | 34 | # data 降维的矩阵(n_samples, n_features) 35 | # n_dim 目标维度 36 | # fit n_features >> n_samples, reduce cal 37 | def highdim_pca(data, n_dim): 38 | ''' 39 | 40 | when n_features(D) >> n_samples(N), highdim_pca is O(N^3) 41 | 42 | :param data: (n_samples, n_features) 43 | :param n_dim: target dimensions 44 | :return: (n_samples, n_dim) 45 | ''' 46 | N = data.shape[0] 47 | data = data - np.mean(data, axis = 0, keepdims = True) 48 | 49 | Ncov = np.dot(data, data.T) 50 | 51 | Neig_values, Neig_vector = np.linalg.eig(Ncov) 52 | indexs_ = np.argsort(-Neig_values)[:n_dim] 53 | Npicked_eig_values = Neig_values[indexs_] 54 | # print(Npicked_eig_values) 55 | Npicked_eig_vector = Neig_vector[:, indexs_] 56 | # print(Npicked_eig_vector.shape) 57 | 58 | picked_eig_vector = np.dot(data.T, Npicked_eig_vector) 59 | picked_eig_vector = picked_eig_vector/(N*Npicked_eig_values.reshape(-1, n_dim))**0.5 60 | # print(picked_eig_vector.shape) 61 | 62 | data_ndim = np.dot(data, picked_eig_vector) 63 | return data_ndim 64 | 65 | if __name__ == "__main__": 66 | data = load_iris() 67 | X = data.data 68 | Y = data.target 69 | data_2d1 = pca(X, 2) 70 | plt.figure(figsize=(8,4)) 71 | plt.subplot(121) 72 | plt.title("my_PCA") 73 | plt.scatter(data_2d1[:, 0], data_2d1[:, 1], c = Y) 74 | 75 | sklearn_pca = PCA(n_components=2) 76 | data_2d2 = sklearn_pca.fit_transform(X) 77 | plt.subplot(122) 78 | plt.title("sklearn_PCA") 79 | plt.scatter(data_2d2[:, 0], data_2d2[:, 1], c = Y) 80 | plt.show() -------------------------------------------------------------------------------- /codes/ISOMAP/ISOMAP.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import numpy as np 3 | from sklearn.datasets import make_s_curve 4 | import matplotlib.pyplot as plt 5 | from sklearn.manifold import Isomap 6 | from mpl_toolkits.mplot3d import Axes3D 7 | 8 | def floyd(D,n_neighbors=15): 9 | Max = np.max(D)*1000 10 | n1,n2 = D.shape 11 | k = n_neighbors 12 | D1 = np.ones((n1,n1))*Max 13 | D_arg = np.argsort(D,axis=1) 14 | for i in range(n1): 15 | D1[i,D_arg[i,0:k+1]] = D[i,D_arg[i,0:k+1]] 16 | for k in range(n1): 17 | for i in range(n1): 18 | for j in range(n1): 19 | if D1[i,k]+D1[k,j]