├── .gitignore ├── LICENSE ├── README.md ├── code ├── blockSim.py ├── bregman.py └── cot.py ├── data ├── features │ ├── CaffeNet4096 │ │ ├── amazon.mat │ │ ├── caltech10.mat │ │ ├── dslr.mat │ │ └── webcam.mat │ └── GoogleNet1024 │ │ ├── amazon.mat │ │ ├── caltech10.mat │ │ ├── dslr.mat │ │ └── webcam.mat ├── ml-100k │ ├── u.data │ ├── u.data.txt │ ├── u.genre │ ├── u.item │ └── u.user ├── mnist.mat └── usps.mat ├── example ├── coot_mnist.ipynb ├── mnist_usps.pdf ├── mnist_usps.png ├── transp_piv_mnist.pdf ├── transp_piv_mnist.png ├── transp_piv_usps.pdf └── transp_piv_usps.png └── expe ├── clootclustering_faces.pdf ├── clootclustering_faces.png ├── cot_coclustering_movielens.py ├── cot_coclustering_sim.py ├── test_HDA_full.py └── visu_cootclustering_faces.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Python Optimal Transport 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # COOT 2 | 3 | Python3 implementation of the paper [CO-Optimal Transport](https://arxiv.org/abs/2002.03731) (NeurIPS 2020) 4 | 5 | CO-Optimal Transport (COOT) is an Optimal Transport problem between measures whose supports do not necessarily live in the same metric space. It aims to simultaneously optimize two transport maps between both samples and features of two datasets with possibly different number of rows and columns. 6 | 7 | Feel free to ask if any question. 8 | 9 | If you use this toolbox in your research and find it useful, please cite COOT using the following bibtex reference: 10 | 11 | ``` 12 | @incollection{coot_2020, 13 | title = {CO-Optimal Transport}, 14 | author = {Redko, Ievgen, Vayer, Titouan, Flamary, R\'{e}mi and Courty, Nicolas}, 15 | booktitle = {Advances in Neural Information Processing Systems 33}, 16 | year = {2020} 17 | } 18 | ``` 19 | 20 | ### Prerequisites 21 | 22 | * Numpy (>= 1.11) 23 | * Matplotlib (>= 1.5) 24 | * For Optimal transport [Python Optimal Transport](https://pythonot.github.io/) POT (>=0.5.1) 25 | 26 | ### What is included ? 27 | 28 | * The main function that computes the COOT problem between two datasets in the code folder (file cot.py) 29 | 30 | * Demo notebooks: 31 | - [coot_mnist.ipynb](./example/coot_mnist.ipynb): COOT on the MNIST/USPS problem 32 | 33 | ![](./example/mnist_usps.png) 34 | 35 | * Code to repoduce results of the paper in the expe folder. 
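A minimal usage sketch (adapted from the example in the `cot_numpy` docstring; the random arrays below are placeholders, not real data):

```python
import numpy as np
from cot import cot_numpy  # lives in the code/ folder

Xs = np.random.rand(300, 2)  # source dataset, shape (n, d)
Xt = np.random.rand(200, 1)  # target dataset, shape (n', d')

# Ts: (n, n') coupling between samples, Tv: (d, d') coupling between features
Ts, Tv, cost = cot_numpy(Xs, Xt, verbose=False)
```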
36 | 37 | ![](./expe/clootclustering_faces.png) 38 | 39 | 40 | ### Authors 41 | * [Ievgen Redko](https://ievred.github.io/) 42 | * [Titouan Vayer](https://github.com/tvayer) 43 | * [Rémi Flamary](https://github.com/rflamary) 44 | * [Nicolas Courty](https://github.com/ncourty) -------------------------------------------------------------------------------- /code/blockSim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def partitionrnd(n, prop, g, seed=None): 5 | """Generate a random partition. 6 | n : number of instances 7 | prop : proportion of clusters 8 | g : number of clusters 9 | """ 10 | if seed is not None: 11 | np.random.seed(seed) 12 | tmp = np.random.uniform(low=0.0, high=1.0, size=n) 13 | return g + 1 - np.sum(np.tile(tmp, (g, 1)).T < np.tile(np.cumsum(prop), (n, 1)), axis=1) 14 | 15 | 16 | def generatedata(mu, n=100, d=100, prop_r=None, prop_c=None, noise=1, sigma = None, seed=None): 17 | """Generate a data matrix with a gxm bloc structure. 18 | n : number of rows 19 | d : number of columns 20 | prop_r : proportion of row clusters 21 | prop_c : proportion of column clusters 22 | g : number of row clusters 23 | m : number of column clusters 24 | mu : a gxm matrix with the mean of each block 25 | Return the matrix and the partitions 26 | """ 27 | if seed is not None: 28 | np.random.seed(seed) 29 | g, m = mu.shape 30 | if prop_r is None: 31 | prop_r = (1 / g * np.ones(g)) 32 | if prop_c is None: 33 | prop_c = (1 / m * np.ones(m)) 34 | if sigma is None: 35 | s = (g, m) 36 | sigma = 0.1 * np.ones(s) * noise 37 | 38 | # Generate one partition for instances and one for variables 39 | z_i = partitionrnd(n, prop_r, g) 40 | w_j = partitionrnd(d, prop_c, m) 41 | 42 | # Generate the data matrix x 43 | s = (n, d) 44 | data = np.zeros(s) 45 | 46 | for i in range(n): 47 | for j in range(d): 48 | data[i][j] = np.random.normal(mu.item(z_i[i] - 1, w_j[j] - 1), sigma.item(z_i[i] - 1, w_j[j] - 1), 1)[0] 49 | 50 | return data, z_i, w_j 51 | -------------------------------------------------------------------------------- /code/bregman.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def sinkhorn_scaling(a,b,K,numItermax=1000, stopThr=1e-9, verbose=False,log=False,always_raise=False, **kwargs): 4 | a = np.asarray(a, dtype=np.float64) 5 | b = np.asarray(b, dtype=np.float64) 6 | K = np.asarray(K, dtype=np.float64) 7 | 8 | # init data 9 | Nini = len(a) 10 | Nfin = len(b) 11 | 12 | if len(b.shape) > 1: 13 | nbb = b.shape[1] 14 | else: 15 | nbb = 0 16 | 17 | if log: 18 | log = {'err': []} 19 | 20 | # we assume that no distances are null except those of the diagonal of 21 | # distances 22 | if nbb: 23 | u = np.ones((Nini, nbb)) / Nini 24 | v = np.ones((Nfin, nbb)) / Nfin 25 | else: 26 | u = np.ones(Nini) / Nini 27 | v = np.ones(Nfin) / Nfin 28 | 29 | # print(reg) 30 | # print(np.min(K)) 31 | 32 | Kp = (1 / a).reshape(-1, 1) * K 33 | cpt = 0 34 | err = 1 35 | while (err > stopThr and cpt < numItermax): 36 | uprev = u 37 | vprev = v 38 | KtransposeU = np.dot(K.T, u) 39 | v = np.divide(b, KtransposeU) 40 | u = 1. 
/ np.dot(Kp, v) 41 | 42 | zero_in_transp=np.any(KtransposeU == 0) 43 | nan_in_dual= np.any(np.isnan(u)) or np.any(np.isnan(v)) 44 | inf_in_dual=np.any(np.isinf(u)) or np.any(np.isinf(v)) 45 | if zero_in_transp or nan_in_dual or inf_in_dual: 46 | # we have reached the machine precision 47 | # come back to previous solution and quit loop 48 | print('Warning: numerical errors at iteration in sinkhorn_scaling', cpt) 49 | #if zero_in_transp: 50 | #print('Zero in transp : ',KtransposeU) 51 | #if nan_in_dual: 52 | #print('Nan in dual') 53 | #print('u : ',u) 54 | #print('v : ',v) 55 | #print('KtransposeU ',KtransposeU) 56 | #print('K ',K) 57 | #print('M ',M) 58 | 59 | # if always_raise: 60 | # raise NanInDualError 61 | #if inf_in_dual: 62 | # print('Inf in dual') 63 | u = uprev 64 | v = vprev 65 | 66 | break 67 | if cpt % 10 == 0: 68 | # we can speed up the process by checking for the error only all 69 | # the 10th iterations 70 | if nbb: 71 | err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ 72 | np.sum((v - vprev)**2) / np.sum((v)**2) 73 | else: 74 | transp = u.reshape(-1, 1) * (K * v) 75 | err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 76 | if log: 77 | log['err'].append(err) 78 | 79 | if verbose: 80 | if cpt % 200 == 0: 81 | print( 82 | '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) 83 | print('{:5d}|{:8e}|'.format(cpt, err)) 84 | cpt = cpt + 1 85 | if log: 86 | log['u'] = u 87 | log['v'] = v 88 | 89 | if nbb: # return only loss 90 | res = np.zeros((nbb)) 91 | for i in range(nbb): 92 | res[i] = np.sum( 93 | u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M) 94 | if log: 95 | return res, log 96 | else: 97 | return res 98 | 99 | else: # return OT matrix 100 | 101 | if log: 102 | return u.reshape((-1, 1)) * K * v.reshape((1, -1)), log 103 | else: 104 | return u.reshape((-1, 1)) * K * v.reshape((1, -1)) -------------------------------------------------------------------------------- /code/cot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import ot 3 | from scipy import stats 4 | from scipy.sparse import random 5 | from bregman import sinkhorn_scaling 6 | 7 | def random_gamma_init(p,q, **kwargs): 8 | """ Returns random coupling matrix with marginal p,q 9 | """ 10 | rvs=stats.beta(1e-1,1e-1).rvs 11 | S=random(len(p), len(q), density=1, data_rvs=rvs) 12 | return sinkhorn_scaling(p,q,S.A, **kwargs) 13 | 14 | def init_matrix_np(X1, X2, v1, v2): 15 | """Return loss matrices and tensors for COOT fast computation 16 | Returns the value of |X1-X2|^{2} \otimes T as done in [1] based on [2] for the Gromov-Wasserstein distance. 17 | Where : 18 | - X1 : The source dataset of shape (n,d) 19 | - X2 : The target dataset of shape (n',d') 20 | - v1 ,v2 : weights (histograms) on the columns of resp. X1 and X2 21 | - T : Coupling matrix of shape (n,n') 22 | Parameters 23 | ---------- 24 | X1 : numpy array, shape (n, d) 25 | Source dataset 26 | X2 : numpy array, shape (n', d') 27 | Target dataset 28 | v1 : numpy array, shape (d,) 29 | Weight (histogram) on the features of X1. 30 | v2 : numpy array, shape (d',) 31 | Weight (histogram) on the features of X2. 
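Note: for the squared loss used here, with f1(a) = a**2, f2(b) = b**2,
h1(a) = a and h2(b) = 2*b, the tensor product factorizes as
|X1-X2|^{2} \otimes T = constC - h1(X1) T h2(X2)^T,
which is why cot_numpy can form the cost matrix of each BCD iteration cheaply as
M = constC - hC1.dot(T).dot(hC2.T) (the factorization of [2] applied to COOT).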
32 | 33 | Returns 34 | ------- 35 | constC : ndarray, shape (n, n') 36 | Constant C matrix (see paragraph 1.2 of supplementary material in [1]) 37 | hC1 : ndarray, shape (n, d) 38 | h1(X1) matrix (see paragraph 1.2 of supplementary material in [1]) 39 | hC2 : ndarray, shape (n', d') 40 | h2(X2) matrix (see paragraph 1.2 of supplementary material in [1]) 41 | References 42 | ---------- 43 | .. [1] Redko Ievgen, Vayer Titouan, Flamary R{\'e}mi and Courty Nicolas 44 | "CO-Optimal Transport" 45 | .. [2] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, 46 | "Gromov-Wasserstein averaging of kernel and distance matrices." 47 | International Conference on Machine Learning (ICML). 2016. 48 | """ 49 | def f1(a): 50 | return (a ** 2) 51 | 52 | def f2(b): 53 | return (b ** 2) 54 | 55 | def h1(a): 56 | return a 57 | 58 | def h2(b): 59 | return 2 * b 60 | 61 | constC1 = np.dot(np.dot(f1(X1), v1.reshape(-1, 1)), 62 | np.ones(f1(X2).shape[0]).reshape(1, -1)) 63 | constC2 = np.dot(np.ones(f1(X1).shape[0]).reshape(-1, 1), 64 | np.dot(v2.reshape(1, -1), f2(X2).T)) 65 | 66 | constC = constC1 + constC2 67 | hX1 = h1(X1) 68 | hX2 = h2(X2) 69 | 70 | return constC, hX1, hX2 71 | 72 | 73 | def cot_numpy(X1, X2, w1 = None, w2 = None, v1 = None, v2 = None, 74 | niter=10, algo='emd', reg=0,algo2='emd', 75 | reg2=0, verbose=True, log=False, random_init=False, C_lin=None): 76 | 77 | """ Returns COOT between two datasets X1,X2 (see [1]) 78 | 79 | The function solves the following optimization problem: 80 | .. math:: 81 | COOT = \min_{Ts,Tv} \sum_{i,j,k,l} |X1_{i,k}-X2_{j,l}|^{2}*Ts_{i,j}*Tv_{k,l} 82 | 83 | Where : 84 | - X1 : The source dataset 85 | - X2 : The target dataset 86 | - w1,w2 : weights (histograms) on the samples (rows) of resp. X1 and X2 87 | - v1,v2 : weights (histograms) on the features (columns) of resp. X1 and X2 88 | 89 | Parameters 90 | ---------- 91 | X1 : numpy array, shape (n, d) 92 | Source dataset 93 | X2 : numpy array, shape (n', d') 94 | Target dataset 95 | w1 : numpy array, shape (n,) 96 | Weight (histogram) on the samples of X1. If None uniform distribution is considered. 97 | w2 : numpy array, shape (n',) 98 | Weight (histogram) on the samples of X2. If None uniform distribution is considered. 99 | v1 : numpy array, shape (d,) 100 | Weight (histogram) on the features of X1. If None uniform distribution is considered. 101 | v2 : numpy array, shape (d',) 102 | Weight (histogram) on the features of X2. If None uniform distribution is considered. 103 | niter : integer 104 | Number max of iterations of the BCD for solving COOT. 105 | algo : string 106 | Choice of algorithm for solving OT problems on samples each iteration. Choice ['emd','sinkhorn']. 107 | If 'emd' returns sparse solution 108 | If 'sinkhorn' returns regularized solution 109 | algo2 : string 110 | Choice of algorithm for solving OT problems on features each iteration. Choice ['emd','sinkhorn']. 111 | If 'emd' returns sparse solution 112 | If 'sinkhorn' returns regularized solution 113 | reg : float 114 | Regularization parameter for samples coupling matrix. Ignored if algo='emd' 115 | reg2 : float 116 | Regularization parameter for features coupling matrix. Ignored if algo='emd' 117 | eps : float 118 | Threshold for the convergence 119 | random_init : bool 120 | Wether to use random initialization for the coupling matrices. If false identity couplings are considered. 121 | log : bool, optional 122 | record log if True 123 | C_lin : numpy array, shape (n, n') 124 | Prior on the sample correspondences. 
Added to the cost for the samples transport 125 | 126 | Returns 127 | ------- 128 | Ts : numpy array, shape (n,n') 129 | Optimal Transport coupling between the samples 130 | Tv : numpy array, shape (d,d') 131 | Optimal Transport coupling between the features 132 | cost : float 133 | Optimization value after convergence 134 | log : dict 135 | convergence information and coupling marices 136 | References 137 | ---------- 138 | .. [1] Redko Ievgen, Vayer Titouan, Flamary R{\'e}mi and Courty Nicolas 139 | "CO-Optimal Transport" 140 | Example 141 | ---------- 142 | import numpy as np 143 | from cot import cot_numpy 144 | 145 | n_samples=300 146 | Xs=np.random.rand(n_samples,2) 147 | Xt=np.random.rand(n_samples,1) 148 | cot_numpy(Xs,Xt) 149 | """ 150 | if v1 is None: 151 | v1 = np.ones(X1.shape[1]) / X1.shape[1] # is (d,) 152 | if v2 is None: 153 | v2 = np.ones(X2.shape[1]) / X2.shape[1] # is (d',) 154 | if w1 is None: 155 | w1 = np.ones(X1.shape[0]) / X1.shape[0] # is (n',) 156 | if w2 is None: 157 | w2 = np.ones(X2.shape[0]) / X2.shape[0] # is (n,) 158 | 159 | if not random_init: 160 | Ts = np.ones((X1.shape[0], X2.shape[0])) / (X1.shape[0] * X2.shape[0]) # is (n,n') 161 | Tv = np.ones((X1.shape[1], X2.shape[1])) / (X1.shape[1] * X2.shape[1]) # is (d,d') 162 | else: 163 | Ts=random_gamma_init(w1,w2) 164 | Tv=random_gamma_init(v1,v2) 165 | 166 | 167 | constC_s, hC1_s, hC2_s = init_matrix_np(X1, X2, v1, v2) 168 | 169 | constC_v, hC1_v, hC2_v = init_matrix_np(X1.T, X2.T, w1, w2) 170 | cost = np.inf 171 | 172 | log_out ={} 173 | log_out['cost'] = [] 174 | 175 | for i in range(niter): 176 | Tsold = Ts 177 | Tvold = Tv 178 | costold = cost 179 | 180 | M = constC_s - np.dot(hC1_s, Tv).dot(hC2_s.T) 181 | if C_lin is not None: 182 | M=M+C_lin 183 | if algo == 'emd': 184 | Ts = ot.emd(w1, w2, M, numItermax=1e7) 185 | elif algo == 'sinkhorn': 186 | Ts = ot.sinkhorn(w1, w2, M, reg) 187 | 188 | M = constC_v - np.dot(hC1_v, Ts).dot(hC2_v.T) 189 | 190 | if algo2 == 'emd': 191 | Tv = ot.emd(v1, v2, M, numItermax=1e7) 192 | elif algo2 == 'sinkhorn': 193 | Tv = ot.sinkhorn(v1,v2, M, reg2) 194 | 195 | delta = np.linalg.norm(Ts - Tsold) + np.linalg.norm(Tv - Tvold) 196 | cost = np.sum(M * Tv) 197 | 198 | if log: 199 | log_out['cost'].append(cost) 200 | 201 | if verbose: 202 | print('Delta: {0} Loss: {1}'.format(delta, cost)) 203 | 204 | if delta < 1e-16 or np.abs(costold - cost) < 1e-7: 205 | if verbose: 206 | print('converged at iter ', i) 207 | break 208 | if log: 209 | return Ts, Tv, cost, log_out 210 | else: 211 | return Ts, Tv, cost 212 | 213 | 214 | -------------------------------------------------------------------------------- /data/features/CaffeNet4096/amazon.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/CaffeNet4096/amazon.mat -------------------------------------------------------------------------------- /data/features/CaffeNet4096/caltech10.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/CaffeNet4096/caltech10.mat -------------------------------------------------------------------------------- /data/features/CaffeNet4096/dslr.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/CaffeNet4096/dslr.mat -------------------------------------------------------------------------------- /data/features/CaffeNet4096/webcam.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/CaffeNet4096/webcam.mat -------------------------------------------------------------------------------- /data/features/GoogleNet1024/amazon.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/GoogleNet1024/amazon.mat -------------------------------------------------------------------------------- /data/features/GoogleNet1024/caltech10.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/GoogleNet1024/caltech10.mat -------------------------------------------------------------------------------- /data/features/GoogleNet1024/dslr.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/GoogleNet1024/dslr.mat -------------------------------------------------------------------------------- /data/features/GoogleNet1024/webcam.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/features/GoogleNet1024/webcam.mat -------------------------------------------------------------------------------- /data/ml-100k/u.genre: -------------------------------------------------------------------------------- 1 | unknown|0 2 | Action|1 3 | Adventure|2 4 | Animation|3 5 | Children's|4 6 | Comedy|5 7 | Crime|6 8 | Documentary|7 9 | Drama|8 10 | Fantasy|9 11 | Film-Noir|10 12 | Horror|11 13 | Musical|12 14 | Mystery|13 15 | Romance|14 16 | Sci-Fi|15 17 | Thriller|16 18 | War|17 19 | Western|18 20 | 21 | -------------------------------------------------------------------------------- /data/ml-100k/u.item: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/ml-100k/u.item -------------------------------------------------------------------------------- /data/ml-100k/u.user: -------------------------------------------------------------------------------- 1 | 1|24|M|technician|85711 2 | 2|53|F|other|94043 3 | 3|23|M|writer|32067 4 | 4|24|M|technician|43537 5 | 5|33|F|other|15213 6 | 6|42|M|executive|98101 7 | 7|57|M|administrator|91344 8 | 8|36|M|administrator|05201 9 | 9|29|M|student|01002 10 | 10|53|M|lawyer|90703 11 | 11|39|F|other|30329 12 | 12|28|F|other|06405 13 | 13|47|M|educator|29206 14 | 14|45|M|scientist|55106 15 | 15|49|F|educator|97301 16 | 16|21|M|entertainment|10309 17 | 17|30|M|programmer|06355 18 | 18|35|F|other|37212 19 | 19|40|M|librarian|02138 20 | 20|42|F|homemaker|95660 21 | 21|26|M|writer|30068 22 | 22|25|M|writer|40206 23 | 23|30|F|artist|48197 24 | 24|21|F|artist|94533 25 | 25|39|M|engineer|55107 26 | 26|49|M|engineer|21044 27 | 27|40|F|librarian|30030 28 | 28|32|M|writer|55369 29 | 29|41|M|programmer|94043 30 | 
30|7|M|student|55436 31 | 31|24|M|artist|10003 32 | 32|28|F|student|78741 33 | 33|23|M|student|27510 34 | 34|38|F|administrator|42141 35 | 35|20|F|homemaker|42459 36 | 36|19|F|student|93117 37 | 37|23|M|student|55105 38 | 38|28|F|other|54467 39 | 39|41|M|entertainment|01040 40 | 40|38|M|scientist|27514 41 | 41|33|M|engineer|80525 42 | 42|30|M|administrator|17870 43 | 43|29|F|librarian|20854 44 | 44|26|M|technician|46260 45 | 45|29|M|programmer|50233 46 | 46|27|F|marketing|46538 47 | 47|53|M|marketing|07102 48 | 48|45|M|administrator|12550 49 | 49|23|F|student|76111 50 | 50|21|M|writer|52245 51 | 51|28|M|educator|16509 52 | 52|18|F|student|55105 53 | 53|26|M|programmer|55414 54 | 54|22|M|executive|66315 55 | 55|37|M|programmer|01331 56 | 56|25|M|librarian|46260 57 | 57|16|M|none|84010 58 | 58|27|M|programmer|52246 59 | 59|49|M|educator|08403 60 | 60|50|M|healthcare|06472 61 | 61|36|M|engineer|30040 62 | 62|27|F|administrator|97214 63 | 63|31|M|marketing|75240 64 | 64|32|M|educator|43202 65 | 65|51|F|educator|48118 66 | 66|23|M|student|80521 67 | 67|17|M|student|60402 68 | 68|19|M|student|22904 69 | 69|24|M|engineer|55337 70 | 70|27|M|engineer|60067 71 | 71|39|M|scientist|98034 72 | 72|48|F|administrator|73034 73 | 73|24|M|student|41850 74 | 74|39|M|scientist|T8H1N 75 | 75|24|M|entertainment|08816 76 | 76|20|M|student|02215 77 | 77|30|M|technician|29379 78 | 78|26|M|administrator|61801 79 | 79|39|F|administrator|03755 80 | 80|34|F|administrator|52241 81 | 81|21|M|student|21218 82 | 82|50|M|programmer|22902 83 | 83|40|M|other|44133 84 | 84|32|M|executive|55369 85 | 85|51|M|educator|20003 86 | 86|26|M|administrator|46005 87 | 87|47|M|administrator|89503 88 | 88|49|F|librarian|11701 89 | 89|43|F|administrator|68106 90 | 90|60|M|educator|78155 91 | 91|55|M|marketing|01913 92 | 92|32|M|entertainment|80525 93 | 93|48|M|executive|23112 94 | 94|26|M|student|71457 95 | 95|31|M|administrator|10707 96 | 96|25|F|artist|75206 97 | 97|43|M|artist|98006 98 | 98|49|F|executive|90291 99 | 99|20|M|student|63129 100 | 100|36|M|executive|90254 101 | 101|15|M|student|05146 102 | 102|38|M|programmer|30220 103 | 103|26|M|student|55108 104 | 104|27|M|student|55108 105 | 105|24|M|engineer|94043 106 | 106|61|M|retired|55125 107 | 107|39|M|scientist|60466 108 | 108|44|M|educator|63130 109 | 109|29|M|other|55423 110 | 110|19|M|student|77840 111 | 111|57|M|engineer|90630 112 | 112|30|M|salesman|60613 113 | 113|47|M|executive|95032 114 | 114|27|M|programmer|75013 115 | 115|31|M|engineer|17110 116 | 116|40|M|healthcare|97232 117 | 117|20|M|student|16125 118 | 118|21|M|administrator|90210 119 | 119|32|M|programmer|67401 120 | 120|47|F|other|06260 121 | 121|54|M|librarian|99603 122 | 122|32|F|writer|22206 123 | 123|48|F|artist|20008 124 | 124|34|M|student|60615 125 | 125|30|M|lawyer|22202 126 | 126|28|F|lawyer|20015 127 | 127|33|M|none|73439 128 | 128|24|F|marketing|20009 129 | 129|36|F|marketing|07039 130 | 130|20|M|none|60115 131 | 131|59|F|administrator|15237 132 | 132|24|M|other|94612 133 | 133|53|M|engineer|78602 134 | 134|31|M|programmer|80236 135 | 135|23|M|student|38401 136 | 136|51|M|other|97365 137 | 137|50|M|educator|84408 138 | 138|46|M|doctor|53211 139 | 139|20|M|student|08904 140 | 140|30|F|student|32250 141 | 141|49|M|programmer|36117 142 | 142|13|M|other|48118 143 | 143|42|M|technician|08832 144 | 144|53|M|programmer|20910 145 | 145|31|M|entertainment|V3N4P 146 | 146|45|M|artist|83814 147 | 147|40|F|librarian|02143 148 | 148|33|M|engineer|97006 149 | 149|35|F|marketing|17325 150 | 150|20|F|artist|02139 151 | 
151|38|F|administrator|48103 152 | 152|33|F|educator|68767 153 | 153|25|M|student|60641 154 | 154|25|M|student|53703 155 | 155|32|F|other|11217 156 | 156|25|M|educator|08360 157 | 157|57|M|engineer|70808 158 | 158|50|M|educator|27606 159 | 159|23|F|student|55346 160 | 160|27|M|programmer|66215 161 | 161|50|M|lawyer|55104 162 | 162|25|M|artist|15610 163 | 163|49|M|administrator|97212 164 | 164|47|M|healthcare|80123 165 | 165|20|F|other|53715 166 | 166|47|M|educator|55113 167 | 167|37|M|other|L9G2B 168 | 168|48|M|other|80127 169 | 169|52|F|other|53705 170 | 170|53|F|healthcare|30067 171 | 171|48|F|educator|78750 172 | 172|55|M|marketing|22207 173 | 173|56|M|other|22306 174 | 174|30|F|administrator|52302 175 | 175|26|F|scientist|21911 176 | 176|28|M|scientist|07030 177 | 177|20|M|programmer|19104 178 | 178|26|M|other|49512 179 | 179|15|M|entertainment|20755 180 | 180|22|F|administrator|60202 181 | 181|26|M|executive|21218 182 | 182|36|M|programmer|33884 183 | 183|33|M|scientist|27708 184 | 184|37|M|librarian|76013 185 | 185|53|F|librarian|97403 186 | 186|39|F|executive|00000 187 | 187|26|M|educator|16801 188 | 188|42|M|student|29440 189 | 189|32|M|artist|95014 190 | 190|30|M|administrator|95938 191 | 191|33|M|administrator|95161 192 | 192|42|M|educator|90840 193 | 193|29|M|student|49931 194 | 194|38|M|administrator|02154 195 | 195|42|M|scientist|93555 196 | 196|49|M|writer|55105 197 | 197|55|M|technician|75094 198 | 198|21|F|student|55414 199 | 199|30|M|writer|17604 200 | 200|40|M|programmer|93402 201 | 201|27|M|writer|E2A4H 202 | 202|41|F|educator|60201 203 | 203|25|F|student|32301 204 | 204|52|F|librarian|10960 205 | 205|47|M|lawyer|06371 206 | 206|14|F|student|53115 207 | 207|39|M|marketing|92037 208 | 208|43|M|engineer|01720 209 | 209|33|F|educator|85710 210 | 210|39|M|engineer|03060 211 | 211|66|M|salesman|32605 212 | 212|49|F|educator|61401 213 | 213|33|M|executive|55345 214 | 214|26|F|librarian|11231 215 | 215|35|M|programmer|63033 216 | 216|22|M|engineer|02215 217 | 217|22|M|other|11727 218 | 218|37|M|administrator|06513 219 | 219|32|M|programmer|43212 220 | 220|30|M|librarian|78205 221 | 221|19|M|student|20685 222 | 222|29|M|programmer|27502 223 | 223|19|F|student|47906 224 | 224|31|F|educator|43512 225 | 225|51|F|administrator|58202 226 | 226|28|M|student|92103 227 | 227|46|M|executive|60659 228 | 228|21|F|student|22003 229 | 229|29|F|librarian|22903 230 | 230|28|F|student|14476 231 | 231|48|M|librarian|01080 232 | 232|45|M|scientist|99709 233 | 233|38|M|engineer|98682 234 | 234|60|M|retired|94702 235 | 235|37|M|educator|22973 236 | 236|44|F|writer|53214 237 | 237|49|M|administrator|63146 238 | 238|42|F|administrator|44124 239 | 239|39|M|artist|95628 240 | 240|23|F|educator|20784 241 | 241|26|F|student|20001 242 | 242|33|M|educator|31404 243 | 243|33|M|educator|60201 244 | 244|28|M|technician|80525 245 | 245|22|M|student|55109 246 | 246|19|M|student|28734 247 | 247|28|M|engineer|20770 248 | 248|25|M|student|37235 249 | 249|25|M|student|84103 250 | 250|29|M|executive|95110 251 | 251|28|M|doctor|85032 252 | 252|42|M|engineer|07733 253 | 253|26|F|librarian|22903 254 | 254|44|M|educator|42647 255 | 255|23|M|entertainment|07029 256 | 256|35|F|none|39042 257 | 257|17|M|student|77005 258 | 258|19|F|student|77801 259 | 259|21|M|student|48823 260 | 260|40|F|artist|89801 261 | 261|28|M|administrator|85202 262 | 262|19|F|student|78264 263 | 263|41|M|programmer|55346 264 | 264|36|F|writer|90064 265 | 265|26|M|executive|84601 266 | 266|62|F|administrator|78756 267 | 267|23|M|engineer|83716 268 | 
268|24|M|engineer|19422 269 | 269|31|F|librarian|43201 270 | 270|18|F|student|63119 271 | 271|51|M|engineer|22932 272 | 272|33|M|scientist|53706 273 | 273|50|F|other|10016 274 | 274|20|F|student|55414 275 | 275|38|M|engineer|92064 276 | 276|21|M|student|95064 277 | 277|35|F|administrator|55406 278 | 278|37|F|librarian|30033 279 | 279|33|M|programmer|85251 280 | 280|30|F|librarian|22903 281 | 281|15|F|student|06059 282 | 282|22|M|administrator|20057 283 | 283|28|M|programmer|55305 284 | 284|40|M|executive|92629 285 | 285|25|M|programmer|53713 286 | 286|27|M|student|15217 287 | 287|21|M|salesman|31211 288 | 288|34|M|marketing|23226 289 | 289|11|M|none|94619 290 | 290|40|M|engineer|93550 291 | 291|19|M|student|44106 292 | 292|35|F|programmer|94703 293 | 293|24|M|writer|60804 294 | 294|34|M|technician|92110 295 | 295|31|M|educator|50325 296 | 296|43|F|administrator|16803 297 | 297|29|F|educator|98103 298 | 298|44|M|executive|01581 299 | 299|29|M|doctor|63108 300 | 300|26|F|programmer|55106 301 | 301|24|M|student|55439 302 | 302|42|M|educator|77904 303 | 303|19|M|student|14853 304 | 304|22|F|student|71701 305 | 305|23|M|programmer|94086 306 | 306|45|M|other|73132 307 | 307|25|M|student|55454 308 | 308|60|M|retired|95076 309 | 309|40|M|scientist|70802 310 | 310|37|M|educator|91711 311 | 311|32|M|technician|73071 312 | 312|48|M|other|02110 313 | 313|41|M|marketing|60035 314 | 314|20|F|student|08043 315 | 315|31|M|educator|18301 316 | 316|43|F|other|77009 317 | 317|22|M|administrator|13210 318 | 318|65|M|retired|06518 319 | 319|38|M|programmer|22030 320 | 320|19|M|student|24060 321 | 321|49|F|educator|55413 322 | 322|20|M|student|50613 323 | 323|21|M|student|19149 324 | 324|21|F|student|02176 325 | 325|48|M|technician|02139 326 | 326|41|M|administrator|15235 327 | 327|22|M|student|11101 328 | 328|51|M|administrator|06779 329 | 329|48|M|educator|01720 330 | 330|35|F|educator|33884 331 | 331|33|M|entertainment|91344 332 | 332|20|M|student|40504 333 | 333|47|M|other|V0R2M 334 | 334|32|M|librarian|30002 335 | 335|45|M|executive|33775 336 | 336|23|M|salesman|42101 337 | 337|37|M|scientist|10522 338 | 338|39|F|librarian|59717 339 | 339|35|M|lawyer|37901 340 | 340|46|M|engineer|80123 341 | 341|17|F|student|44405 342 | 342|25|F|other|98006 343 | 343|43|M|engineer|30093 344 | 344|30|F|librarian|94117 345 | 345|28|F|librarian|94143 346 | 346|34|M|other|76059 347 | 347|18|M|student|90210 348 | 348|24|F|student|45660 349 | 349|68|M|retired|61455 350 | 350|32|M|student|97301 351 | 351|61|M|educator|49938 352 | 352|37|F|programmer|55105 353 | 353|25|M|scientist|28480 354 | 354|29|F|librarian|48197 355 | 355|25|M|student|60135 356 | 356|32|F|homemaker|92688 357 | 357|26|M|executive|98133 358 | 358|40|M|educator|10022 359 | 359|22|M|student|61801 360 | 360|51|M|other|98027 361 | 361|22|M|student|44074 362 | 362|35|F|homemaker|85233 363 | 363|20|M|student|87501 364 | 364|63|M|engineer|01810 365 | 365|29|M|lawyer|20009 366 | 366|20|F|student|50670 367 | 367|17|M|student|37411 368 | 368|18|M|student|92113 369 | 369|24|M|student|91335 370 | 370|52|M|writer|08534 371 | 371|36|M|engineer|99206 372 | 372|25|F|student|66046 373 | 373|24|F|other|55116 374 | 374|36|M|executive|78746 375 | 375|17|M|entertainment|37777 376 | 376|28|F|other|10010 377 | 377|22|M|student|18015 378 | 378|35|M|student|02859 379 | 379|44|M|programmer|98117 380 | 380|32|M|engineer|55117 381 | 381|33|M|artist|94608 382 | 382|45|M|engineer|01824 383 | 383|42|M|administrator|75204 384 | 384|52|M|programmer|45218 385 | 385|36|M|writer|10003 386 | 
386|36|M|salesman|43221 387 | 387|33|M|entertainment|37412 388 | 388|31|M|other|36106 389 | 389|44|F|writer|83702 390 | 390|42|F|writer|85016 391 | 391|23|M|student|84604 392 | 392|52|M|writer|59801 393 | 393|19|M|student|83686 394 | 394|25|M|administrator|96819 395 | 395|43|M|other|44092 396 | 396|57|M|engineer|94551 397 | 397|17|M|student|27514 398 | 398|40|M|other|60008 399 | 399|25|M|other|92374 400 | 400|33|F|administrator|78213 401 | 401|46|F|healthcare|84107 402 | 402|30|M|engineer|95129 403 | 403|37|M|other|06811 404 | 404|29|F|programmer|55108 405 | 405|22|F|healthcare|10019 406 | 406|52|M|educator|93109 407 | 407|29|M|engineer|03261 408 | 408|23|M|student|61755 409 | 409|48|M|administrator|98225 410 | 410|30|F|artist|94025 411 | 411|34|M|educator|44691 412 | 412|25|M|educator|15222 413 | 413|55|M|educator|78212 414 | 414|24|M|programmer|38115 415 | 415|39|M|educator|85711 416 | 416|20|F|student|92626 417 | 417|27|F|other|48103 418 | 418|55|F|none|21206 419 | 419|37|M|lawyer|43215 420 | 420|53|M|educator|02140 421 | 421|38|F|programmer|55105 422 | 422|26|M|entertainment|94533 423 | 423|64|M|other|91606 424 | 424|36|F|marketing|55422 425 | 425|19|M|student|58644 426 | 426|55|M|educator|01602 427 | 427|51|M|doctor|85258 428 | 428|28|M|student|55414 429 | 429|27|M|student|29205 430 | 430|38|M|scientist|98199 431 | 431|24|M|marketing|92629 432 | 432|22|M|entertainment|50311 433 | 433|27|M|artist|11211 434 | 434|16|F|student|49705 435 | 435|24|M|engineer|60007 436 | 436|30|F|administrator|17345 437 | 437|27|F|other|20009 438 | 438|51|F|administrator|43204 439 | 439|23|F|administrator|20817 440 | 440|30|M|other|48076 441 | 441|50|M|technician|55013 442 | 442|22|M|student|85282 443 | 443|35|M|salesman|33308 444 | 444|51|F|lawyer|53202 445 | 445|21|M|writer|92653 446 | 446|57|M|educator|60201 447 | 447|30|M|administrator|55113 448 | 448|23|M|entertainment|10021 449 | 449|23|M|librarian|55021 450 | 450|35|F|educator|11758 451 | 451|16|M|student|48446 452 | 452|35|M|administrator|28018 453 | 453|18|M|student|06333 454 | 454|57|M|other|97330 455 | 455|48|M|administrator|83709 456 | 456|24|M|technician|31820 457 | 457|33|F|salesman|30011 458 | 458|47|M|technician|Y1A6B 459 | 459|22|M|student|29201 460 | 460|44|F|other|60630 461 | 461|15|M|student|98102 462 | 462|19|F|student|02918 463 | 463|48|F|healthcare|75218 464 | 464|60|M|writer|94583 465 | 465|32|M|other|05001 466 | 466|22|M|student|90804 467 | 467|29|M|engineer|91201 468 | 468|28|M|engineer|02341 469 | 469|60|M|educator|78628 470 | 470|24|M|programmer|10021 471 | 471|10|M|student|77459 472 | 472|24|M|student|87544 473 | 473|29|M|student|94708 474 | 474|51|M|executive|93711 475 | 475|30|M|programmer|75230 476 | 476|28|M|student|60440 477 | 477|23|F|student|02125 478 | 478|29|M|other|10019 479 | 479|30|M|educator|55409 480 | 480|57|M|retired|98257 481 | 481|73|M|retired|37771 482 | 482|18|F|student|40256 483 | 483|29|M|scientist|43212 484 | 484|27|M|student|21208 485 | 485|44|F|educator|95821 486 | 486|39|M|educator|93101 487 | 487|22|M|engineer|92121 488 | 488|48|M|technician|21012 489 | 489|55|M|other|45218 490 | 490|29|F|artist|V5A2B 491 | 491|43|F|writer|53711 492 | 492|57|M|educator|94618 493 | 493|22|M|engineer|60090 494 | 494|38|F|administrator|49428 495 | 495|29|M|engineer|03052 496 | 496|21|F|student|55414 497 | 497|20|M|student|50112 498 | 498|26|M|writer|55408 499 | 499|42|M|programmer|75006 500 | 500|28|M|administrator|94305 501 | 501|22|M|student|10025 502 | 502|22|M|student|23092 503 | 503|50|F|writer|27514 504 | 
504|40|F|writer|92115 505 | 505|27|F|other|20657 506 | 506|46|M|programmer|03869 507 | 507|18|F|writer|28450 508 | 508|27|M|marketing|19382 509 | 509|23|M|administrator|10011 510 | 510|34|M|other|98038 511 | 511|22|M|student|21250 512 | 512|29|M|other|20090 513 | 513|43|M|administrator|26241 514 | 514|27|M|programmer|20707 515 | 515|53|M|marketing|49508 516 | 516|53|F|librarian|10021 517 | 517|24|M|student|55454 518 | 518|49|F|writer|99709 519 | 519|22|M|other|55320 520 | 520|62|M|healthcare|12603 521 | 521|19|M|student|02146 522 | 522|36|M|engineer|55443 523 | 523|50|F|administrator|04102 524 | 524|56|M|educator|02159 525 | 525|27|F|administrator|19711 526 | 526|30|M|marketing|97124 527 | 527|33|M|librarian|12180 528 | 528|18|M|student|55104 529 | 529|47|F|administrator|44224 530 | 530|29|M|engineer|94040 531 | 531|30|F|salesman|97408 532 | 532|20|M|student|92705 533 | 533|43|M|librarian|02324 534 | 534|20|M|student|05464 535 | 535|45|F|educator|80302 536 | 536|38|M|engineer|30078 537 | 537|36|M|engineer|22902 538 | 538|31|M|scientist|21010 539 | 539|53|F|administrator|80303 540 | 540|28|M|engineer|91201 541 | 541|19|F|student|84302 542 | 542|21|M|student|60515 543 | 543|33|M|scientist|95123 544 | 544|44|F|other|29464 545 | 545|27|M|technician|08052 546 | 546|36|M|executive|22911 547 | 547|50|M|educator|14534 548 | 548|51|M|writer|95468 549 | 549|42|M|scientist|45680 550 | 550|16|F|student|95453 551 | 551|25|M|programmer|55414 552 | 552|45|M|other|68147 553 | 553|58|M|educator|62901 554 | 554|32|M|scientist|62901 555 | 555|29|F|educator|23227 556 | 556|35|F|educator|30606 557 | 557|30|F|writer|11217 558 | 558|56|F|writer|63132 559 | 559|69|M|executive|10022 560 | 560|32|M|student|10003 561 | 561|23|M|engineer|60005 562 | 562|54|F|administrator|20879 563 | 563|39|F|librarian|32707 564 | 564|65|M|retired|94591 565 | 565|40|M|student|55422 566 | 566|20|M|student|14627 567 | 567|24|M|entertainment|10003 568 | 568|39|M|educator|01915 569 | 569|34|M|educator|91903 570 | 570|26|M|educator|14627 571 | 571|34|M|artist|01945 572 | 572|51|M|educator|20003 573 | 573|68|M|retired|48911 574 | 574|56|M|educator|53188 575 | 575|33|M|marketing|46032 576 | 576|48|M|executive|98281 577 | 577|36|F|student|77845 578 | 578|31|M|administrator|M7A1A 579 | 579|32|M|educator|48103 580 | 580|16|M|student|17961 581 | 581|37|M|other|94131 582 | 582|17|M|student|93003 583 | 583|44|M|engineer|29631 584 | 584|25|M|student|27511 585 | 585|69|M|librarian|98501 586 | 586|20|M|student|79508 587 | 587|26|M|other|14216 588 | 588|18|F|student|93063 589 | 589|21|M|lawyer|90034 590 | 590|50|M|educator|82435 591 | 591|57|F|librarian|92093 592 | 592|18|M|student|97520 593 | 593|31|F|educator|68767 594 | 594|46|M|educator|M4J2K 595 | 595|25|M|programmer|31909 596 | 596|20|M|artist|77073 597 | 597|23|M|other|84116 598 | 598|40|F|marketing|43085 599 | 599|22|F|student|R3T5K 600 | 600|34|M|programmer|02320 601 | 601|19|F|artist|99687 602 | 602|47|F|other|34656 603 | 603|21|M|programmer|47905 604 | 604|39|M|educator|11787 605 | 605|33|M|engineer|33716 606 | 606|28|M|programmer|63044 607 | 607|49|F|healthcare|02154 608 | 608|22|M|other|10003 609 | 609|13|F|student|55106 610 | 610|22|M|student|21227 611 | 611|46|M|librarian|77008 612 | 612|36|M|educator|79070 613 | 613|37|F|marketing|29678 614 | 614|54|M|educator|80227 615 | 615|38|M|educator|27705 616 | 616|55|M|scientist|50613 617 | 617|27|F|writer|11201 618 | 618|15|F|student|44212 619 | 619|17|M|student|44134 620 | 620|18|F|writer|81648 621 | 621|17|M|student|60402 622 | 
622|25|M|programmer|14850 623 | 623|50|F|educator|60187 624 | 624|19|M|student|30067 625 | 625|27|M|programmer|20723 626 | 626|23|M|scientist|19807 627 | 627|24|M|engineer|08034 628 | 628|13|M|none|94306 629 | 629|46|F|other|44224 630 | 630|26|F|healthcare|55408 631 | 631|18|F|student|38866 632 | 632|18|M|student|55454 633 | 633|35|M|programmer|55414 634 | 634|39|M|engineer|T8H1N 635 | 635|22|M|other|23237 636 | 636|47|M|educator|48043 637 | 637|30|M|other|74101 638 | 638|45|M|engineer|01940 639 | 639|42|F|librarian|12065 640 | 640|20|M|student|61801 641 | 641|24|M|student|60626 642 | 642|18|F|student|95521 643 | 643|39|M|scientist|55122 644 | 644|51|M|retired|63645 645 | 645|27|M|programmer|53211 646 | 646|17|F|student|51250 647 | 647|40|M|educator|45810 648 | 648|43|M|engineer|91351 649 | 649|20|M|student|39762 650 | 650|42|M|engineer|83814 651 | 651|65|M|retired|02903 652 | 652|35|M|other|22911 653 | 653|31|M|executive|55105 654 | 654|27|F|student|78739 655 | 655|50|F|healthcare|60657 656 | 656|48|M|educator|10314 657 | 657|26|F|none|78704 658 | 658|33|M|programmer|92626 659 | 659|31|M|educator|54248 660 | 660|26|M|student|77380 661 | 661|28|M|programmer|98121 662 | 662|55|M|librarian|19102 663 | 663|26|M|other|19341 664 | 664|30|M|engineer|94115 665 | 665|25|M|administrator|55412 666 | 666|44|M|administrator|61820 667 | 667|35|M|librarian|01970 668 | 668|29|F|writer|10016 669 | 669|37|M|other|20009 670 | 670|30|M|technician|21114 671 | 671|21|M|programmer|91919 672 | 672|54|F|administrator|90095 673 | 673|51|M|educator|22906 674 | 674|13|F|student|55337 675 | 675|34|M|other|28814 676 | 676|30|M|programmer|32712 677 | 677|20|M|other|99835 678 | 678|50|M|educator|61462 679 | 679|20|F|student|54302 680 | 680|33|M|lawyer|90405 681 | 681|44|F|marketing|97208 682 | 682|23|M|programmer|55128 683 | 683|42|M|librarian|23509 684 | 684|28|M|student|55414 685 | 685|32|F|librarian|55409 686 | 686|32|M|educator|26506 687 | 687|31|F|healthcare|27713 688 | 688|37|F|administrator|60476 689 | 689|25|M|other|45439 690 | 690|35|M|salesman|63304 691 | 691|34|M|educator|60089 692 | 692|34|M|engineer|18053 693 | 693|43|F|healthcare|85210 694 | 694|60|M|programmer|06365 695 | 695|26|M|writer|38115 696 | 696|55|M|other|94920 697 | 697|25|M|other|77042 698 | 698|28|F|programmer|06906 699 | 699|44|M|other|96754 700 | 700|17|M|student|76309 701 | 701|51|F|librarian|56321 702 | 702|37|M|other|89104 703 | 703|26|M|educator|49512 704 | 704|51|F|librarian|91105 705 | 705|21|F|student|54494 706 | 706|23|M|student|55454 707 | 707|56|F|librarian|19146 708 | 708|26|F|homemaker|96349 709 | 709|21|M|other|N4T1A 710 | 710|19|M|student|92020 711 | 711|22|F|student|15203 712 | 712|22|F|student|54901 713 | 713|42|F|other|07204 714 | 714|26|M|engineer|55343 715 | 715|21|M|technician|91206 716 | 716|36|F|administrator|44265 717 | 717|24|M|technician|84105 718 | 718|42|M|technician|64118 719 | 719|37|F|other|V0R2H 720 | 720|49|F|administrator|16506 721 | 721|24|F|entertainment|11238 722 | 722|50|F|homemaker|17331 723 | 723|26|M|executive|94403 724 | 724|31|M|executive|40243 725 | 725|21|M|student|91711 726 | 726|25|F|administrator|80538 727 | 727|25|M|student|78741 728 | 728|58|M|executive|94306 729 | 729|19|M|student|56567 730 | 730|31|F|scientist|32114 731 | 731|41|F|educator|70403 732 | 732|28|F|other|98405 733 | 733|44|F|other|60630 734 | 734|25|F|other|63108 735 | 735|29|F|healthcare|85719 736 | 736|48|F|writer|94618 737 | 737|30|M|programmer|98072 738 | 738|35|M|technician|95403 739 | 739|35|M|technician|73162 740 | 
740|25|F|educator|22206 741 | 741|25|M|writer|63108 742 | 742|35|M|student|29210 743 | 743|31|M|programmer|92660 744 | 744|35|M|marketing|47024 745 | 745|42|M|writer|55113 746 | 746|25|M|engineer|19047 747 | 747|19|M|other|93612 748 | 748|28|M|administrator|94720 749 | 749|33|M|other|80919 750 | 750|28|M|administrator|32303 751 | 751|24|F|other|90034 752 | 752|60|M|retired|21201 753 | 753|56|M|salesman|91206 754 | 754|59|F|librarian|62901 755 | 755|44|F|educator|97007 756 | 756|30|F|none|90247 757 | 757|26|M|student|55104 758 | 758|27|M|student|53706 759 | 759|20|F|student|68503 760 | 760|35|F|other|14211 761 | 761|17|M|student|97302 762 | 762|32|M|administrator|95050 763 | 763|27|M|scientist|02113 764 | 764|27|F|educator|62903 765 | 765|31|M|student|33066 766 | 766|42|M|other|10960 767 | 767|70|M|engineer|00000 768 | 768|29|M|administrator|12866 769 | 769|39|M|executive|06927 770 | 770|28|M|student|14216 771 | 771|26|M|student|15232 772 | 772|50|M|writer|27105 773 | 773|20|M|student|55414 774 | 774|30|M|student|80027 775 | 775|46|M|executive|90036 776 | 776|30|M|librarian|51157 777 | 777|63|M|programmer|01810 778 | 778|34|M|student|01960 779 | 779|31|M|student|K7L5J 780 | 780|49|M|programmer|94560 781 | 781|20|M|student|48825 782 | 782|21|F|artist|33205 783 | 783|30|M|marketing|77081 784 | 784|47|M|administrator|91040 785 | 785|32|M|engineer|23322 786 | 786|36|F|engineer|01754 787 | 787|18|F|student|98620 788 | 788|51|M|administrator|05779 789 | 789|29|M|other|55420 790 | 790|27|M|technician|80913 791 | 791|31|M|educator|20064 792 | 792|40|M|programmer|12205 793 | 793|22|M|student|85281 794 | 794|32|M|educator|57197 795 | 795|30|M|programmer|08610 796 | 796|32|F|writer|33755 797 | 797|44|F|other|62522 798 | 798|40|F|writer|64131 799 | 799|49|F|administrator|19716 800 | 800|25|M|programmer|55337 801 | 801|22|M|writer|92154 802 | 802|35|M|administrator|34105 803 | 803|70|M|administrator|78212 804 | 804|39|M|educator|61820 805 | 805|27|F|other|20009 806 | 806|27|M|marketing|11217 807 | 807|41|F|healthcare|93555 808 | 808|45|M|salesman|90016 809 | 809|50|F|marketing|30803 810 | 810|55|F|other|80526 811 | 811|40|F|educator|73013 812 | 812|22|M|technician|76234 813 | 813|14|F|student|02136 814 | 814|30|M|other|12345 815 | 815|32|M|other|28806 816 | 816|34|M|other|20755 817 | 817|19|M|student|60152 818 | 818|28|M|librarian|27514 819 | 819|59|M|administrator|40205 820 | 820|22|M|student|37725 821 | 821|37|M|engineer|77845 822 | 822|29|F|librarian|53144 823 | 823|27|M|artist|50322 824 | 824|31|M|other|15017 825 | 825|44|M|engineer|05452 826 | 826|28|M|artist|77048 827 | 827|23|F|engineer|80228 828 | 828|28|M|librarian|85282 829 | 829|48|M|writer|80209 830 | 830|46|M|programmer|53066 831 | 831|21|M|other|33765 832 | 832|24|M|technician|77042 833 | 833|34|M|writer|90019 834 | 834|26|M|other|64153 835 | 835|44|F|executive|11577 836 | 836|44|M|artist|10018 837 | 837|36|F|artist|55409 838 | 838|23|M|student|01375 839 | 839|38|F|entertainment|90814 840 | 840|39|M|artist|55406 841 | 841|45|M|doctor|47401 842 | 842|40|M|writer|93055 843 | 843|35|M|librarian|44212 844 | 844|22|M|engineer|95662 845 | 845|64|M|doctor|97405 846 | 846|27|M|lawyer|47130 847 | 847|29|M|student|55417 848 | 848|46|M|engineer|02146 849 | 849|15|F|student|25652 850 | 850|34|M|technician|78390 851 | 851|18|M|other|29646 852 | 852|46|M|administrator|94086 853 | 853|49|M|writer|40515 854 | 854|29|F|student|55408 855 | 855|53|M|librarian|04988 856 | 856|43|F|marketing|97215 857 | 857|35|F|administrator|V1G4L 858 | 
858|63|M|educator|09645 859 | 859|18|F|other|06492 860 | 860|70|F|retired|48322 861 | 861|38|F|student|14085 862 | 862|25|M|executive|13820 863 | 863|17|M|student|60089 864 | 864|27|M|programmer|63021 865 | 865|25|M|artist|11231 866 | 866|45|M|other|60302 867 | 867|24|M|scientist|92507 868 | 868|21|M|programmer|55303 869 | 869|30|M|student|10025 870 | 870|22|M|student|65203 871 | 871|31|M|executive|44648 872 | 872|19|F|student|74078 873 | 873|48|F|administrator|33763 874 | 874|36|M|scientist|37076 875 | 875|24|F|student|35802 876 | 876|41|M|other|20902 877 | 877|30|M|other|77504 878 | 878|50|F|educator|98027 879 | 879|33|F|administrator|55337 880 | 880|13|M|student|83702 881 | 881|39|M|marketing|43017 882 | 882|35|M|engineer|40503 883 | 883|49|M|librarian|50266 884 | 884|44|M|engineer|55337 885 | 885|30|F|other|95316 886 | 886|20|M|student|61820 887 | 887|14|F|student|27249 888 | 888|41|M|scientist|17036 889 | 889|24|M|technician|78704 890 | 890|32|M|student|97301 891 | 891|51|F|administrator|03062 892 | 892|36|M|other|45243 893 | 893|25|M|student|95823 894 | 894|47|M|educator|74075 895 | 895|31|F|librarian|32301 896 | 896|28|M|writer|91505 897 | 897|30|M|other|33484 898 | 898|23|M|homemaker|61755 899 | 899|32|M|other|55116 900 | 900|60|M|retired|18505 901 | 901|38|M|executive|L1V3W 902 | 902|45|F|artist|97203 903 | 903|28|M|educator|20850 904 | 904|17|F|student|61073 905 | 905|27|M|other|30350 906 | 906|45|M|librarian|70124 907 | 907|25|F|other|80526 908 | 908|44|F|librarian|68504 909 | 909|50|F|educator|53171 910 | 910|28|M|healthcare|29301 911 | 911|37|F|writer|53210 912 | 912|51|M|other|06512 913 | 913|27|M|student|76201 914 | 914|44|F|other|08105 915 | 915|50|M|entertainment|60614 916 | 916|27|M|engineer|N2L5N 917 | 917|22|F|student|20006 918 | 918|40|M|scientist|70116 919 | 919|25|M|other|14216 920 | 920|30|F|artist|90008 921 | 921|20|F|student|98801 922 | 922|29|F|administrator|21114 923 | 923|21|M|student|E2E3R 924 | 924|29|M|other|11753 925 | 925|18|F|salesman|49036 926 | 926|49|M|entertainment|01701 927 | 927|23|M|programmer|55428 928 | 928|21|M|student|55408 929 | 929|44|M|scientist|53711 930 | 930|28|F|scientist|07310 931 | 931|60|M|educator|33556 932 | 932|58|M|educator|06437 933 | 933|28|M|student|48105 934 | 934|61|M|engineer|22902 935 | 935|42|M|doctor|66221 936 | 936|24|M|other|32789 937 | 937|48|M|educator|98072 938 | 938|38|F|technician|55038 939 | 939|26|F|student|33319 940 | 940|32|M|administrator|02215 941 | 941|20|M|student|97229 942 | 942|48|F|librarian|78209 943 | 943|22|M|student|77841 944 | -------------------------------------------------------------------------------- /data/mnist.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/mnist.mat -------------------------------------------------------------------------------- /data/usps.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/data/usps.mat -------------------------------------------------------------------------------- /example/mnist_usps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/example/mnist_usps.pdf -------------------------------------------------------------------------------- /example/mnist_usps.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/example/mnist_usps.png -------------------------------------------------------------------------------- /example/transp_piv_mnist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/example/transp_piv_mnist.pdf -------------------------------------------------------------------------------- /example/transp_piv_mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/example/transp_piv_mnist.png -------------------------------------------------------------------------------- /example/transp_piv_usps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/example/transp_piv_usps.pdf -------------------------------------------------------------------------------- /example/transp_piv_usps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/example/transp_piv_usps.png -------------------------------------------------------------------------------- /expe/clootclustering_faces.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/expe/clootclustering_faces.pdf -------------------------------------------------------------------------------- /expe/clootclustering_faces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PythonOT/COOT/a0356c8121b061d4762aece64936364b9e64d6dd/expe/clootclustering_faces.png -------------------------------------------------------------------------------- /expe/cot_coclustering_movielens.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import sys 5 | sys.path.append("../code/") 6 | from cot import * 7 | 8 | def cot_clustering(X, ns, nv, niter_cluster=10, niter=10, algo1='emd', algo2 = 'emd', reg1 = 0., reg2 = 0., verbose = False): 9 | Xc = np.random.randn(ns, nv) 10 | old_cost = 0 11 | 12 | for i in range(niter_cluster): 13 | Ts, Tv, cost = cot_numpy(X, Xc, niter=niter, algo=algo1, reg = reg1, algo2 = algo2, reg2 = reg2, verbose=verbose) 14 | Xc = Ts.T.dot(X).dot(Tv) * ns * nv 15 | 16 | if verbose: 17 | print(cost) 18 | 19 | if np.abs(old_cost - cost) < 1e-7: 20 | if verbose: 21 | print('converged at iter ', i) 22 | break 23 | 24 | old_cost = cost 25 | 26 | if verbose: 27 | print("\n\n") 28 | 29 | return Ts, Tv, Xc 30 | 31 | # Import the data and get the rating matrix 32 | df = pd.read_csv("../data/ml-100k/u.data", delimiter='\t',header=None, names=["user", "item", "rating", "timestamp"]) 33 | R_df = df.pivot(index = 'user', columns ='item', values = 'rating').fillna(0).values 34 | 35 | movies = pd.read_csv('../data/ml-100k/u.item', sep='|', header=None, encoding='latin-1').values[:,1] 36 | 37 | mean_ratings = np.true_divide(R_df.sum(0),(R_df!=0).sum(0)) 38 | idx_best = np.argsort(mean_ratings)[::-1].tolist() 39 | 40 
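# Note on cot_clustering above: Xc acts as an ns x nv "summary" dataset. Each outer
# iteration solves COOT between X and Xc, then re-estimates Xc as the coupling-weighted
# block average Ts.T.dot(X).dot(Tv) * ns * nv; row and column cluster assignments are
# finally read off as Ts.argmax(1) and Tv.argmax(1) (as done with yc below).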
| n_users, n_items = len(df.user.unique()), len(df.item.unique()) 41 | 42 | viz_orig = False 43 | viz_cot = True 44 | 45 | if viz_orig: 46 | plt.figure(figsize = (9,6)) 47 | plt.imshow(R_df, cmap='Blues') 48 | plt.xlabel("Users", fontsize = 15) 49 | plt.ylabel("Movies", fontsize = 15) 50 | plt.title('Original MovieLens matrix', fontsize = 20) 51 | plt.xticks([]) 52 | plt.yticks([]) 53 | plt.show() 54 | 55 | ns = 10 56 | nv = 20 57 | algo1 = 'emd' 58 | algo2 = 'emd' 59 | 60 | Ts, Tv, Xc = cot_clustering(R_df, ns = ns, nv = nv, 61 | niter_cluster=10, niter=300, algo1=algo1, algo2=algo2, reg1=0, reg2=0, verbose = False) 62 | 63 | yc = Tv.argmax(1) 64 | 65 | sum_ratings_cot = np.sum(Xc, axis = 0) 66 | idx_mov = np.argsort(sum_ratings_cot)[::-1] 67 | idx_user = np.argsort(np.sum(Xc[:,idx_mov], axis = 1)) 68 | Xc = Xc[:,idx_mov] 69 | Xc = Xc[idx_user,:] 70 | 71 | if viz_cot: 72 | plt.figure(figsize = (9,6)) 73 | plt.imshow(Xc, cmap='Blues') 74 | plt.xlabel("Users clusters", fontsize = 15) 75 | plt.ylabel("Movies clusters", fontsize = 15) 76 | plt.title('Summarized MovieLens matrix', fontsize = 20) 77 | plt.xticks([]) 78 | plt.yticks([]) 79 | plt.show() 80 | 81 | print("Movies in the most rated cluster") 82 | idx_best_cluster = np.where(yc == idx_mov[0]) 83 | print(movies[idx_best_cluster]) 84 | 85 | print("\nMovies in the least rated cluster") 86 | idx_worst_cluster = np.where(yc == idx_mov[-1]) 87 | print(movies[idx_worst_cluster]) 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /expe/cot_coclustering_sim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import * 3 | import sys 4 | sys.path.append("../code/") 5 | from blockSim import * 6 | from cot import * 7 | 8 | def cot_clustering(X, ns, nv, niter_cluster=10, niter=10, algo1='emd', algo2 = 'emd', reg1 = 0., reg2 = 0., verbose = False): 9 | Xc = np.random.randn(ns, nv) 10 | old_cost = 0 11 | 12 | for i in range(niter_cluster): 13 | Ts, Tv, cost = cot_numpy(X, Xc, niter=niter, algo=algo1, reg = reg1, algo2 = algo2, reg2 = reg2, verbose=verbose) 14 | Xc = Ts.T.dot(X).dot(Tv) * ns * nv 15 | 16 | if verbose: 17 | print(cost) 18 | 19 | if np.abs(old_cost - cost) < 1e-7: 20 | if verbose: 21 | print('converged at iter ', i) 22 | break 23 | 24 | old_cost = cost 25 | 26 | if verbose: 27 | print("\n\n") 28 | 29 | return Ts, Tv, Xc 30 | 31 | 32 | mu = [] 33 | sigma = [] 34 | prop_r = [] 35 | prop_c = [] 36 | 37 | # D1 38 | mu.append(np.matrix([[4.0, 0.5, 1.5],[1.8, 4.5, 1.1], [1.5, 1.5, 5.5]])) 39 | 40 | prop_r.append([1./3, 1./3, 1./3]) 41 | prop_c.append([1./3, 1./3, 1./3]) 42 | 43 | # D2 44 | 45 | mu.append(np.matrix([[4.0, 0.5, 1.5],[1.8, 4.5, 5.1], [3.5, 1.5, 5.5]])) 46 | 47 | prop_r.append([0.2, 0.3, 0.5]) 48 | prop_c.append([0.2, 0.3, 0.5]) 49 | 50 | ## D3 51 | 52 | mu.append(np.matrix([[4.0, 0.5, 7.5, 0.5],[0.5, 3.5, 7.8, 0.5]])) 53 | 54 | prop_r.append([0.5, 0.5]) 55 | prop_c.append([0.5, 0.2, 0.1, 0.2]) 56 | 57 | # D4 58 | 59 | mu.append(np.matrix([[1.5, 1.5, 1.5, 1.5], 60 | [2.5, 1.5, 1.5, 1.5], 61 | [2.6, 2.6, 1.5, 1.5], 62 | [2.6, 2.6, 2.5, 1.5], 63 | [2.5, 2.5, 2.6, 2.5]])) 64 | 65 | prop_r.append([0.1, 0.2, 0.2, 0.3, 0.2]) 66 | prop_c.append([0.25, 0.25, 0.25, 0.25]) 67 | 68 | ### 69 | ns = [600, 600, 300, 300] 70 | ds = [300, 300, 200, 300] 71 | 72 | nb_r = [3, 3, 2, 5] 73 | nb_c = [3, 3, 4, 4] 74 | 75 | sigmas = [0.1, 0.15, 0.2, 0.15] 76 | 77 | algo1 = 'sinkhorn' 78 | algo2 = 'sinkhorn' 79 | 80 | reg_c 
= [0.1, 0.3, 0.3, 0.04]
81 | reg_v = [0.1, 0.3, 0.3, 0.04]
82 |
83 | nrep = 100
84 | err_cc, err_r, err_c = [], [], []
85 |
86 | for i in range(len(mu)):
87 |
88 |     print("Data set D"+str(i+1))
89 |     reg1 = reg_c[i]
90 |     reg2 = reg_v[i]
91 |
92 |     for j in range(nrep):
93 |
94 |         dat, z, w = generatedata(n=ns[i], d=ds[i], prop_r=prop_r[i],
95 |                                  prop_c=prop_c[i], mu=mu[i], noise=sigmas[i])
96 |
97 |         Ts, Tv, Xc = cot_clustering(X = dat, ns = nb_r[i], nv = nb_c[i],
98 |                                     niter_cluster=20, niter=100, algo1=algo1, algo2=algo2, reg1=reg1, reg2=reg2, verbose=False)
99 |
100 |         yr = Ts.argmax(1)
101 |         yc = Tv.argmax(1)
102 |
103 |         err_r.append(1 - np.sum(np.max(confusion_matrix(yr, z), 1)) / ns[i])
104 |         err_c.append(1 - np.sum(np.max(confusion_matrix(yc, w), 1)) / ds[i])
105 |         err_cc.append(err_c[-1] + err_r[-1] - err_c[-1] * err_r[-1])
106 |
107 |     print("Rows clustering error = "+str(np.mean(err_r))+'±'+str(np.std(err_r)))
108 |     print("Columns clustering error = "+str(np.mean(err_c))+'±'+str(np.std(err_c)))
109 |     print("Co-clustering error = "+str(np.mean(err_cc))+'±'+str(np.std(err_cc)))
110 |
--------------------------------------------------------------------------------
/expe/test_HDA_full.py:
--------------------------------------------------------------------------------
1 | #%% Reproduce HDA experiment of the paper
2 |
3 |
4 | import os
5 | import time
6 | from random import shuffle
7 | import numpy as np
8 | from scipy.io import loadmat
9 | from sklearn import preprocessing
10 | from sklearn.metrics import euclidean_distances
11 | import sys
12 | sys.path.append('../code')
13 | from cot import *
14 | from functools import reduce
15 |
16 | import matplotlib.pylab as pl
17 |
18 | from sklearn import svm
19 |
20 |
21 | ###############################################################################
22 | # Part of code about arguments to modify #
23 | # #
24 |
25 | #featuresToUse = ["CaffeNet4096", "GoogleNet1024"] # surf, CaffeNet4096, GoogleNet1024
26 | featuresToUse = ["CaffeNet4096", "GoogleNet1024"] # surf, CaffeNet4096, GoogleNet1024
27 | numRepetition = 10
28 | n_samples_tab = [0,1,3,5] # number of labeled source samples per class
29 | # see function adaptData for available algorithms
30 |
31 | # #
32 | # End of part of code about arguments to modify #
33 | ###############################################################################
34 |
35 |
36 | def generateSubset(X, Y, nPerClass):
37 |     idx = []
38 |     for c in np.unique(Y):
39 |         idxClass = np.argwhere(Y == c).ravel()
40 |         shuffle(idxClass)
41 |         idx.extend(idxClass[0:min(nPerClass, len(idxClass))])
42 |     return (X[idx, :], Y[idx])
43 |
44 |
45 | # ---------------------------- DATA Loading Part ------------------------------
46 |
47 | domainSourceNames = ['caltech10','amazon','webcam']
48 | domainTargetNames = ['caltech10','amazon','webcam']
49 |
50 | tests = []
51 | data_source = {}
52 | data_target = {}
53 |
54 | min_max_scaler = preprocessing.MinMaxScaler()
55 |
56 | for sourceDomain in domainSourceNames:
57 |     possible_data = loadmat(os.path.join("../data/", "features", featuresToUse[0],
58 |                                          sourceDomain + '.mat'))
59 |     if featuresToUse[0] == "surf":
60 |         # Normalize the surf histograms
61 |         feat = (possible_data['fts'].astype(float) /
62 |                 np.tile(np.sum(possible_data['fts'], 1),
63 |                         (np.shape(possible_data['fts'])[1], 1)).T)
64 |     else:
65 |         feat = possible_data['fts'].astype(float)
66 |
67 |     # Z-score
68 |     #feat = preprocessing.scale(feat)
69 |     #feat = min_max_scaler.fit_transform(feat)
70 |
71 |     labels = possible_data['labels'].ravel()
72 |
    data_source[sourceDomain] = [feat, labels]
73 |
74 |     for targetDomain in domainTargetNames:
75 |         #if targetDomain is sourceDomain:
76 |         possible_data = loadmat(os.path.join("../data/", "features", featuresToUse[1],
77 |                                              targetDomain + '.mat'))
78 |         if featuresToUse[1] == "surf":
79 |             # Normalize the surf histograms
80 |             feat = (possible_data['fts'].astype(float) /
81 |                     np.tile(np.sum(possible_data['fts'], 1),
82 |                             (np.shape(possible_data['fts'])[1], 1)).T)
83 |         else:
84 |             feat = possible_data['fts'].astype(float)
85 |
86 |         # Z-score
87 |         #feat = preprocessing.scale(feat)
88 |         #feat = min_max_scaler.fit_transform(feat)
89 |
90 |         #feat=np.dot(np.diag(1./np.sum(feat,axis=1)),feat)
91 |
92 |         labels = possible_data['labels'].ravel()
93 |         data_target[targetDomain] = [feat, labels]
94 |
95 |         perClassSource = 20
96 |         if sourceDomain == 'dslr':
97 |             perClassSource = 8
98 |         tests.append([sourceDomain, targetDomain, perClassSource])
99 |
100 | meansAcc = {}
101 | stdsAcc = {}
102 | totalTime = {}
103 |
104 | print("Feature used for source: ", featuresToUse[0])
105 | print("Feature used for target: ", featuresToUse[1])
106 |
107 |
108 | #%%
109 | from sklearn.preprocessing import OneHotEncoder as onehot
110 | from sklearn.neighbors import KNeighborsClassifier
111 | enc = onehot(handle_unknown='ignore',sparse=False)
112 |
113 |
114 | def comp_(v=1e6):
115 |     def comp(x,y):
116 |         if x==y or y==-1:
117 |             return 0
118 |         else:
119 |             return v
120 |     return comp
121 |
122 | def compute_cost_matrix(ys,yt,v=np.inf):
123 |     M=ot.dist(ys.reshape(-1,1),yt.reshape(-1,1),metric=comp_(v))
124 |     return M
125 |
126 | #%%
127 |
128 | import ot
129 |
130 | # -------------------- Main testing loop --------------------------------------
131 |
132 | all_results={}
133 |
134 | for n_samples in n_samples_tab:
135 |     dict_tmp={}
136 |
137 |     for test in tests:
138 |         Sname = test[0]
139 |         Tname = test[1]
140 |         perClassSource = test[2]
141 |         testName = Sname.upper()[:1] + '->' + Tname.upper()[:1]
142 |         print(testName, end=" ")
143 |
144 |         dict_tmp[testName] = {}
145 |
146 |         perf_baseline= []
147 |         perf_COT = []
148 |         time_COT = []
149 |
150 |
151 |         # --------------------II. prepare data-------------------------------------
152 |         Sx_tot = data_source[Sname][0]
153 |         Sy_tot = data_source[Sname][1]
154 |         Tx_tot = data_target[Tname][0]
155 |         Ty_tot = data_target[Tname][1]
156 |
157 |         for repe in range(numRepetition):
158 |             Sx, Sy = generateSubset(Sx_tot, Sy_tot, perClassSource)
159 |             Tx, Ty = generateSubset(Tx_tot, Ty_tot, perClassSource)
160 |
161 |             # shuffle the target samples once with a single random permutation
162 |             idx = np.random.permutation(Tx.shape[0])
163 |             Tx = Tx[idx, :]
164 |             Ty = Ty[idx]
165 |
166 |             # semi-supervision: keep n_samples labeled source samples per class (-1 = unlabeled)
167 |             nb_perclass = n_samples
168 |             Sy_ss =-1*np.ones_like(Sy)
169 |
170 |             for c in np.unique(Sy):
171 |                 idx=np.where(Sy==c)[0]
172 |                 Sy_ss[idx[:nb_perclass]]=c
173 |
174 |             M_lin = compute_cost_matrix(Ty,Sy_ss,v=1e2)
175 |             # --- compute baseline score with a 3NN classifier on the labeled source samples
176 |
177 |             idx=np.where(Sy_ss!=-1)[0]
178 |             idx_inv=np.where(Sy_ss==-1)[0]
179 |
180 |             if nb_perclass!=0:
181 |                 neigh = KNeighborsClassifier(n_neighbors=3).fit(Sx[idx,:],Sy[idx])
182 |                 ys_estimated = neigh.predict(Sx[idx_inv,:])
183 |                 perf_baseline.append(100*np.mean(Sy[idx_inv]==ys_estimated))
184 |                 print('Accuracy 3NN on source (baseline): {:.2f}'.format(100*np.mean(Sy[idx_inv]==ys_estimated)))
185 |
186 |             #print('mean perf',np.mean(r))
187 |
188 |             # --------------------III.
run experiments---------------------------------
189 |
190 |
191 |             # ------------------- COT -----------------------------------------------
192 |             ot.tic()
193 |             Tv, Tc, cost = cot_numpy(Sx, Tx, niter=100,C_lin=M_lin.T,
194 |                                      algo='sinkhorn', reg=1e0,
195 |                                      algo2='emd', verbose = False)
196 |             time_COT.append(ot.toc())
197 |
198 |             yt_onehot = enc.fit_transform(Ty.reshape(-1,1))
199 |             ys_onehot_estimated = Tv.shape[0]*np.dot(Tv,yt_onehot)  # propagate target labels to the source through the sample coupling
200 |             ys_estimated=enc.inverse_transform(ys_onehot_estimated).reshape(-1)
201 |
202 |             perf=100*np.mean(Sy[idx_inv]==ys_estimated[idx_inv])
203 |             perf_COT.append(perf)
204 |             print('Accuracy COT labelprop: {:.2f}'.format(perf))
205 |
206 |
207 |         if n_samples!=0:
208 |             print('mean perf baseline= {:.2f} ({:.2f})'.format(np.mean(perf_baseline),np.std(perf_baseline)))
209 |         print('mean perf COT= {:.2f} ({:.2f})'.format(np.mean(perf_COT),np.std(perf_COT)))
210 |
211 |         dict_tmp[testName]['baseline']=perf_baseline
212 |         dict_tmp[testName]['COT']=perf_COT
213 |         dict_tmp[testName]['time_COT']=time_COT
214 |
215 |     all_results[n_samples] = dict_tmp
216 |
217 | np.save('results_D_to_G.npy',all_results)
--------------------------------------------------------------------------------
/expe/visu_cootclustering_faces.py:
--------------------------------------------------------------------------------
1 | # Reproduce Fig 4 of the paper
2 |
3 | import sys
4 | sys.path.append('../code')
5 | import numpy as np
6 | from scipy import ndimage
7 | import scipy as sp
8 | import matplotlib.pylab as pl
9 | import ot
10 | import scipy.io
11 |
12 |
13 | #%%
14 |
15 | from sklearn.datasets import fetch_olivetti_faces
16 |
17 | dataset = fetch_olivetti_faces()
18 | Xtot1 = dataset.data
19 |
20 | y=dataset.target
21 |
22 |
23 | def get_data(x,n):
24 |
25 |     idx=np.random.permutation(x.shape[0])
26 |
27 |     xr =x[idx[:n],:]
28 |     return xr
29 |
30 |
31 | #%%
32 | n=400
33 | x=get_data(Xtot1,n)
34 | x=x/x.max()
35 |
36 | #%%
37 | from cot import cot_numpy
38 |
39 | np.random.seed(1986)  # seed the RNG (a plain assignment would overwrite np.random.seed)
40 |
41 | def cot_clustering(X,ns,nv,niter_cluster=10, niter=10, algo='emd'):
42 |
43 |     Xc=np.random.randn(ns,nv)
44 |     old_cost=0
45 |
46 |     for i in range(niter_cluster):
47 |         Ts,Tv,cost=cot_numpy(X, Xc, niter=niter, algo=algo,verbose=False)  # use the niter argument instead of a hard-coded value
48 |
49 |         Xc=Ts.T.dot(X).dot(Tv)*ns*nv
50 |
51 |         print(cost)
52 |
53 |         if abs(old_cost-cost)==0:
54 |             break
55 |
56 |         old_cost=cost
57 |
58 |     return Ts,Tv,Xc
59 |
60 |
61 |
62 |
63 |
64 | ns=9
65 |
66 | nv=40
67 |
68 | ot.tic()
69 | Ts,Tv,Xc= cot_clustering(x,ns,nv,niter_cluster=50, niter=20, algo='emd')
70 | ot.toc()
71 |
72 |
73 |
74 |
75 | #%% plot var pos
76 |
77 |
78 |
79 | mxc=Xc.mean(0)
80 | isort=np.argsort(mxc)
81 | #isort=np.arange(nv)
82 | Iclass=np.zeros((64,64))
83 |
84 | #pl.figure(4,(8,5))
85 |
86 | for i in range(nv):
87 |     #pl.subplot(5,8,i+1)
88 |     #pl.imshow(Tv[:,isort[i]].reshape((64,64))*mxc[isort[i]]/np.max(mxc),vmax=Tv.max(),cmap='jet')
89 |     #pl.title('V={:1.2f}'.format(Xc.mean(0)[isort[i]]))
90 |     Iclass[Tv[:,isort[i]].reshape((64,64))>0]=i#*(Tv[:,isort[i]].reshape((64,64))>0)
91 |     #pl.axis('off')
92 |
93 |
94 | pl.figure(5)
95 | pl.imshow(Iclass,cmap='jet')
96 | pl.title('Pixel (variable) clustering')
97 |
98 |
99 | #%%
100 |
101 | pl.figure(6,(16,10))
102 |
103 | clustrs=Tv.dot(Xc.T).T
104 |
105 | for i in range(ns):
106 |     pl.subplot(3,4,i+1)
107 |     pl.imshow(clustrs[i,:].reshape((64,64)),cmap='gray')
108 |     pl.axis('off')
109 |
110 |
111 |
112 | #%%
113 |
114 | data_rec=Ts.dot(Xc).dot(Tv.T)
115 |
116 | idplot=np.random.permutation(y.shape[0])
117 | nbv=100
118
| 119 | pl.figure(7,(10,10)) 120 | for i in range(nbv): 121 | pl.subplot(10,10,i+1) 122 | pl.imshow(x[idplot[i],:].reshape((64,64)),cmap='gray') 123 | 124 | 125 | pl.figure(8,(10,10)) 126 | for i in range(nbv): 127 | pl.subplot(10,10,i+1) 128 | pl.imshow(data_rec[idplot[i],:].reshape((64,64)),cmap='gray') 129 | 130 | 131 | #%% 132 | 133 | nx=3 134 | ny=3 135 | delta=1 136 | wimg=64 137 | Icluster=np.zeros((wimg*ny+delta*(ny-1),wimg*nx+delta*(nx-1)))+clustrs.min() 138 | for i in range(ny): 139 | for j in range(nx): 140 | Icluster[i*(wimg+delta):i*(wimg+delta)+wimg,j*(wimg+delta):j*(wimg+delta)+wimg]=clustrs[i+ny*j,:].reshape((64,64)) 141 | 142 | 143 | pl.figure(1) 144 | pl.imshow(Icluster,cmap='gray') 145 | pl.title('Sample clusters') 146 | pl.axis('off') 147 | 148 | 149 | 150 | 151 | #%% 152 | 153 | isel=range(80,400,10) 154 | 155 | nx=3 156 | ny=3 157 | delta=1 158 | wimg=64 159 | Idata=np.zeros((wimg*ny+delta*(ny-1),wimg*nx+delta*(nx-1)))+clustrs.min() 160 | for i in range(ny): 161 | for j in range(nx): 162 | Idata[i*(wimg+delta):i*(wimg+delta)+wimg,j*(wimg+delta):j*(wimg+delta)+wimg]=x[isel[i+ny*j],:].reshape((64,64)) 163 | 164 | 165 | pl.figure(1) 166 | pl.imshow(Idata,cmap='gray') 167 | pl.title('Face dataset') 168 | pl.axis('off') 169 | 170 | 171 | #%% 172 | 173 | pl.figure(10,(12,4)) 174 | pl.clf() 175 | pl.subplot(1,3,1) 176 | pl.imshow(Idata,cmap='gray') 177 | pl.title('Face dataset') 178 | pl.axis('off') 179 | 180 | pl.subplot(1,3,2) 181 | pl.imshow(Icluster,cmap='gray') 182 | pl.title('Centroids for sample clustering') 183 | pl.axis('off') 184 | 185 | pl.subplot(1,3,3) 186 | pl.imshow(Iclass+1,cmap='nipy_spectral') 187 | pl.title('Pixel (feature) clustering') 188 | pl.axis('off') 189 | 190 | left, bottom, width, height = pl.gca().get_position().bounds 191 | cax = pl.gcf().add_axes([left+1.05*width, height*0.23, width*0.05, height]) 192 | pl.colorbar( cax=cax) 193 | 194 | pl.savefig('./clootclustering_faces.png') 195 | pl.savefig('./clootclustering_faces.pdf',bbox_inches='tight') 196 | 197 | 198 | 199 | --------------------------------------------------------------------------------
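For quick reference, the snippet below shows how the COOT-based co-clustering helper used in the expe scripts can be run end to end on toy data. It is a minimal sketch that mirrors the `cot_clustering` helper from `expe/cot_coclustering_sim.py`, assuming `../code` is on the path and that `cot_numpy` from `code/cot.py` accepts the keyword arguments used in those scripts (`niter`, `algo`, `reg`, `algo2`, `reg2`, `verbose`); the random matrix and the cluster counts are purely illustrative.

```python
# Minimal sketch: COOT-based co-clustering on synthetic data.
# Assumes ../code/cot.py provides cot_numpy with the signature used in the
# expe scripts; the toy matrix and cluster numbers are illustrative only.
import sys
sys.path.append('../code')

import numpy as np
from cot import cot_numpy


def cot_clustering(X, ns, nv, niter_cluster=10, niter=10,
                   algo1='emd', algo2='emd', reg1=0., reg2=0., verbose=False):
    # Alternate between (1) solving COOT between X and a small ns x nv summary
    # matrix Xc and (2) updating Xc from the two transport plans, as in
    # expe/cot_coclustering_sim.py.
    Xc = np.random.randn(ns, nv)
    old_cost = 0
    for i in range(niter_cluster):
        Ts, Tv, cost = cot_numpy(X, Xc, niter=niter, algo=algo1, reg=reg1,
                                 algo2=algo2, reg2=reg2, verbose=verbose)
        Xc = Ts.T.dot(X).dot(Tv) * ns * nv  # ns x nv summary of X
        if np.abs(old_cost - cost) < 1e-7:
            break
        old_cost = cost
    return Ts, Tv, Xc


# Toy example: co-cluster a random 100 x 40 matrix into 3 row clusters
# and 4 column clusters.
X = np.random.rand(100, 40)
Ts, Tv, Xc = cot_clustering(X, ns=3, nv=4, niter_cluster=10, niter=100)
row_labels = Ts.argmax(1)  # cluster assignment of each row (sample)
col_labels = Tv.argmax(1)  # cluster assignment of each column (feature)
```

`Ts` and `Tv` are the sample and feature transport plans returned by `cot_numpy`; taking their row-wise argmax recovers hard cluster assignments, exactly as done in the experiment scripts above.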