├── .env ├── .gitattributes ├── .gitignore ├── .style.yapf ├── .travis.yml ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── environment.yml ├── examples ├── iris.ipynb └── mnist.ipynb ├── requirements.txt ├── setup.cfg ├── setup.py ├── tsne ├── __init__.py ├── _version.py ├── bh_sne.pyx ├── bh_sne_src │ ├── Readme.txt │ ├── compile_linux │ ├── compile_mac │ ├── fast_tsne.m │ ├── quadtree.cpp │ ├── quadtree.h │ ├── tsne.cpp │ ├── tsne.h │ └── vptree.h └── tests │ ├── __init__.py │ ├── test_iris.py │ └── test_seed.py └── versioneer.py /.env: -------------------------------------------------------------------------------- 1 | if [[ "$CONDA_DEFAULT_ENV" != "tsne-dev" ]] 2 | then 3 | workon tsne-dev 4 | fi 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | tsne/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | MANIFEST 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # IPython Notebook 34 | .ipynb_checkpoints 35 | 36 | # Project specific 37 | tsne/bh_sne.cpp 38 | *.pkl.gz 39 | .cache 40 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = google 3 | spaces_before_comment = 4 4 | split_before_logical_operator = true 5 | column_limit = 100 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | # - "3.4" 6 | 7 | addons: 8 | apt: 9 | packages: 10 | - build-essential 11 | - libatlas-base-dev 12 | 13 | sudo: false 14 | 15 | install: 16 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 17 | wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 18 | else 19 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 20 | fi 21 | - bash miniconda.sh -b -p $HOME/miniconda 22 | - export PATH="$HOME/miniconda/bin:$PATH" 23 | - conda config --set always_yes yes --set changeps1 no 24 | - conda update -q conda 25 | - conda info -a 26 | 27 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION 28 | - source activate test-environment 29 | - conda install numpy cython scipy pytest scikit-learn 30 | - python setup.py install 31 | - source deactivate 32 | 33 | script: 34 | - pwd 35 | - source activate test-environment 36 | - cd tsne/tests 37 | - py.test -s -vv 38 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | RUN apt-get update && apt-get install -y git build-essential libatlas-base-dev 4 | 5 | RUN /opt/conda/bin/conda install ipython numpy cython scipy scikit-learn pytest -y 6 | 7 | VOLUME /tsne 8 | WORKDIR /tsne 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Laurens van der Maaten (Delft University of Technology) 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 1. Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | 3. All advertising materials mentioning features or use of this software 12 | must display the following acknowledgement: 13 | This product includes software developed by the Delft University of Technology. 14 | 4. Neither the name of the Delft University of Technology nor the names of 15 | its contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS 19 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 21 | EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 24 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 27 | OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include requirements.txt 3 | include tsne/bh_sne.pyx 4 | recursive-include tsne/bh_sne_src * 5 | exclude tsne/bh_sne.cpp 6 | include versioneer.py 7 | include tsne/_version.py 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | python setup.py build_ext --inplace 3 | 4 | install: 5 | python setup.py build_ext --inplace 6 | python setup.py install 7 | 8 | clean : 9 | rm -rf *.pyc *.so build/ bh_sne.cpp 10 | rm -rf tsne/*.pyc tsne/*.so tsne/build/ tsne/bh_sne.cpp 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python-TSNE 2 | =========== 3 | 4 | [![travis-ci](https://api.travis-ci.org/danielfrg/tsne.svg)](https://travis-ci.org/danielfrg/tsne) 5 | 6 | Python library containing T-SNE algorithms, compatible with Python 3.5. 7 | 8 | Algorithms 9 | ---------- 10 | 11 | ### Barnes-Hut-SNE 12 | 13 | A python ([cython](http://www.cython.org)) wrapper for [Barnes-Hut-SNE](http://homepage.tudelft.nl/19j49/t-SNE.html) aka fast-tsne. 14 | 15 | I basically took [osdf's code](https://github.com/osdf/py_bh_tsne) and made it pip compliant. 16 | 17 | Requirements 18 | ------------ 19 | 20 | * [numpy](numpy.scipy.org) > =1.7.1 21 | * [scipy](http://www.scipy.org/) >= 0.12.0 22 | * [cython](cython.org) >= 0.19.1 23 | * [cblas](http://www.netlib.org/blas/) or [openblas](https://github.com/xianyi/OpenBLAS). Tested version is v0.2.5 and v0.2.6 (not necessary for OSX). 24 | 25 | [Anaconda](http://continuum.io/downloads) is recommended. 26 | 27 | Installation 28 | ------------ 29 | 30 | You can install directly from the Github repository: 31 | 32 | ``` 33 | pip install git+https://github.com/alexisbcook/tsne.git 34 | ``` 35 | 36 | Usage 37 | ----- 38 | 39 | Basic usage: 40 | 41 | ``` 42 | from tsne import bh_sne 43 | X_2d = bh_sne(X) 44 | ``` 45 | 46 | ### Examples 47 | 48 | * [Iris](http://nbviewer.ipython.org/urls/raw.github.com/danielfrg/py_tsne/master/examples/iris.ipynb) 49 | * [MNIST](http://nbviewer.ipython.org/urls/raw.github.com/danielfrg/py_tsne/master/examples/mnist.ipynb) 50 | * [word2vec on presidential speeches](https://github.com/prateekpg2455/U.S-Presidential-Speeches) via [@prateekpg2455](https://github.com/prateekpg2455) 51 | 52 | More Information 53 | ---------------- 54 | 55 | See *Barnes-Hut-SNE* (2013), L.J.P. van der Maaten. It is available on [arxiv](http://arxiv.org/abs/1301.3342). 56 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: tsne-dev 2 | dependencies: 3 | - python=2.7 4 | - cython 5 | - ipython 6 | - numpy 7 | - scipy 8 | - scikit-learn 9 | - pytest 10 | - pylint 11 | - pip: 12 | - pytest-cov 13 | - yapf 14 | - versioneer 15 | -------------------------------------------------------------------------------- /examples/iris.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "from tsne import bh_sne" 15 | ], 16 | "language": "python", 17 | "metadata": {}, 18 | "outputs": [], 19 | "prompt_number": 1 20 | }, 21 | { 22 | "cell_type": "code", 23 | "collapsed": false, 24 | "input": [ 25 | "from sklearn.datasets import load_iris" 26 | ], 27 | "language": "python", 28 | "metadata": {}, 29 | "outputs": [], 30 | "prompt_number": 2 31 | }, 32 | { 33 | "cell_type": "code", 34 | "collapsed": false, 35 | "input": [ 36 | "iris = load_iris()" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [], 41 | "prompt_number": 3 42 | }, 43 | { 44 | "cell_type": "code", 45 | "collapsed": false, 46 | "input": [ 47 | "X = iris.data" 48 | ], 49 | "language": "python", 50 | "metadata": {}, 51 | "outputs": [], 52 | "prompt_number": 4 53 | }, 54 | { 55 | "cell_type": "code", 56 | "collapsed": false, 57 | "input": [ 58 | "y = iris.target" 59 | ], 60 | "language": "python", 61 | "metadata": {}, 62 | "outputs": [], 63 | "prompt_number": 5 64 | }, 65 | { 66 | "cell_type": "code", 67 | "collapsed": false, 68 | "input": [ 69 | "X_2d = bh_sne(X)" 70 | ], 71 | "language": "python", 72 | "metadata": {}, 73 | "outputs": [], 74 | "prompt_number": 6 75 | }, 76 | { 77 | "cell_type": "code", 78 | "collapsed": false, 79 | "input": [ 80 | "scatter(X_2d[:, 0], X_2d[:, 1], c=y)" 81 | ], 82 | "language": "python", 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "metadata": {}, 87 | "output_type": "pyout", 88 | "prompt_number": 7, 89 | "text": [ 90 | "" 91 | ] 92 | }, 93 | { 94 | "metadata": {}, 95 | "output_type": "display_data", 96 | "png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAD+CAYAAAAwAx7XAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xd0VHX+//Hn1CSTSe+NEnpLkKKAIEUFRMTQdV1hUXS/\nW/QL62JZF9FF3d3j7ur6FXdt6w+wEsQAIqKCgBHRpSZU6S29J5Nk6v39ERgSkpDCwOSG9+Mcz/HO\n3HvnNTfMZ+687+d+PhpFURSEEEK0O1pvBxBCCHF1SAMvhBDtlDTwQgjRTkkDL4QQ7ZQ08EII0U5J\nAy+EEO2UvrUbvvXWW2RlZeFyufj1r39NVFQUGRkZrFy5EoAZM2bQt29fjwUVQgjRMq1u4B966CEA\n9u3bx5o1a5g7dy6pqaksXLgQgBdeeIE+ffqg0Wg8k1QIIUSLXHGJxtfXF71eT3Z2NjExMRiNRoxG\nI1FRUeTk5HgioxBCiFZo9Rn8Bd988w0TJkygoqICf39/li5dCoDJZKK8vJyYmJgrDimEEKLlrqiB\n37FjB7GxscTFxZGVlYXFYmHu3LkoisLbb79NYGBgo9tu3LjxSl5aCCGuW7feemuz1mt1A3/8+HEO\nHjzI/fffD0B0dDTZ2dnu53NycoiOjr7sPgYMGNDalxdCiOvSrl27mr1uq2vw//jHPzh69CjPPfcc\n7777LlqtlmnTprF48WKef/55pk+f3tpdtynp6enejtAsasiphowgOT1NcnpPq8/gX3vttXqPJScn\nk5ycfEWBhBBCeIbGW8MFb9y4UUo0QgjRQrt27Wp2DV7uZBVCiHZKGvgmqKUup4acasgIkrO5Kivt\nlJVZm1zP2zmbSy05W0IaeCFEi+3bV8DkyZ9yxx0r+e67s96OIxohNXghRItYLDYmT05jx46aO9UD\nAox89919xMcHXPG+z5wp4/vvs1AUhaFD4+jQofF7aa5XLanBX/GdrEKI64vD4aK83OZerqpy4HC4\nLrvN6dNlfPPNaZxOF6NHd6Bz52AATpwoobTUSmxsACaTnkWLviMt7QgA48Z15o03xhIY6HP13kw7\nJyWaJqilLqeGnGrICJKzKUFBvrz00ijMZgM6nYYlS24jLs7c6Po7dmSwcOG3zJ+/id//fjOPPrqR\n4uJqMjLyuP32FYwZ8zGPPPI1WVkVfPPNafd2W7acpqzM1uh+PU0tf/eWkAZeCNFiw4fH89139/Hj\nj7NISemGwaC7zNpGtm3Lci/9+GM2VVV21q49RlFRNQBffXWSrKwK7ruvt3u9++7rTVCQkR9+yOLN\nN/fw7bdnqKqyX6231C5JiaYJw4cP93aEZlFDTjVkBMnZXAkJzauP9+vXlblzS/jrX38E4IEH+hEU\n5FOnZq/VaggK8mHevIGMHt0Bl0shOTmSI0dKuOuuVe4S0Jo1Uxg+PN7zbwbvH8+rQRp4IUSrKYrC\ngQMFOJ0KPXqE4uNTv0nx8dHz8MP9GTo0DpdLoUOHQHbsyKFfv3CeemoI27dnMXduEn37hmMw6Lj1\n1o7ubX/8MbtOff/UqdKr1sC3R1KiaYJa6nJqyKmGjCA5L8fpdOFyXex499lnx7jtthWMHv0Ry5bt\nx2p11NsmPT2dkBBfbrklgd69w3n88c1MnpzG7bevYODASP71r9vp1SuU7duzWLp0Hzt2ZLtfIzEx\nmODgmousvr46evUKv2rvTS1/95aQM3ghRLPs25fPCy9sR6uFp58eir+/geef/x6r1QnAU09tZcSI\neHr2DGt0H1lZ5WzaVHMhVVHg//5vN0ajlltv7cSTT24BwGDQsmHDDPr3j6R37zDWrZvGyZOlxMWZ\n6dcvolXZy8qsFBRUERhoJDzc1Kp9qJGcwTdBLXU5NeRUQ0aQnFDTIJaWVruXCwqqmDPnc6qq7BiN\nOv7wh60oCoSE+LrXCQw0otdrKS6uZu/ePI4cKa6XMzjYl7AwP8LD/Zg9uy8zZvQgJ6eS/PxK9zp2\nu4tz58rdy716hXHHHYkEBfnwzjuZLFmyi2PHipv9XvLyKnn66a0MGrSM6dPXNLqtWv7uLSENvBCi\njt27c7nrrk8YP34l27fX9H6xWh3cd18fjEYdhw8XMXZsZ/z8dDz77DBGj05g0KBo/vOfOwgP92Xx\n4m2MHv0Ro0d/yPffn6uz706dgli7dgp//vMtfPXVSf7+9/8ya1YfoqL8MRhqmqOgIB86dw6qs53F\nYmPRou94/PHNLFyYzi9/+SWFhVXNej/79+fz/vsHAdi7N69OV8z2Thr4JqilLqeGnGrICNd3zuLi\nKn71qy/JzCzg8OEiZs1aR3Z2BZGRJk6cKOXrr09x6FARixalU1BQRWrqYVyumjP51auPcPJkGf/v\n/+0DoLLSwauv7mLv3r11XsPf38D8+ZvIyqrg+PFS3n47g6KiSj766C6WL7+Tdeum0rt33Vq7xWLn\nv/+9OKHQnj15VFQ0r4/8pV04/fwMDa6nlr97S0gNXgjh5nQqVFZevFBaVeXA6VQwGHR1SjZOp0J1\ntYNVq45QWloz4Jifn57Zs/vh66ujurqmLt+1azAOR/0Lr7Uv1LpcCvfd14e4uMaHOggJ8eXhh/vz\n3HPfAfDQQ0mEh/s16z316xfOs8/ezH/+k8nw4fGMHJnQrO3aAxmLRghRx+bNp5k1ax12u4t33hnP\n+PGJaLUa9uzJY/r01RQWVvGHP9zExIldefHF7Xz22TEAxoxJYMmS2zlypIR//nMn3buH8PDDyXTs\nWLfcoigKX311koce2oCvr46lS+9kyJDYJnOVlVnJzMzH6VTo0yecsLDmNfBQM7xCaakVs9nQYFdO\nNWnJWDTSwAsh6jlzphyXy0VCQiBarcb9+Nmz5VRXO3A6XcycuYaJE7sSEuJLeLgfgwdHu0sriqKg\n0Wga2z2KopCVZaG0tJpPPvkJnU7DPff0IjExuNFt8vIqKS6uJizM97rqCXMpmfDDg9RSl1NDTjVk\nBMkJkJAQQMeOQXUad4D4+AC6dg3h1KkyTp8u5/XXd/PnP2+nosLmbtyPHSthx44csrMrGs2p0Wgw\nGrX84hef8/LLO/jb3/7LH//4LZWVDQ9FcOJECT/72VqGDn2PX/zic06fLvPwO1bP370lpIEXQrRY\nRIQJvb6m+XC5FLp2DQEgMzOfsWNXMG5cKr/4xeecOdN4Q1xZaef48VL38r59BY028Dt35rJrVy4A\n27ZlsWdPnqfeSrvW6mLUwYMHWbZsGb179+b+++8HYMmSJWRlZWE0Ghk5ciSjRo3yVE6vUUvfWDXk\nVENGkJzNkZQUQVraZL755jTJyREMHVpTQ9+06TTFxTUXY//73xz27y+gY8deFBVVERpat2YeHm7i\nd78bxN/+9l8Afv/7wXX61dfm71+354vJ5Pk6ulr+7i3R6qNkt9uZPHkyhw8fdj+m0WiYP38+4eFX\n73ZiIYTnWSw2qqudhIb6Nlg7dzpdnDhRisul0LlzEAaDjmHD4hg2LK7OegkJF4cN1mhqxoH/2c8+\nY9KkLvzlL6OIjvZ3P+/vb+A3v7mB227rhF6vpVevMHS6hosKgwZF8/jjN/LZZ8eYOrU7AwZEeeid\nt2+tLtEkJSVhNtcfA9pL12yvGrXU5dSQUw0Z4frL+dNPRdx772fcdtvHrF17DLvdWed5RVFYv/4E\nw4a9z7Bh77Nq1ZF661wwYkQCf/7zLdx5ZyKvv347wcE+vPbabUye3N19Z2ttQUG+3HhjDAMGROHn\n1/j5ZkSEiccfv4n166cxb96ger8GPEEtf/eW8OjvHD8/P1599VXMZjOzZ88mOjr6suunp6e7fxZd\nOLhtbbl21raQp7HlzMzMNpWnoeXMzMw2lUfty546nu++m0l+fiXnzlXw4IPr2br1ZxQWHnQ/n59f\nyeOPb3aP6vi7320iMVGL1ZrX4P5++cv+3Hyznp9+0vLQQxtQFEhOjmDYsDjCw/3o1SusTRy/q3U8\nr8Vyc11RN8kDBw6wc+dOdw3+gpMnT5KamsqCBQsa3Va6SQrhfadOlfLWWxlkZ1cwdGgcixdvY926\nqfTte3FQr+LialJSPiUzMx+o6Unz9dcziIz0r7OvwsIqCgoqyc62EB7ux1//+iPr1h1zP//kkzeR\nlBTJ+PGdycurZM+eXBQF+vePJCqq7r5E467ZnKyNfTcYDAb0enXfTCBEe6coCq+8spOlS2uGFtiy\n5QxvvDG2Xl/0kBBfXn/9dhYtSsdmc7J48Yh6jfupU6UsWbKb8nIbH398iJgYf+bOTXI38CEhvmg0\nEBHhR2Wlnb/97UfefjsDgF/8oi+LFw/H3994Dd719aXVrXBaWhp79uyhpKSEqqoqHn74YV5++WVK\nSkrw8/PjwQcf9GROr6ldRmrL1JBTDRnh+slptTrZty/fvVxUVE18fAAmU/2xWvr0Ceejj+7C5QKj\nsf70fDt35hIW5sc779Q02tnZFs6dq+Dvfx9NVlY5gwbFADVdI/PyKvn440PubVNTD/PYY4O93sCr\n5e/eEq1u4FNSUkhJSanz2Pz58684kBDi2vD11TN//mBmz16H06nwi1/0IT6+8Wn49PrG51318dGR\nk2Ohe/dQDh8uAmpKNlFRfmzYcIK8vEqWLz8AwBtvjOW22zry6adHABgzpgOBgXL2fjXIUAVCXMfs\ndieHDhVRVWWna9eQy/ZOycjIY+nSfcTEmJkxowcdOlwcYyY318Ibb+wmPj6Qs2fLCQvzY+LELoSH\n+3HoUBF33LHSfZE2MtLEmjVT2LevAEVRuOmmGPcXi9Pp4scfs9my5QxJSZHExflz6FARCQmBDBwY\npfpxZDzhmtXghRDqZjDomjVL0unTpUyZkkZRUc1NTDk5Fl56aZS7z3xUlD9PPTWUigo7AQFG912u\nAJ07B5GUFOG+E3XAgCji4gLo3j203utkZORz992fur8MXnxxBH/8YzqKorBmzRRuvlnmY20JGaqg\nCWrpG6uGnGrICNdXztrD9l5ORYWdRx8dyJNP3sRvfzuAvXvzOHu27jAEBoOOkBBfd+NeVeVg7948\n9u49zb//PZaFC4fy/PMjePHFW+rdmXpBfn6Vu3HXajX4+uqZM6cf8fEBHDpUdAXvtGlq+bu3hJzB\nC3EdOnasZkjfnJwKFiy4kcGDYy67fnr6OZ59tmYs9kGDovntbwfw+ut7efHFEQ3e+Wq3O1mx4hDz\n528C4C9/uYXf/OYGdwPudLoavGu1S5cg4uPNnD1bwXPP3czKlT9x6lQpc+b0o3//SA+88+uL1OCF\nuM44HC5++csN7oucgYFGtmy5t9647RdUVtqZOPGTOgN8Pf/8CD777CirV0+pN2MSQE5OBcOHf+Au\n6ZhMelatSmHq1NXY7S7eemscd97Zpd5olVDz5VNYWHNR9v33ay7MajSwefO9rZ50uz2R4YKFEI2y\n252cOHFxFMeyMhtVVfVnXbrAZDIwblwn93KXLsGcO1fOE08MabBxh5oeOomJF78wOncOYu3aY1gs\ndmw2J7/61Zd1JtaurUuXYG68MRar9WImRWl+OUlcJA18E9RSl1NDTjVkhPaf08/PwNNPD3X3Z1+w\nYDDx8Y1PlwcwZ04/li2bwD//OYZ//Wsss2b1pXv3YNauPcq6dcfIybHUWT842Je//30Ms2f34b77\nevGvf41l8+aLk137+urR6RqfEATg0UcHkpAQgE6nYfHi4XTt2vhkIJ6glr97S0gNXojr0OjRHdiy\n5R6sViedOwdjNjfeD72y0k5mZj779hUwbFgs/fqFY7M5mTdvk7vMM2dOXxYvHuG+Ser48RIeffRr\nbDYXN98cS2ioH6+9djvz5m2kutrJyy+PITY2ALvdycGDhVRVOejWrW43zb59I/jyyxlUVzuIjDQ1\nOlm2aJzU4IUQl5WefpZJk1YBNbXwzz+fRlxcAAMHLsVur+nxEhrqS3r6fe7hgN9/fz+PPLLRvY8P\nPpjI+PGJlJVZcbkUgoNrxn1fvfoIDz74BS6Xwty5STz99BCCghoeE17UkBq8EMJjsrIq3P+vKDV9\n4AMDjdx5Z6L78UmTuqLXazhzpoySkup6wx1cGAo4MNDH3bjn5Vn4xz92uGvrb7+dUa/UI66MNPBN\nUEtdTg051ZARJOelevUKcw8lEBbmR/fuoQQG+rB48QiWLp3AsmV38pvf3MCf/rSNAQOW8sAD6+nZ\nM5Rf/ao/3bqF8PTTg0lOrjtBx+7duTz11JY6F2IjIvy8Oh6NWv7uLSE1eCHEZfXrF8EXX0wnK6uC\n+PiLd6DGxQUQF1dzcfbLL0/y3ns1XRo3bz7Dt9+e5Z57erFgwY0cObKP4GAf9/7Kyqw8+uhG9u8v\nYP78Qcydm4Td7uKBB/o1ebFXtIw08E1Qy+hyasiphowgORvSs2cYPXuGNfq8Xl+3R0xpqY3x41NZ\nv34agwcPqvOcy6W4Z4R6+eUddOkSxOrVU4mNrT9D3LWklr97S0iJRghxxZKTI5k3byAREX5MmJBI\ncXE1VVUO8vIq660bHOzLK6/cSliYH/7+Bp57bniduVqF50gD3wS11OXUkFMNGUFytkZYmB+PP34T\nH344iaoqO//+9x6io/3p1CmowZxDhsSydeu9fP/9zxk/PrHBO1qvtbZ0PD1FSjRCCI/w9dXTv38k\nCxfezC9/aaFz52C6dg0hJ6fh9WNivFuSuR5IP3ghhFAR6QcvhBBCGvimqKUup4acasgIktPTJKf3\nSAMvhBDtVKtr8AcPHmTZsmX07t2b+++/H4CMjAxWrlwJwIwZM+jbt2+j20sNXgghWu6azMlqt9uZ\nPHkyhw8fBsDlcpGamsrChQsBeOGFF+jTp0+Ds70IIYS4+lpdoklKSsJsvtjNKScnh5iYGIxGI0aj\nkaioKHIa6x+lImqpy6khpxoyguT0NMnpPR6rwVdUVODv78/SpUtZunQpJpOJ8vKGZ2y5oPYBTU9P\nl+UrWM7MzGxTeRpazszMbFN51L4sx/P6PZ7NdUX94A8cOMDOnTu5//77ycrKIi0tjblz56IoCm+/\n/TZTp04lOjq6wW2lBi+EEC13zfrB1/5uiI6OJjs7272ck5PTaOMuhBDi6mt1A5+WlkZqaio7d+7k\nzTffRKvVMm3aNBYvXszzzz/P9OnTPZnTa1rzs8gb1JBTDRlBcnqa5PSeVveiSUlJISUlpc5jycnJ\nJCcnX3EoIYQQV07GohFCCBWRsWiEEEJIA98UtdTl1JBTDRlBcnqa5PQeaeCFEKKdkhq8EEKoiNTg\nhRBCSAPfFLXU5dSQUw0ZQXJ6muT0HmnghRCinZIavBBCqIjU4IUQQkgD3xS11OXUkFMNGUFyeprk\n9B5p4IUQop2SGrwQQqiI1OCFEEJIA98UtdTl1JBTDRlBcnqa5PQeaeCFEKKdkhq8EEKoiNTghRBC\nSAPfFLXU5dSQUw0ZQXJ6muT0HmnghRCinfJ4DX7JkiVkZWVhNBoZOXIko0aNanA9qcELIUTLtaQG\nr/f0i2s0GubPn094eLindy2EEKIFrkqJxksdc64KtdTl1JBTDRlBcnqa5PQejzfwfn5+vPrqq/z1\nr38lJyfnsuvWPqDp6emyfAXLmZmZbSpPQ8uZmZltKo/al+V4Xr/Hs7muWj/4kydPkpqayoIFCxp8\nXmrwQgjRcm2iH7zBYECv93iJXwghRDN5vIF/5ZVXWLRoEcuXL+fnP/+5p3d/zbXmZ5E3qCGnGjKC\n5PQ0yek9Hj/Fnjdvnqd3KYQQohVkLBohhFCRNlGDF0II4V3SwDdBLXU5NeRUQ0aQnJ4mOb1HGngh\nhGinpAYvhBAqIjV4IYQQ0sA3RS11OTXkVENGkJyeJjm9Rxp4IYRop6QGL4QQKiI1eCGEENLAN0Ut\ndTk15FRDRpCcniY5vUcaeCGEaKekBi+EECoiNXghhBDSwDdFLXU5NeRUQ0aQnJ4mOb1HGnghhGin\npAYvhBAqIjV4IYQQ0sA3RS11OTXkVENGkJyeJjm9Rxp4IYRopzxeg8/IyGDlypUAzJgxg759+za4\nntTghRCi5VpSg9d78oVdLhepqaksXLgQgBdeeIE+ffqg0Wg8+TJCCCGawaMlmpycHGJiYjAajRiN\nRqKiosjJyfHkS1xzF+pyLlzYsXs5TePUUD9UQ0aQnJ4mOb3How18RUUF/v7+LF26lKVLl2IymSgv\nL290/doHND09vU0t79y5k/T0dKK6RpGlO8c601pSAz7mlP4kCorX8126nJmZ2abyNLScmZnZpvKo\nfVmO5/V7PJvLozX4rKws0tLSmDt3Loqi8PbbbzN16lSio6PrrdtWa/DF2iIyjZk4NQ76WPuQ6ZNJ\nlbaKI8afANApOn5eNotQV5iXkwohrkdeq8FHR0eTnZ3tXs7JyWmwcW+rbNjYZNrIacMpAMwuM0W6\nIjRcvIbg1Dhx4vRWRCGEaDaPlmi0Wi3Tpk1j8eLFPP/880yfPt2Tu7/q7Bo7hbrCOo/l6LPpau+G\nUTGCAkOqhhLkCvZSwsa15ufbtaaGjCA5PU1yeo9Hz+ABkpOTSU5O9vRurwk/xY+hVcP42vQlaMDs\nCiC5qj/7DZncarmdQGcQ4a5wjBi9HVUIIZokY9FcwoGDAm0Bdo0Ns2Im0BWEDRsGDBTqCijUFhLk\nCiLKGY3e89+PQghxWV6rwbcH1ZpqfvTdzjGfo5hcJiaXTyXSFUWeLpePAz7CqXGAAtPLZxLvTPB2\nXCGEaJQMVXCJIm0hx3yOAlCprWS/cR8VVFCmKa9p3AE01KvVNyZLl8W3vlvJNGZg0ViuVmxV1A/V\nkBEkp6dJTu+RM/hLaNCAAhc6zhjxIVufRYASiFExYtPY0CgaIpwRTe4rX5vHpwErsWlsAIyoHMkg\n6+CrmF4IIS6SGnwtubocNvltpK+tLwd8DhDmDEODhgBnIEf1RxhZPQqL1kKAMxA/xQ+L1oK/4k9w\nI71qTuhPkBbwiXu5i60rkywp1+rtCCHaIRkPvpZSbQm52pxmlUfO6E+TY8imWFdMgq0DubpcKjQV\nVGjLyTXmkO63lUR7F3zx4VPzSlYEfsiKgA8p0BY0uD+Ty49oe819ABpFQzdbd4++NyGEuJx23cDn\n6/L4KOADPgh6jy/8P6dc0/iwCQDBrhAAdvruIFufxa2W23DiZK9xDwn2DiTYO+DCRYGugBJ9CQAW\nrYU8fW6D+wt3RTC86hZuqRzFOMsdxNrjPPsGa1FD/VANGUFyeprk9J52XYM/oT9BpbYSgNOGUxTq\nCghwBDS6fpw9nvEVE8jWZ9PV3pUQVyjJ1f3ppOuML77sM2ZgUkyEucLr1OlNLlOD+9OhI8HZgVhn\nHDp0nn57QghxWe26Bn/AsJ8N5vU1CwrcU/4zYpyxzd4+W5fFxwEfomgUNIqGkVWj2Oz3DfeU3UeF\nrpyjhiN0tHc6X7bxdW9XTTW5+hwcOIh0RhKgBNbZb7G2mBOG4+gVPR3tnQhSgjzyfoUQ7V+77Qdf\nXVJC9g8/UHb6NLE33UREUtJl1+/g6MiwyuGc0Z8myZbcrJ4vtRVri1E0Nd9/ikbBqrGCBhwaO93s\n3elmr6mp27FzRncai9ZCuDOc0/rTbPH/BoBO9s6Mr7gDHXpKtCVoFS1f+q8n93xZp6e1F7dVjsWA\noaWHQwghLktVNfhTX3/N2nvvZcsTT/DJpEkU/fTTZdc3K2Zusg5hqmU63e090LewETW5/NEpNd+B\nOkWHUfGhf/UNhDvD66x3Rn+alQErWG9eR2rAChQu/ig6aTiBRWtht88u3g9axlGfI+TqLtbszxnO\nYtdc+TjzaqgfqiEjSE5Pk5zeo6oz+Kzt293/bysrozIvj9DuTfdMqT0aZEv4Kb4MrxqODRvBrhAC\nnUH0dfWrNxbNWf0Zdz2+WluFj+LDaMutaNBgUky4FBff+34HwDn9WXrYenLY5xAAydU34KP4tCqf\nEEJcjqrO4BMnTIDz0/8FJSYSkHB1hwqIcEUS5YhGhw5fxbfRgcYSHB24cNLu5/Ij1BXGNr90Nvl/\nzWf+a8g15DKouuYGp9OGU0Q4I5hSPo1pZTNJsiY16wJs7V8FDRk+fHjL3+A1poaMIDk9TXJ6j6rO\n4OOGD2fGhg1UFRYS0q0bQR07evw1bFg5oz9Dob6AeHsCsc444pzxl90mwdGB6eUzsWgthDnDqdCU\nY9Vaa57UQIGugE62TuTZ8vB3+dPF3o1QVygOHE0OWFamKSPDZy95uhwGWAfRwdERrbq+l4UQXqKq\nlkJvNBI9aBCdx40jODHxqrzGWf1Z1gSk8Z1fOqkBH3Ok/PJ1fgA9euKdCfSw9yTcFY5ZMdeMHw+g\nQIgzhABXAJMtUxlXdQdGl5FvfbeyIuAjDhj2NzjXqx07NmwcNh7kv34/cMp4itXmTyls5KYqNdQP\n1ZARJKenSU7vUdUZ/LVQexAxl8aF1WBt8T60io5J5ZMp1hXho/gQ5AwiXLnYg+e04RQ7/H4EYIP/\neoLLg4l1XrwJKk+byybTRvxd5jr1eZfG5R7XRgghmiIN/CXiHfHoFB1OjROzy0y8KQFczd8+X5dH\nqvljrForwY4QUiqmEKKE1FnHoq01bIIGbLV60VRTzRf+6ynUF6BVtIyuvJUjymFsGhvdrT3cd9te\nSg31QzVkBMnpaZLTe6SBv0S0M4Z7yn6GRWsh2BVCSCMNamNydDnu+nuJvphCXQEhjrr7SLQnsse5\nmwpdOYm2xDrdLl0aF7bz27s0Ln7w/Z7pZfeg4CJACcSkNHzXrBBCXEpVNfhrQYOGSFcUnR2JhLhC\nWlyXC3DVGgpBocEGOcwVzszye/l56WxurxyPWTG7nzMpJm6zjEWn6NAoGm6pGkW4K5woV/RlG3c1\n1A/VkBEkp6dJTu+RM3gPi3XEMrFiEuf0Z+lk70ykM6rB9QKVQBrr+djR0YlZZXNAUQhUgqTXjBCi\nVTw+Fs2SJUvIysrCaDQycuRIRo0a1eB6nh6LxoaNEm0xTo2TUGcoPrXGhvGUMk0ZNo0Nf5c/fvh5\nfP9CCNEUr45Fo9FomD9/PuHh4U2v7AF27JzUn6BMV8ZWv82ggRurhjC4+sYGb0pqrXxtPp8GrMSi\ntdDL2pv1QnKsAAAehUlEQVQRVSPxV/ybvb0DB3m6XCxaC6HOMMJcYR7LJoQQDbkqv/2v5QCV+bo8\njhqPkOGz1z1cwI++26nUemb+0wt1uYPGA+7eLwd9DlCkLWrRfrL05/g44EM+M69hZcDHFLdw++bm\nbMvUkBEkp6dJTu9p9Rl8RkYGq1evrvPYrFmz8PPz49VXX8VsNjN79myio6Mb3Ud6erq7a9KFg9vS\n5fhRcVRqKwl2BVOiKwbA7DKTeyaXfSf2X/H+LzBYDbirMgpYSiyk7204v4LCkeKfcOgddAzsiL9i\nJseVg1kxU6GpoFJbyTnLOfbvPNDsPD/88AOmABP9evdr8PnMzMxWvb9ruZyZmdmm8qh9WY7n9Xs8\nm+uqjQd/8uRJUlNTWbBgQYPPe6oGX64p50vTF8Q54ynXlKGgcIN1IBGulg0NDGDRWLBhw0/xqzO+\nO9TU33f4/ki2PptB1YPpYu/a6DADZ3VnWRWwEqfGQQ9rT5Kt/dlnzMRH8UXRKBzw2cfMsp8R7mpe\nGcuBnWOGY+z23UW8PZ7+1hswK41PXCKEaL/axHjwBoMBvf7qd9IJUAK4o/JOKqjApJgwY256owYU\naQtZ459Gsa6YvrZ+3Fw1ok63xEAlkNFVt+LA0eTY7Zk+GTg1DgBCXCGkBaxy34F6Q/UAUsqmYnY1\nP2e+roDP/T8DDWTrswhyBdPPdvmx8IUQwuM1+FdeeYVFixaxfPlyfv7zn3t69w0yKSYilchWN+4A\nRw1HKNYXgwb2+WRSoM0H6pZqNGiaNTFH7RuXNIq2zvACBdoCvjN9yynDyWZns52faOQCSwPXF9RQ\nP1RDRpCcniY5vcfjp9jz5s3z9C6vCWPtMdkV0F3Boelp64UGDfm6fDraO1FkLeSwzyG0ipZERyLf\n+20jy3mOHvaezdpfmDOcLrYuHDMeqxmN0ta11dmEENePdj0na0uUakrZ7vc9WfpzDKgeSC9b7wa7\nWRZriynRFmNSTEQ4I5t1E5JFY6FAm0+hrpA9vrsp05aSUjGVTo5Ozc5XqanEoqnAR/EhsBlzuLpw\nYcOKAaNM+C1EO9ImavBtnRUrufocrBorEc5Igl3B3Fp5Gw4c+ODT4CxQxdpiPjGvoFxXjlbRMq18\nRpNjxV9g1zgIc4UxvOoWAlzmRu9wbYxJMTV7HJoqqsjw2csBn310ticyqHqwXJQV4jp0Xd0D78BO\niaaYCsr5yXiYTwJS+cy8hs/8V1OmKUWPHl986zTuu/buokxbSqXGQpmmlHJdOVAzENgZw5kmX9OF\ni90+O1kbkMaqgJXs9PkvQa5gj59V164fFuuK2OaXTomuhN2+u8jSZ3n0tVpLLTVOyelZktN7rpsz\neAcO9hn38Y1pI4n2Llg1F8d5z9fnc8xwjHhHQp3ulTZsWPtX827gOwQ6gxhXeQd6lwGHtmZ43whH\n010xbdg4ZjzqXs4xZFOtqfb4qJA6nY68PXvY8eqr+IaHMOyRW9nW7wBowHG+R48Q4vpy3TTwJdoS\nvjFtBA3k6/LpbevNOcNZAIKcQZTpytho/JIpFdPdtfdibRHbA7eBpmbo3wyfPdxTfi+nDKcIdYYR\n54i73EsC4IMPPa292WaqOTvoYO+In8uz49g4rFa6BQfz6aRJVBXWTFjSKTeXhA8nYPNRiLU3nfNa\nUMt425LTsySn91w3DbwOLTp0OHFSritDo2i5q/xu8rX5+OHHd37fYlYCcNWa3UNzSQVLr+iJcEUS\nYY1s9utq0JBkTSbKGYVdYyfKEe3Rgcoc1dUceP99qgoL3Y07QNmR49xZOAafwIbHkK8uLsZaUoJP\ncDC+Ic0f874iO5tDK1ZQkJlJ39mzibv5ZjTa66rSJ4RqXDefzGBXCHdV3E2oM5QEewe627vTydGZ\nKFcUBbp8bqy+idssY+vcwRrqCmVU0Rj8XWbi7PEMsA5q1Wv74UcnR2e62bvXDBPcCtayMk5u3MiB\nDz6g4MAB9+Mlx4+zecECzm7dSr85cwDQ6HQMffppQgIaHkO+7MwZvvyf/2HpwIF8+T//Q9mZpq8l\nXHDk00/Z9txz/LRqFatnzKDw0KFmb6uWGqfk9CzJ6T3t+gxeQaFIW4RNYyPIGURnRyIxZbHo0GHA\nQK42l8/Ma3BqnADcUXEn1JobVY8exy4n9w2+H72ixwefxl7qqjv55ZdsePhhAPzCwpj+xRcEd+mC\nRqtFo9Nxbts2dL6+jHn5ZaIHDSK0R49G95W7ezcnv/qqZr9ffUXurl0EJiTUW6/k+HGKjx7FJyiI\n6IED0er1dRp0p9VKVUHDk4ALIbyvXZ/Bn9Wd5f3A5XwU+D5bTZupohJffN13o5bqStyNO0CeLrfe\nPgYPGoy/4n9NGvdyTTklmmIc1L8oenrLFvf/VxUWYsnLAyCka1cmvPsugR064KiqImbIEML79EF7\nmWEidIa6d+PqjPX7+5ecOMGWJ55g7T338Ondd7u/EHrfey96U82vgvhbbqH0xAlslot31iqKQvm5\nc1RkZ9fbp1pqnJLTsySn97TrBn6n7w73mDAHfQ5QqisFam48ytfmY8VKiDMUAJ2io4Ojo9ey5uiy\neT9wGe8GvcNenz3YqempY6+qImv7dhJGjHCvG9SpE+bYWAC0ej2Jd97JzI0bmfTxx4SdP3O3Us1+\n4z7S/Fex37gPKxd7DUUPHMgNv/kNAfHx3PCb3xA1cGC9PKUnTnBq40YAnDYb+5cvr3ntxERuXLCA\nm558koD4eLY89RTW4ppRPBVF4eRXX/H+sGF8OHIk577//iocKSFEc7XrBr72mDB6RY9BMVKhKecL\n/89JDfiIKk0VXexdGVo1jOGVIwh21r/YeC3qci5cbPP9jiptFeHVoTj2ZZF3YB+O6mqyvv+elRMm\nsOeNNxjx4ouMe+stJq1YQVDHi19GGo2GnQcPYjRfHIsnR5/Dl/5fcMJ4nC/9vyBHn+N+zhQZydCF\nC5m5aRNDFy7EP7L+RWOfoCAM/hcnNAntWTOsgsHfn7w9e/jhL3/h4AcfkDByJMbAmusK5efO8cWD\nD2IrL6eqoICvH3mEqqKL496rpcYpOT1LcnpPu67BJ1mT0aKhUFfIgOqBhLnCOK07zWnDKQDOGE/T\ny9obgEhnFMFKsFdyatDgq/gSZAukx3IX2/9nDj8At736KtVlZQDk7d5N3u7d3L1yJSFdmx6LplpT\nDdR8sQ3c3wXr8UMURVkJ7d695nGjEf1lZt2KHjiQO5cv58D77xPcpQvdJk8GwGg2M/xPfyJxwgQU\np5O4YcPwOd/AazQaNLqLN3Bp9Xo0mvp3BAshro3rbiyaXF0uHwa8h6Kpedu3Wm4nyZZ8zXNcqkhb\nyMn8THbc+CDV50seBrOZlNRUUu+4AwDfkBCmff45oT16kLtrFzv++U/MMTEkP/wwwYmJl+yviDT/\nT+iTGcv+iU9QkZWFT1AQU9asIaJfv6v2Ps5s3crXv/0tOh8fxv7730Q3UP4RQrSejEVzGeHOcO6u\nmEyGTwaxjhgS7V2a3Kb2fKphzjBCr8J8qqGuMHy0fTkUF+du4ANiYwno2JGp69Zhyc4mtEcPQnv0\noPT0adKmTcNaUgJAVVERY19/vc6F1VBXKNMqZpC1bxMVWTVDFVhLS8nLyLiqDXzCLbcw8+uv0ej1\n+IWGXrXXEUI0rV3X4BuiQ0dnRyJ3W1IYbL0Js3L5MeS/++47zurP1ppPdQXF2uKrks0/JJxxb7xB\nl0mT6DJxIuPfeQdzdDRxQ4fSfcoUwvv0AcBRWelu3AGKjxzhwPkp+2oLVIIIjIiFWmWSCxdnryZT\nZGSDjbtaapyS07Mkp/dcd2fwLWU0GjmtP+mecMOitVCuKSOE5t/92RJhvXox4d13ARqtXwfExTFw\n3jx2vvIKWr2eIU8+Sa7V2uC6kf37c3dqKqc3bSJmyBBiBg++KrmFEG3PdVeDb42jhiOsNddMMO7j\n8mFm+b2ENXM+1aulurSU0uPH0fv6EtK9O1qdjPkuxPVAavAelmDvwJTyaVi0FiKcEV5v3AF8g4Lw\nveGGFm/nqK6m7PRptAYDQZ06SS8XIdqx664G31Lp6en44ENHRyd62/oQ4Wz+QGPXUnPqhxcGJntv\nyBA+GD6cs1u3XoNkF6mlxik5PUtyeo808CpypdW08nPn2Pz44wA4qqrY+vTTWMvLPRFNCNEGtapE\nc/DgQZYtW0bv3r25//773Y9nZGSwcuVKAGbMmEHfvn09k9KL2sL4FIrLxZlvv2X3a68R3qcP/R54\ngMAOHeqs05ycOoMBo9mM7XyjboqMrDcuzdXUFo5lc0hOz5Kc3tOqBt5utzN58mQOHz7sfszlcpGa\nmsrChQsBeOGFF+jTp4/UeD2g6PBh1s6cidNm49TGjehNJm46fybeEoEdOjDp44/5duFCfENCGLF4\nMXpf36Y3FEKoUqtKNElJSZjNdfuP5+TkEBMTg9FoxGg0EhUVRU5OTiN7UI+W1uXslZWc2bqVfcuW\nkbt7d72yiqOqiorsbGwVFc3ep62iAqfN5l4uPnKk1Tljhwxhypo1THz//csOKXw1qKXGKTk9S3J6\nz2XP4DMyMli9enWdx2bNmkXHjvVHXayoqMDf35+lS5cCYDKZKC8vJyYmptH9p6enu38WXTi4bW25\ndtbmrN/B4SBtyhQAdL6+zNiwgYh+/UhPTyfYaKR4zRr2v/cecSNG0P2RR+hx441N7j8oMZFu06Zx\nZOVKDGYzyQ89xHfffYeiKAwfPhxbeTmWoiK2bdvGsGHDmtyfwc/PK8czMzPT63/P9rQsx/P6PZ7N\n1ep+8AcOHGDnzp3uGnxWVhZpaWnMnTsXRVF4++23mTp1KtHR0Q1ur6Z+8C2x67XXSH/mGffyXR9+\nSOdx4wA4tXEjq6dPdz839l//oufMmZfdX0VWFgX796M1GPCLjERvNNYZbCxn5042zZ+PRqNhzCuv\nENWKrpNCCPVoST/4VveiufR7ITo6muxakzzk5OQ02ri3Z9GDBrlHVPQNDSWoUyf3c5ces6a+W6sK\nCtg4bx5rZs4kbcoUTn7xRZ3RGivz8vh89mwK9u0jPzOTL+bOpbLWvKxCiOtbqxr4tLQ0UlNT2blz\nJ2+++WbNjrRapk2bxuLFi3n++eeZXutMVc2aqss57Xaqiopw2msm6IgeNIjpGzaQ8umnTN+woU6d\nOyI5maSHHkJvMtF53DjizpdTGmPJy+PU11+7l4+tW+eehAPA6XBgrzWbkq2iAuV8jrZILTVOyelZ\nktN7WtWLJiUlhZSUlHqPJycnk5zs/aF3r5XKggL2vvkmP61cSYcxY+iWkkJkcjI6o5EjaWn4hYXR\n+777CO7cGQD/iAiGPfMMAx95BGNgoHsc9cb4BAcT1KkTpSdPAhA1YAAhXbtyKDUVxeUidsgQbl+y\nhPUPPggaDbf93/9hioq62m9bCKESMhbNZTitVtBqG+0rfvKrr1hTq4Y+7JlniBkyhC8eeADL+R5E\n3adM4fbXX29w3tPmKDh4kBPr1+MbGordYqHw4EGCOnVi+4sv0mnsWMb++99YS2umIgzs0EG6pQrR\nzslYNB6QvWMH3/7xjxjNZkYsXkxYr1711nFUV9dZdtpsWIuK3I07QMH+/Tiqq1vdwIf36oXRbGbF\nuHFUnt9vx9tuI/6WWzizZQv2ioo60/cJIcQFMlRBA8rPnWPtzJnk/Pgjpzdt4pvHHsNWq9Z9QdQN\nN9Dx/Ddp9KBBuBwO/GNiGPDIIzUraDTc+PjjDZZiqgoLKTtzxr3fyoICin76icqCgnrrOq1Wd+MO\nUH7mDL4hIfS69158QmqGLW5O/bDgwAH2v/ceJ7/6iupa48lfK2qpcUpOz5Kc3iNn8A1w2e1Yz8+F\nClCZn0/2Dz/gHx1NeO/e7scD4uMZ98YblJ05g72yEoO/PxH9+hHUuTNdJk5E5+Pjnqy6tuKjR/ny\nl78kLyOD/r/6FX1mzWLjo4+SvX07scOGMeYf/8BeWYlPQADBXbrgHxPDjU88wY9//SsanY7Bv/sd\npogIwnr3xlhrYuzLKTl2jE9TUqg6/wVy+7/+Ra8mumgKIdRNavANcNhsHProIzbNn4/OaOTmRYv4\n7z/+geJyMX39ekK6dWvWflxOJ/kZGViyswlKTCTsfGO/45VX2PanP7nXu+Pdd1k/Z457ecQLL/Dt\n009jDAggZdUqogcOxFZeTvGxY+iMRkK7d68zPV9zZG3fzsoJE9zLve65h9tff71F+xBCeJ/U4K+Q\n3mik58yZxN50E/n797Pt2WfdZ74VOTnNbuBzd+7kk7vuwmW34xsaytTPPiOsZ8969XjtJRdxLwxL\nYCsvZ//y5UQPHIgxIICo/v1b/Z78Y2MJ7tKFkmPHAEicOLHV+xJCqIPU4Buh9/EhtEcPXDod5WfP\nAuAfHU1AI3Oalhw/zuktWyg8dMj9WO7u3bjO90uvLiqi7Hx3x/CkJDqMGYMpKorBCxYQ3rcvgxcs\nILRHDwb/7ncU7N/v3kdwYmKz8jZVPwzq0IFJH33EncuXM3XdOjqOHt2s/XqSWmqcktOzJKf3yBl8\nE/yio7nlL3/BWlJCSNeu+AQH11un6MgRPk1JwZKdjTEggClr1hCZnOwuyQDofHzwj43FZrHw7VNP\n4R8TQ5cJEzi2di09pk7lpscfZ8Cvf43OZOLct99iLS4msn9/uk+d6rH3EtylC8Fdunhsf0KItk1q\n8E3YOG8e+5ctcy9P37Ch3sTVx9atY12tcfHHvPIKfWfNwl5ZSc6OHRQfO0ZkUhJRAwaQu3Mn577/\nHq1ez5G0NEqPH2fmN98QGB9fZ5+KokifdiFEPVKD9yBzrdEwNTodej+/euuYIiPRaLUoLhcAAQkJ\nABhMJhJuuYWEW24BasZ1Xz19uvvGpNEvv4xfWFi9xh2Qxl0IccWkBt+E4FGj6DN7NlEDBjBh6dIG\nx1CP7N+flE8+YdC8eUx87716Z/gXVGRnuxt3gPy9e9E1Y8KN/MxMDqWmcm7bNuxVVQ2uc2n90OV0\n1rsRy9vUUuOUnJ4lOb1HzuCbkG+3M+bvf8dpszU6+5HOYCBh5EgSRo687L4C4uPxi4igKj8fNBo6\njhlD3JAhl92m8OBBPpk40T3NXsqqVXQYNeqy25QcP86PL71E8dGj3PTEE3QcMwaNVr7LhbjeSA3+\nGis8dIiiw4fxj4oisn//JqfMO7lxI2tqjcw55A9/4Mbf/x5LXh7HP/+cgn376D51KrFDhrjLOlue\neoq9b7wB1HTBvHfLljoXfIUQ6iU1+Dak5PhxCg8cwDc01N2zpiWNbWB8PD7BwVhLSkCjIeb8DFDH\n163jm8ceA+DABx8w8+uv3XfZVpw7597eZbe3uVKNEOLakN/tTbiSulzZmTOsvfde1s2axScTJ3Ky\n1tjuzRXaowdT167ljv/8h2mff07MTTcB1Okrr9FoqCwq4uBHH3Fq0yYGzZ+P8fz4NwMeeYSg88MV\ne5taapyS07Mkp/fIGfxVZMnOrjNB9pHVq+l2990t3k94nz6E9+lT57HuU6dy4P33cVqtDHvmGb56\n+GH3KJbj3nqLe7duxWGxYI6La3LceSFE+yQNfBNaOsltbX4REZiioqjMzQWg45gxLd5HwYEDFO7f\njyk6mqgBA9yDi8UOGcLMjRuxlpRQfckQxSe//JIeHrxBylOu5FheS5LTsySn90gDfxUFd+7M5FWr\nyNm1C//ISKIb6T7ZmOKjR/n07rupOj/P6p3LlxPerx8GkwlTeLi75n5s/XrMsbFUZGUB0KEVXyRC\niPZHavBNuNK6XFivXvS57z463X47vsHBlJ46RcY77/D1//4vJ7/6CpfT2ei2FdnZ7sYdamaQWj1t\nGmtmzCB3924OpaZyauNGQnr04KYnn2TI008z7q236OCFcWaaQy01TsnpWZLTe1p9Bn/w4EGWLVtG\n7969ub/WbfpLliwhKysLo9HIyJEjGdVEn+3rzZnNm9m8YAEAhz78kMmrVxM3dGiD65pjY+uUeEK6\ndePQihUkz53L53PmUH76NAAjXnyRoJtvplNEBD6hoehbOXuUEKJ9aXUDb7fbmTx5MocPH67zuEaj\nYf78+YSHh19xuLbA03W5op9+cv+/y+FwD0PckJAuXZj86acUHTqEwd+f7X/5C87qagxms7txBzj1\n9dfcOGAA/tHRHs3qaWqpcUpOz5Kc3tPqEk1SUhJms7nB57x075QqdLztNgznL5SG9uhBSNeul10/\nrGdPuqWkEDt0KD1nzsQcG4tvWFidMeljBg+m5Pjxq5pbCKE+TZ7BZ2RksHr16jqPzZo1i46NTPTs\n5+fHq6++itlsZvbs2US38bPKpqSnp3v0mz1++HDuTk2lqrCQoM6dG5zMuyFGs5mkBx6g++TJ6E0m\nYgYP5tSmTegMBs5t20aYCs4+PH0srxbJ6VmS03uabOCTkpJISkpq9g7nnJ967uTJkyxfvpwF5+vN\nDal9QC9c4Ghry7Wzemr/sUOGkJ6eTnZREcNbuf8j5eWEDx2K2emk87hxrNqyhRxF8frxutxyZmZm\nm8qj9mU5ntfv8WyuKxqLZv/+/ezatavORdYLzp07x4oVK5g/f36D216vY9EIIcSVuCZj0aSlpbFn\nzx5KSkqoqqri4YcfBuCVV16huLgYPz8/HnzwwdbuXgghxBWS0SSboJa6nBpyqiEjSE5Pk5ye1ZIz\neLnRSQgh2ik5gxdCCBWRM3ghhBDSwDdFLeNTqCGnGjKC5PQ0yek90sALIUQ7JTV4IYRQEanBCyGE\nkAa+KWqpy6khpxoyguT0NMnpPdLACyFEOyU1eCGEUBGpwQshhJAGvilqqcupIacaMoLk9DTJ6T3S\nwAshRDslNXghhFARqcELIYSQBr4paqnLqSGnGjKC5PQ0yek90sALIUQ7JTV4IYRQEanBCyGEkAa+\nKWqpy6khpxoyguT0NMnpPfrWbPTWW2+RlZWFy+Xi17/+NVFRUQBkZGSwcuVKAGbMmEHfvn09l1QI\nIUSLXFENft++fXz//fc89NBDuFwuFi1axMKFCwF44YUXePbZZ9FoNA1uKzV4IYRouWtWg/f19UWv\nr/kRkJOTQ0xMDEajEaPRSFRUFDk5OVeyeyGEEFfgsg18RkYGixcvrvPfqVOn3M9/8803jB07FoCK\nigr8/f1ZunQpS5cuxWQyUV5efnXTXwNqqcupIacaMoLk9DTJ6T2tLtHs2LGD3Nxc7rzzTgCysrJI\nS0tj7ty5KIrC22+/zdSpU4mOjm5w+40bN7Y+tRBCXMeaW6Jp1UXW48ePc/DgQe6//373Y9HR0WRn\nZ7uXc3JyGm3cWxJQCCFE67TqDP63v/0tYWFhaLVaOnTowJw5cwDYu3evuxfN9OnTSUpK8mxaIYQQ\nzea1O1mFEEJcXXKjkxBCtFPSwAshRDvVqousnlJYWMhrr72G0+mkS5cuzJ4925txGmW32/nf//1f\nJk2axPjx470dp0GN3V3cVqjhLue2fgwvpYZ/l2r5jG/ZsoUNGzag0+mYOXNmm/n3efDgQZYtW0bv\n3r3dnVpa9FlSvOjll19WDh065M0IzbJu3TrlpZdeUr744gtvR2lSZmam8uabb3o7Rh1Op1P54x//\nqFitVsVqtSrPPPOM4nK5vB2rUW3xGDZEDf8u1fIZf+yxxxSn06lYLBblD3/4g7fjuO3du1f54Ycf\nlGXLlimK0vLPktdKNC6Xi9zcXHr06OGtCM1itVrJyMhg0KBBKCq4Hl377uK2Qm13ObfFY3gpNfy7\nVMtnHCA+Pp4DBw6wa9cuunfv7u04bklJSZjNZvdySz9L1+RfcUZGBqtXr67z2NSpU7HZbLz00ktU\nVlZyxx13cOONN16LOA1qKOOsWbPYvXs348ePp6SkxEvJ6mosZ8eOHYGau4snTJjgjWiNqn2XM+C+\nyzkmJsbLyRrWFo/hpdavX9+m/l02pKysrE19xi8nKSmJdevW4XA4GDdunLfjNKqln6Vr0sAnJSXV\n6xPvdDoxmUw89thjuFwuFi5cSP/+/TEajdciUrMyVlZWcujQIVJSUti8ebNXcl2qoZwX7Nixg9jY\nWOLi4q5xqsszm81YLJY6dzkHBgZ6O1aD2uoxrK0t/rtsiNlsblOf8cbk5uaya9cunnjiCQAWLVpE\nUlJSm8sJLf8see13qE6nIzw8nJKSEkJDQ9vkT+JDhw5ht9v55z//SV5eHk6nk759+xIfH+/taPU0\ndHdxW9HSu5y9pS0fw9rU8u9Sr9e3+c841JSSnE4nAIqiYLPZvJyortoluJZ+lrx6o1NBQQFvvfUW\nlZWVDB06tE3/LN68eTNWq7XN/nxr7O7itkINdzm39WPYkLb+71Itn/FVq1Zx+PBhXC4XN998M6NG\njfJ2JADS0tLYs2cPJSUl9O7dm4cffrhFnyW5k1UIIdopudFJCCHaKWnghRCinZIGXggh2ilp4IUQ\nop2SBl4IIdopaeCFEKKd+v/ytb1jl0dxAgAAAABJRU5ErkJggg==\n", 97 | "text": [ 98 | "" 99 | ] 100 | } 101 | ], 102 | "prompt_number": 7 103 | }, 104 | { 105 | "cell_type": "code", 106 | "collapsed": false, 107 | "input": [], 108 | "language": "python", 109 | "metadata": {}, 110 | "outputs": [] 111 | } 112 | ], 113 | "metadata": {} 114 | } 115 | ] 116 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython>=0.19.1 2 | numpy>=1.7.1 3 | scipy>=0.12.0 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # See the docstring in versioneer.py for instructions. Note that you must 2 | # re-run 'versioneer.py setup' after changing this section, and commit the 3 | # resulting files. 4 | 5 | [versioneer] 6 | VCS = git 7 | style = pep440 8 | versionfile_source = tsne/_version.py 9 | versionfile_build = tsne/_version.py 10 | tag_prefix = 11 | parentdir_prefix = tsne- 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | To upload a new version: 3 | 1. make clean 4 | 2. git tag a new version: git tag v1.x.x 5 | 3. python setup.py sdist 6 | 4. python setup.py sdist register upload 7 | """ 8 | 9 | import sys 10 | import platform 11 | 12 | from distutils.core import setup 13 | from setuptools import find_packages 14 | from distutils.extension import Extension 15 | 16 | import versioneer 17 | import numpy 18 | from Cython.Distutils import build_ext 19 | from Cython.Build import cythonize 20 | 21 | if sys.platform == 'darwin': 22 | # OS X 23 | version, _, _ = platform.mac_ver() 24 | parts = version.split('.') 25 | v1 = int(parts[0]) 26 | v2 = int(parts[1]) 27 | v3 = int(parts[2]) if len(parts) == 3 else None 28 | 29 | if v2 >= 10: 30 | # More than 10.10 31 | extra_compile_args=['-I/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers'] 32 | else: 33 | extra_compile_args=['-I/System/Library/Frameworks/vecLib.framework/Headers'] 34 | 35 | ext_modules = [Extension(name='tsne.bh_sne', 36 | sources=['tsne/bh_sne_src/quadtree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], 37 | include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'], 38 | extra_compile_args=extra_compile_args, 39 | extra_link_args=['-Wl,-framework', '-Wl,Accelerate', '-lcblas'], 40 | language='c++')] 41 | else: 42 | extra_link_args = ['-lcblas'] 43 | dist = platform.linux_distribution(full_distribution_name=0)[0] 44 | redhat_dists = set(["redhat", "fedora", "centos"]) 45 | if dist in redhat_dists: 46 | extra_link_args = ['-lsatlas'] 47 | 48 | # LINUX 49 | ext_modules = [Extension(name='tsne.bh_sne', 50 | sources=['tsne/bh_sne_src/quadtree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'], 51 | include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'], 52 | library_dirs=['/usr/local/lib'], 53 | extra_compile_args=['-msse2', '-O3', '-fPIC', '-w'], 54 | extra_link_args=extra_link_args, 55 | language='c++')] 56 | 57 | ext_modules = cythonize(ext_modules) 58 | 59 | with open('requirements.txt') as f: 60 | required = f.read().splitlines() 61 | 62 | cmdclass = versioneer.get_cmdclass() 63 | cmdclass['build_ext'] = build_ext 64 | 65 | setup(name='tsne.bh_sne', 66 | version=versioneer.get_version(), 67 | cmdclass=versioneer.get_cmdclass(), 68 | author='Daniel Rodriguez', 69 | author_email='df.rodriguez@gmail.com', 70 | url='https://github.com/danielfrg/py_tsne', 71 | description='TSNE implementations for python', 72 | license='Apache License Version 2.0, January 2004', 73 | packages=find_packages(), 74 | ext_modules=ext_modules, 75 | install_requires=required 76 | ) 77 | -------------------------------------------------------------------------------- /tsne/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | from __future__ import division 3 | import numpy as np 4 | import scipy.linalg as la 5 | import sys 6 | from tsne.bh_sne import BH_SNE 7 | 8 | def bh_sne(data, pca_d=None, d=2, perplexity=30., theta=0.5, 9 | random_state=None, copy_data=False): 10 | """ 11 | Run Barnes-Hut T-SNE on _data_. 12 | 13 | @param data The data. 14 | 15 | @param pca_d The dimensionality of data is reduced via PCA 16 | to this dimensionality. 17 | 18 | @param d The embedding dimensionality. Must be fixed to 19 | 2. 20 | 21 | @param perplexity The perplexity controls the effective number of 22 | neighbors. 23 | 24 | @param theta If set to 0, exact t-SNE is run, which takes 25 | very long for dataset > 5000 samples. 26 | 27 | @param random_state A numpy RandomState object; if None, use 28 | the numpy.random singleton. Init the RandomState 29 | with a fixed seed to obtain consistent results 30 | from run to run. 31 | 32 | @param copy_data Copy the data to prevent it from being modified 33 | by the C code 34 | """ 35 | N, _ = data.shape 36 | 37 | if pca_d is None: 38 | if copy_data: 39 | X = np.copy(data) 40 | else: 41 | X = data 42 | else: 43 | # do PCA 44 | data -= data.mean(axis=0) 45 | 46 | # working with covariance + (svd on cov.) is 47 | # much faster than svd on data directly. 48 | cov = np.dot(data.T, data) / N 49 | u, s, v = la.svd(cov, full_matrices=False) 50 | u = u[:, 0:pca_d] 51 | X = np.dot(data, u) 52 | 53 | if random_state is None: 54 | seed = np.random.randint(2**32-1) 55 | else: 56 | seed = random_state.randint(2**32-1) 57 | 58 | tsne = BH_SNE() 59 | Y = tsne.run(X, N, X.shape[1], d, perplexity, theta, seed) 60 | return Y 61 | 62 | from ._version import get_versions 63 | __version__ = get_versions()['version'] 64 | del get_versions 65 | -------------------------------------------------------------------------------- /tsne/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.15 (https://github.com/warner/python-versioneer) 10 | 11 | import errno 12 | import os 13 | import re 14 | import subprocess 15 | import sys 16 | 17 | 18 | def get_keywords(): 19 | # these strings will be replaced by git during git-archive. 20 | # setup.py/versioneer.py will grep for the variable names, so they must 21 | # each be defined on a line of their own. _version.py will just call 22 | # get_keywords(). 23 | git_refnames = " (HEAD -> master)" 24 | git_full = "3aeafd511bd31747b0e1aef1dffd077ee2c258d4" 25 | keywords = {"refnames": git_refnames, "full": git_full} 26 | return keywords 27 | 28 | 29 | class VersioneerConfig: 30 | pass 31 | 32 | 33 | def get_config(): 34 | # these strings are filled in when 'setup.py versioneer' creates 35 | # _version.py 36 | cfg = VersioneerConfig() 37 | cfg.VCS = "git" 38 | cfg.style = "pep440" 39 | cfg.tag_prefix = "" 40 | cfg.parentdir_prefix = "tsne-" 41 | cfg.versionfile_source = "tsne/_version.py" 42 | cfg.verbose = False 43 | return cfg 44 | 45 | 46 | class NotThisMethod(Exception): 47 | pass 48 | 49 | 50 | LONG_VERSION_PY = {} 51 | HANDLERS = {} 52 | 53 | 54 | def register_vcs_handler(vcs, method): # decorator 55 | def decorate(f): 56 | if vcs not in HANDLERS: 57 | HANDLERS[vcs] = {} 58 | HANDLERS[vcs][method] = f 59 | return f 60 | return decorate 61 | 62 | 63 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 64 | assert isinstance(commands, list) 65 | p = None 66 | for c in commands: 67 | try: 68 | dispcmd = str([c] + args) 69 | # remember shell=False, so use git.cmd on windows, not just git 70 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 71 | stderr=(subprocess.PIPE if hide_stderr 72 | else None)) 73 | break 74 | except EnvironmentError: 75 | e = sys.exc_info()[1] 76 | if e.errno == errno.ENOENT: 77 | continue 78 | if verbose: 79 | print("unable to run %s" % dispcmd) 80 | print(e) 81 | return None 82 | else: 83 | if verbose: 84 | print("unable to find command, tried %s" % (commands,)) 85 | return None 86 | stdout = p.communicate()[0].strip() 87 | if sys.version_info[0] >= 3: 88 | stdout = stdout.decode() 89 | if p.returncode != 0: 90 | if verbose: 91 | print("unable to run %s (error)" % dispcmd) 92 | return None 93 | return stdout 94 | 95 | 96 | def versions_from_parentdir(parentdir_prefix, root, verbose): 97 | # Source tarballs conventionally unpack into a directory that includes 98 | # both the project name and a version string. 99 | dirname = os.path.basename(root) 100 | if not dirname.startswith(parentdir_prefix): 101 | if verbose: 102 | print("guessing rootdir is '%s', but '%s' doesn't start with " 103 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 104 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 105 | return {"version": dirname[len(parentdir_prefix):], 106 | "full-revisionid": None, 107 | "dirty": False, "error": None} 108 | 109 | 110 | @register_vcs_handler("git", "get_keywords") 111 | def git_get_keywords(versionfile_abs): 112 | # the code embedded in _version.py can just fetch the value of these 113 | # keywords. When used from setup.py, we don't want to import _version.py, 114 | # so we do it with a regexp instead. This function is not used from 115 | # _version.py. 116 | keywords = {} 117 | try: 118 | f = open(versionfile_abs, "r") 119 | for line in f.readlines(): 120 | if line.strip().startswith("git_refnames ="): 121 | mo = re.search(r'=\s*"(.*)"', line) 122 | if mo: 123 | keywords["refnames"] = mo.group(1) 124 | if line.strip().startswith("git_full ="): 125 | mo = re.search(r'=\s*"(.*)"', line) 126 | if mo: 127 | keywords["full"] = mo.group(1) 128 | f.close() 129 | except EnvironmentError: 130 | pass 131 | return keywords 132 | 133 | 134 | @register_vcs_handler("git", "keywords") 135 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 136 | if not keywords: 137 | raise NotThisMethod("no keywords at all, weird") 138 | refnames = keywords["refnames"].strip() 139 | if refnames.startswith("$Format"): 140 | if verbose: 141 | print("keywords are unexpanded, not using") 142 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 143 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 144 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 145 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 146 | TAG = "tag: " 147 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 148 | if not tags: 149 | # Either we're using git < 1.8.3, or there really are no tags. We use 150 | # a heuristic: assume all version tags have a digit. The old git %d 151 | # expansion behaves like git log --decorate=short and strips out the 152 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 153 | # between branches and tags. By ignoring refnames without digits, we 154 | # filter out many common branch names like "release" and 155 | # "stabilization", as well as "HEAD" and "master". 156 | tags = set([r for r in refs if re.search(r'\d', r)]) 157 | if verbose: 158 | print("discarding '%s', no digits" % ",".join(refs-tags)) 159 | if verbose: 160 | print("likely tags: %s" % ",".join(sorted(tags))) 161 | for ref in sorted(tags): 162 | # sorting will prefer e.g. "2.0" over "2.0rc1" 163 | if ref.startswith(tag_prefix): 164 | r = ref[len(tag_prefix):] 165 | if verbose: 166 | print("picking %s" % r) 167 | return {"version": r, 168 | "full-revisionid": keywords["full"].strip(), 169 | "dirty": False, "error": None 170 | } 171 | # no suitable tags, so version is "0+unknown", but full hex is still there 172 | if verbose: 173 | print("no suitable tags, using unknown + full revision id") 174 | return {"version": "0+unknown", 175 | "full-revisionid": keywords["full"].strip(), 176 | "dirty": False, "error": "no suitable tags"} 177 | 178 | 179 | @register_vcs_handler("git", "pieces_from_vcs") 180 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 181 | # this runs 'git' from the root of the source tree. This only gets called 182 | # if the git-archive 'subst' keywords were *not* expanded, and 183 | # _version.py hasn't already been rewritten with a short version string, 184 | # meaning we're inside a checked out source tree. 185 | 186 | if not os.path.exists(os.path.join(root, ".git")): 187 | if verbose: 188 | print("no .git in %s" % root) 189 | raise NotThisMethod("no .git directory") 190 | 191 | GITS = ["git"] 192 | if sys.platform == "win32": 193 | GITS = ["git.cmd", "git.exe"] 194 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 195 | # if there are no tags, this yields HEX[-dirty] (no NUM) 196 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 197 | "--always", "--long"], 198 | cwd=root) 199 | # --long was added in git-1.5.5 200 | if describe_out is None: 201 | raise NotThisMethod("'git describe' failed") 202 | describe_out = describe_out.strip() 203 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 204 | if full_out is None: 205 | raise NotThisMethod("'git rev-parse' failed") 206 | full_out = full_out.strip() 207 | 208 | pieces = {} 209 | pieces["long"] = full_out 210 | pieces["short"] = full_out[:7] # maybe improved later 211 | pieces["error"] = None 212 | 213 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 214 | # TAG might have hyphens. 215 | git_describe = describe_out 216 | 217 | # look for -dirty suffix 218 | dirty = git_describe.endswith("-dirty") 219 | pieces["dirty"] = dirty 220 | if dirty: 221 | git_describe = git_describe[:git_describe.rindex("-dirty")] 222 | 223 | # now we have TAG-NUM-gHEX or HEX 224 | 225 | if "-" in git_describe: 226 | # TAG-NUM-gHEX 227 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 228 | if not mo: 229 | # unparseable. Maybe git-describe is misbehaving? 230 | pieces["error"] = ("unable to parse git-describe output: '%s'" 231 | % describe_out) 232 | return pieces 233 | 234 | # tag 235 | full_tag = mo.group(1) 236 | if not full_tag.startswith(tag_prefix): 237 | if verbose: 238 | fmt = "tag '%s' doesn't start with prefix '%s'" 239 | print(fmt % (full_tag, tag_prefix)) 240 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 241 | % (full_tag, tag_prefix)) 242 | return pieces 243 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 244 | 245 | # distance: number of commits since tag 246 | pieces["distance"] = int(mo.group(2)) 247 | 248 | # commit: short hex revision ID 249 | pieces["short"] = mo.group(3) 250 | 251 | else: 252 | # HEX: no tags 253 | pieces["closest-tag"] = None 254 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 255 | cwd=root) 256 | pieces["distance"] = int(count_out) # total number of commits 257 | 258 | return pieces 259 | 260 | 261 | def plus_or_dot(pieces): 262 | if "+" in pieces.get("closest-tag", ""): 263 | return "." 264 | return "+" 265 | 266 | 267 | def render_pep440(pieces): 268 | # now build up version string, with post-release "local version 269 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 270 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 271 | 272 | # exceptions: 273 | # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 274 | 275 | if pieces["closest-tag"]: 276 | rendered = pieces["closest-tag"] 277 | if pieces["distance"] or pieces["dirty"]: 278 | rendered += plus_or_dot(pieces) 279 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 280 | if pieces["dirty"]: 281 | rendered += ".dirty" 282 | else: 283 | # exception #1 284 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 285 | pieces["short"]) 286 | if pieces["dirty"]: 287 | rendered += ".dirty" 288 | return rendered 289 | 290 | 291 | def render_pep440_pre(pieces): 292 | # TAG[.post.devDISTANCE] . No -dirty 293 | 294 | # exceptions: 295 | # 1: no tags. 0.post.devDISTANCE 296 | 297 | if pieces["closest-tag"]: 298 | rendered = pieces["closest-tag"] 299 | if pieces["distance"]: 300 | rendered += ".post.dev%d" % pieces["distance"] 301 | else: 302 | # exception #1 303 | rendered = "0.post.dev%d" % pieces["distance"] 304 | return rendered 305 | 306 | 307 | def render_pep440_post(pieces): 308 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that 309 | # .dev0 sorts backwards (a dirty tree will appear "older" than the 310 | # corresponding clean one), but you shouldn't be releasing software with 311 | # -dirty anyways. 312 | 313 | # exceptions: 314 | # 1: no tags. 0.postDISTANCE[.dev0] 315 | 316 | if pieces["closest-tag"]: 317 | rendered = pieces["closest-tag"] 318 | if pieces["distance"] or pieces["dirty"]: 319 | rendered += ".post%d" % pieces["distance"] 320 | if pieces["dirty"]: 321 | rendered += ".dev0" 322 | rendered += plus_or_dot(pieces) 323 | rendered += "g%s" % pieces["short"] 324 | else: 325 | # exception #1 326 | rendered = "0.post%d" % pieces["distance"] 327 | if pieces["dirty"]: 328 | rendered += ".dev0" 329 | rendered += "+g%s" % pieces["short"] 330 | return rendered 331 | 332 | 333 | def render_pep440_old(pieces): 334 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. 335 | 336 | # exceptions: 337 | # 1: no tags. 0.postDISTANCE[.dev0] 338 | 339 | if pieces["closest-tag"]: 340 | rendered = pieces["closest-tag"] 341 | if pieces["distance"] or pieces["dirty"]: 342 | rendered += ".post%d" % pieces["distance"] 343 | if pieces["dirty"]: 344 | rendered += ".dev0" 345 | else: 346 | # exception #1 347 | rendered = "0.post%d" % pieces["distance"] 348 | if pieces["dirty"]: 349 | rendered += ".dev0" 350 | return rendered 351 | 352 | 353 | def render_git_describe(pieces): 354 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty 355 | # --always' 356 | 357 | # exceptions: 358 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 359 | 360 | if pieces["closest-tag"]: 361 | rendered = pieces["closest-tag"] 362 | if pieces["distance"]: 363 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 364 | else: 365 | # exception #1 366 | rendered = pieces["short"] 367 | if pieces["dirty"]: 368 | rendered += "-dirty" 369 | return rendered 370 | 371 | 372 | def render_git_describe_long(pieces): 373 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty 374 | # --always -long'. The distance/hash is unconditional. 375 | 376 | # exceptions: 377 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 378 | 379 | if pieces["closest-tag"]: 380 | rendered = pieces["closest-tag"] 381 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 382 | else: 383 | # exception #1 384 | rendered = pieces["short"] 385 | if pieces["dirty"]: 386 | rendered += "-dirty" 387 | return rendered 388 | 389 | 390 | def render(pieces, style): 391 | if pieces["error"]: 392 | return {"version": "unknown", 393 | "full-revisionid": pieces.get("long"), 394 | "dirty": None, 395 | "error": pieces["error"]} 396 | 397 | if not style or style == "default": 398 | style = "pep440" # the default 399 | 400 | if style == "pep440": 401 | rendered = render_pep440(pieces) 402 | elif style == "pep440-pre": 403 | rendered = render_pep440_pre(pieces) 404 | elif style == "pep440-post": 405 | rendered = render_pep440_post(pieces) 406 | elif style == "pep440-old": 407 | rendered = render_pep440_old(pieces) 408 | elif style == "git-describe": 409 | rendered = render_git_describe(pieces) 410 | elif style == "git-describe-long": 411 | rendered = render_git_describe_long(pieces) 412 | else: 413 | raise ValueError("unknown style '%s'" % style) 414 | 415 | return {"version": rendered, "full-revisionid": pieces["long"], 416 | "dirty": pieces["dirty"], "error": None} 417 | 418 | 419 | def get_versions(): 420 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 421 | # __file__, we can work backwards from there to the root. Some 422 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 423 | # case we can only use expanded keywords. 424 | 425 | cfg = get_config() 426 | verbose = cfg.verbose 427 | 428 | try: 429 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 430 | verbose) 431 | except NotThisMethod: 432 | pass 433 | 434 | try: 435 | root = os.path.realpath(__file__) 436 | # versionfile_source is the relative path from the top of the source 437 | # tree (where the .git directory might live) to this file. Invert 438 | # this to find the root from __file__. 439 | for i in cfg.versionfile_source.split('/'): 440 | root = os.path.dirname(root) 441 | except NameError: 442 | return {"version": "0+unknown", "full-revisionid": None, 443 | "dirty": None, 444 | "error": "unable to find root of source tree"} 445 | 446 | try: 447 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 448 | return render(pieces, cfg.style) 449 | except NotThisMethod: 450 | pass 451 | 452 | try: 453 | if cfg.parentdir_prefix: 454 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 455 | except NotThisMethod: 456 | pass 457 | 458 | return {"version": "0+unknown", "full-revisionid": None, 459 | "dirty": None, 460 | "error": "unable to compute version"} 461 | -------------------------------------------------------------------------------- /tsne/bh_sne.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | import numpy as np 3 | cimport numpy as np 4 | cimport cython 5 | 6 | cdef extern from "tsne.h": 7 | cdef cppclass TSNE: 8 | TSNE() 9 | void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, unsigned int seed) 10 | 11 | 12 | cdef class BH_SNE: 13 | cdef TSNE* thisptr # hold a C++ instance 14 | 15 | def __cinit__(self): 16 | self.thisptr = new TSNE() 17 | 18 | def __dealloc__(self): 19 | del self.thisptr 20 | 21 | @cython.boundscheck(False) 22 | @cython.wraparound(False) 23 | def run(self, X, N, D, d, perplexity, theta, seed): 24 | cdef np.ndarray[np.float64_t, ndim=2, mode='c'] _X = np.ascontiguousarray(X) 25 | cdef np.ndarray[np.float64_t, ndim=2, mode='c'] Y = np.zeros((N, d), dtype=np.float64) 26 | self.thisptr.run(&_X[0,0], N, D, &Y[0,0], d, perplexity, theta, seed) 27 | return Y 28 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/Readme.txt: -------------------------------------------------------------------------------- 1 | Barnes-Hut-SNE v0.1 2 | ------------------------------------------- 3 | © Laurens van der Maaten 4 | Delft University of Technology, 2012 5 | =========================================== 6 | 7 | 8 | DESCRIPTION 9 | 10 | This code contains a C++ implementation of Barnes-Hut-SNE as described in the corresponding paper. Please cite this paper whenever you use this code. 11 | 12 | The code also contains a Matlab wrapper for the C++ code (fast_tsne.m). Please refer to the help text in fast_tsne.m for more information on the input format for the data. Based on the code in fast_tsne.m, it is straightforward to develop wrappers in other programming languages. 13 | 14 | 15 | COMPILATION 16 | 17 | Compilation of the files is relatively straightforward, but requires a working installation of CBLAS. Please refer to the compile_mac and compile_linux shell scripts to see the required compilation command. Note that paths may be different on your machine, so may have to be changed in order for the files to compile. 18 | 19 | 20 | LEGAL 21 | 22 | You are free to use, modify, or redistribute this software in any way you want, but only for non-commercial purposes. The use of the software is at your own risk; the authors are not responsible for any damage as a result from errors in the software. 23 | 24 | 25 | CONTACT 26 | If you encounter problems with the implementations or have questions about Barnes-Hut-SNE, make sure you read the paper and the online FAQ first! If your question is not answered afterwards, feel free to send me an email at: lvdmaaten@gmail.com 27 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/compile_linux: -------------------------------------------------------------------------------- 1 | echo "Make sure to change the path to CBLAS in this file before running it!" 2 | g++ quadtree.cpp tsne.cpp -o bh_tsne -O3 -I./CBLAS/include -L./ -lcblas -------------------------------------------------------------------------------- /tsne/bh_sne_src/compile_mac: -------------------------------------------------------------------------------- 1 | echo "Make sure to change the path to CBLAS in this file before running it!" 2 | g++ quadtree.cpp tsne.cpp -o bh_tsne -O3 -I/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers -lcblas 3 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/fast_tsne.m: -------------------------------------------------------------------------------- 1 | function mappedX = fast_tsne(X, initial_dims, perplexity, theta) 2 | %FAST_TSNE Runs the (landmark) C++ implementation of t-SNE 3 | % 4 | % mappedX = fast_tsne(X, initial_dims, perplexity, theta) 5 | % 6 | % Runs the C++ implementation of Barnes-Hut-SNE. The high-dimensional 7 | % datapoints are specified in the NxD matrix X. The dimensionality of the 8 | % datapoints is reduced to initial_dims dimensions using PCA (default = 50) 9 | % before t-SNE is performed. Next, t-SNE reduces the points to two 10 | % dimensions. The perplexity of the input similarities may be specified 11 | % through the perplexity variable (default = 30). The variable theta sets 12 | % the trade-off parameter between speed and accuracy: theta = 0 corresponds 13 | % to standard, slow t-SNE, while theta = 1 makes very crude approximations. 14 | % Appropriate values for theta are between 0.1 and 0.7 (default = 0.5). 15 | % The function returns the two-dimensional data points in mappedX. 16 | % 17 | % NOTE: The function is designed to run on large (N > 5000) data sets. It 18 | % may give poor performance on very small data sets (it is better to use a 19 | % standard t-SNE implementation on such data). 20 | % 21 | % 22 | % (C) Laurens van der Maaten 23 | % Delft University of Technology, 2012 24 | 25 | 26 | if ~exist('initial_dims', 'var') || isempty(initial_dims) 27 | initial_dims = 50; 28 | end 29 | if ~exist('perplexity', 'var') 30 | perplexity = 30; 31 | end 32 | if ~exist('theta', 'var') 33 | theta = 0.5; 34 | end 35 | 36 | % Perform the initial dimensionality reduction using PCA 37 | X = double(X); 38 | X = bsxfun(@minus, X, mean(X, 1)); 39 | covX = X' * X; 40 | [M, lambda] = eig(covX); 41 | [~, ind] = sort(diag(lambda), 'descend'); 42 | if initial_dims > size(M, 2) 43 | initial_dims = size(M, 2); 44 | end 45 | M = M(:,ind(1:initial_dims)); 46 | X = X * M; 47 | clear covX M lambda 48 | 49 | % Run the fast diffusion SNE implementation 50 | write_data(X, theta, perplexity); 51 | tic, system('./bh_tsne'); toc 52 | [mappedX, landmarks, costs] = read_data; 53 | landmarks = landmarks + 1; % correct for Matlab indexing 54 | delete('data.dat'); 55 | delete('result.dat'); 56 | end 57 | 58 | 59 | % Writes the datafile for the fast t-SNE implementation 60 | function write_data(X, theta, perplexity) 61 | [n, d] = size(X); 62 | h = fopen('data.dat', 'wb'); 63 | fwrite(h, n, 'integer*4'); 64 | fwrite(h, d, 'integer*4'); 65 | fwrite(h, theta, 'double'); 66 | fwrite(h, perplexity, 'double'); 67 | fwrite(h, X', 'double'); 68 | fclose(h); 69 | end 70 | 71 | 72 | % Reads the result file from the fast t-SNE implementation 73 | function [X, landmarks, costs] = read_data 74 | h = fopen('result.dat', 'rb'); 75 | n = fread(h, 1, 'integer*4'); 76 | d = fread(h, 1, 'integer*4'); 77 | X = fread(h, n * d, 'double'); 78 | landmarks = fread(h, n, 'integer*4'); 79 | landmarks = landmarks + 1; 80 | costs = fread(h, n, 'double'); % this vector contains only zeros 81 | X = reshape(X, [d n])'; 82 | fclose(h); 83 | end 84 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/quadtree.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * quadtree.cpp 3 | * Implementation of a quadtree in two dimensions + Barnes-Hut algorithm for t-SNE. 4 | * 5 | * Created by Laurens van der Maaten. 6 | * Copyright 2012, Delft University of Technology. All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "quadtree.h" 16 | 17 | 18 | 19 | // Checks whether a point lies in a cell 20 | bool Cell::containsPoint(double point[]) 21 | { 22 | if(x - hw > point[0]) return false; 23 | if(x + hw < point[0]) return false; 24 | if(y - hh > point[1]) return false; 25 | if(y + hh < point[1]) return false; 26 | return true; 27 | } 28 | 29 | 30 | // Default constructor for quadtree -- build tree, too! 31 | QuadTree::QuadTree(double* inp_data, int N) 32 | { 33 | 34 | // Compute mean, width, and height of current map (boundaries of quadtree) 35 | double* mean_Y = new double[QT_NO_DIMS]; for(int d = 0; d < QT_NO_DIMS; d++) mean_Y[d] = .0; 36 | double* min_Y = new double[QT_NO_DIMS]; for(int d = 0; d < QT_NO_DIMS; d++) min_Y[d] = DBL_MAX; 37 | double* max_Y = new double[QT_NO_DIMS]; for(int d = 0; d < QT_NO_DIMS; d++) max_Y[d] = -DBL_MAX; 38 | for(int n = 0; n < N; n++) { 39 | for(int d = 0; d < QT_NO_DIMS; d++) { 40 | mean_Y[d] += inp_data[n * QT_NO_DIMS + d]; 41 | if(inp_data[n * QT_NO_DIMS + d] < min_Y[d]) min_Y[d] = inp_data[n * QT_NO_DIMS + d]; 42 | if(inp_data[n * QT_NO_DIMS + d] > max_Y[d]) max_Y[d] = inp_data[n * QT_NO_DIMS + d]; 43 | } 44 | } 45 | for(int d = 0; d < QT_NO_DIMS; d++) mean_Y[d] /= (double) N; 46 | 47 | // Construct quadtree 48 | init(NULL, inp_data, mean_Y[0], mean_Y[1], max(max_Y[0] - mean_Y[0], mean_Y[0] - min_Y[0]) + 1e-5, 49 | max(max_Y[1] - mean_Y[1], mean_Y[1] - min_Y[1]) + 1e-5); 50 | fill(N); 51 | delete[] mean_Y; delete[] max_Y; delete[] min_Y; 52 | } 53 | 54 | 55 | // Constructor for quadtree with particular size and parent -- build the tree, too! 56 | QuadTree::QuadTree(double* inp_data, int N, double inp_x, double inp_y, double inp_hw, double inp_hh) 57 | { 58 | init(NULL, inp_data, inp_x, inp_y, inp_hw, inp_hh); 59 | fill(N); 60 | } 61 | 62 | // Constructor for quadtree with particular size and parent -- build the tree, too! 63 | QuadTree::QuadTree(QuadTree* inp_parent, double* inp_data, int N, double inp_x, double inp_y, double inp_hw, double inp_hh) 64 | { 65 | init(inp_parent, inp_data, inp_x, inp_y, inp_hw, inp_hh); 66 | fill(N); 67 | } 68 | 69 | 70 | // Constructor for quadtree with particular size (do not fill the tree) 71 | QuadTree::QuadTree(double* inp_data, double inp_x, double inp_y, double inp_hw, double inp_hh) 72 | { 73 | init(NULL, inp_data, inp_x, inp_y, inp_hw, inp_hh); 74 | } 75 | 76 | 77 | // Constructor for quadtree with particular size and parent (do not fill the tree) 78 | QuadTree::QuadTree(QuadTree* inp_parent, double* inp_data, double inp_x, double inp_y, double inp_hw, double inp_hh) 79 | { 80 | init(inp_parent, inp_data, inp_x, inp_y, inp_hw, inp_hh); 81 | } 82 | 83 | 84 | // Main initialization function 85 | void QuadTree::init(QuadTree* inp_parent, double* inp_data, double inp_x, double inp_y, double inp_hw, double inp_hh) 86 | { 87 | parent = inp_parent; 88 | data = inp_data; 89 | is_leaf = true; 90 | size = 0; 91 | cum_size = 0; 92 | boundary.x = inp_x; 93 | boundary.y = inp_y; 94 | boundary.hw = inp_hw; 95 | boundary.hh = inp_hh; 96 | northWest = NULL; 97 | northEast = NULL; 98 | southWest = NULL; 99 | southEast = NULL; 100 | for(int i = 0; i < QT_NO_DIMS; i++) center_of_mass[i] = .0; 101 | } 102 | 103 | 104 | // Destructor for quadtree 105 | QuadTree::~QuadTree() 106 | { 107 | delete northWest; 108 | delete northEast; 109 | delete southWest; 110 | delete southEast; 111 | } 112 | 113 | 114 | // Update the data underlying this tree 115 | void QuadTree::setData(double* inp_data) 116 | { 117 | data = inp_data; 118 | } 119 | 120 | 121 | // Get the parent of the current tree 122 | QuadTree* QuadTree::getParent() 123 | { 124 | return parent; 125 | } 126 | 127 | 128 | // Insert a point into the QuadTree 129 | bool QuadTree::insert(int new_index) 130 | { 131 | // Ignore objects which do not belong in this quad tree 132 | double* point = data + new_index * QT_NO_DIMS; 133 | if(!boundary.containsPoint(point)) 134 | return false; 135 | 136 | // Online update of cumulative size and center-of-mass 137 | cum_size++; 138 | double mult1 = (double) (cum_size - 1) / (double) cum_size; 139 | double mult2 = 1.0 / (double) cum_size; 140 | for(int d = 0; d < QT_NO_DIMS; d++) center_of_mass[d] *= mult1; 141 | for(int d = 0; d < QT_NO_DIMS; d++) center_of_mass[d] += mult2 * point[d]; 142 | 143 | // If there is space in this quad tree and it is a leaf, add the object here 144 | if(is_leaf && size < QT_NODE_CAPACITY) { 145 | index[size] = new_index; 146 | size++; 147 | return true; 148 | } 149 | 150 | // Don't add duplicates for now (this is not very nice) 151 | bool any_duplicate = false; 152 | for(int n = 0; n < size; n++) { 153 | bool duplicate = true; 154 | for(int d = 0; d < QT_NO_DIMS; d++) { 155 | if(point[d] != data[index[n] * QT_NO_DIMS + d]) { duplicate = false; break; } 156 | } 157 | any_duplicate = any_duplicate | duplicate; 158 | } 159 | if(any_duplicate) return true; 160 | 161 | // Otherwise, we need to subdivide the current cell 162 | if(is_leaf) subdivide(); 163 | 164 | // Find out where the point can be inserted 165 | if(northWest->insert(new_index)) return true; 166 | if(northEast->insert(new_index)) return true; 167 | if(southWest->insert(new_index)) return true; 168 | if(southEast->insert(new_index)) return true; 169 | 170 | // Otherwise, the point cannot be inserted (this should never happen) 171 | return false; 172 | } 173 | 174 | 175 | // Create four children which fully divide this cell into four quads of equal area 176 | void QuadTree::subdivide() { 177 | 178 | // Create four children 179 | northWest = new QuadTree(this, data, boundary.x - .5 * boundary.hw, boundary.y - .5 * boundary.hh, .5 * boundary.hw, .5 * boundary.hh); 180 | northEast = new QuadTree(this, data, boundary.x + .5 * boundary.hw, boundary.y - .5 * boundary.hh, .5 * boundary.hw, .5 * boundary.hh); 181 | southWest = new QuadTree(this, data, boundary.x - .5 * boundary.hw, boundary.y + .5 * boundary.hh, .5 * boundary.hw, .5 * boundary.hh); 182 | southEast = new QuadTree(this, data, boundary.x + .5 * boundary.hw, boundary.y + .5 * boundary.hh, .5 * boundary.hw, .5 * boundary.hh); 183 | 184 | // Move existing points to correct children 185 | for(int i = 0; i < size; i++) { 186 | bool success = false; 187 | if(!success) success = northWest->insert(index[i]); 188 | if(!success) success = northEast->insert(index[i]); 189 | if(!success) success = southWest->insert(index[i]); 190 | if(!success) success = southEast->insert(index[i]); 191 | index[i] = -1; 192 | } 193 | 194 | // Empty parent node 195 | size = 0; 196 | is_leaf = false; 197 | } 198 | 199 | 200 | // Build quadtree on dataset 201 | void QuadTree::fill(int N) 202 | { 203 | for(int i = 0; i < N; i++) insert(i); 204 | } 205 | 206 | 207 | // Checks whether the specified tree is correct 208 | bool QuadTree::isCorrect() 209 | { 210 | for(int n = 0; n < size; n++) { 211 | double* point = data + index[n] * QT_NO_DIMS; 212 | if(!boundary.containsPoint(point)) return false; 213 | } 214 | if(!is_leaf) return northWest->isCorrect() && 215 | northEast->isCorrect() && 216 | southWest->isCorrect() && 217 | southEast->isCorrect(); 218 | else return true; 219 | } 220 | 221 | 222 | // Rebuilds a possibly incorrect tree (LAURENS: This function is not tested yet!) 223 | void QuadTree::rebuildTree() 224 | { 225 | for(int n = 0; n < size; n++) { 226 | 227 | // Check whether point is erroneous 228 | double* point = data + index[n] * QT_NO_DIMS; 229 | if(!boundary.containsPoint(point)) { 230 | 231 | // Remove erroneous point 232 | int rem_index = index[n]; 233 | for(int m = n + 1; m < size; m++) index[m - 1] = index[m]; 234 | index[size - 1] = -1; 235 | size--; 236 | 237 | // Update center-of-mass and counter in all parents 238 | bool done = false; 239 | QuadTree* node = this; 240 | while(!done) { 241 | for(int d = 0; d < QT_NO_DIMS; d++) { 242 | node->center_of_mass[d] = ((double) node->cum_size * node->center_of_mass[d] - point[d]) / (double) (node->cum_size - 1); 243 | } 244 | node->cum_size--; 245 | if(node->getParent() == NULL) done = true; 246 | else node = node->getParent(); 247 | } 248 | 249 | // Reinsert point in the root tree 250 | node->insert(rem_index); 251 | } 252 | } 253 | 254 | // Rebuild lower parts of the tree 255 | northWest->rebuildTree(); 256 | northEast->rebuildTree(); 257 | southWest->rebuildTree(); 258 | southEast->rebuildTree(); 259 | } 260 | 261 | 262 | // Build a list of all indices in quadtree 263 | void QuadTree::getAllIndices(int* indices) 264 | { 265 | getAllIndices(indices, 0); 266 | } 267 | 268 | 269 | // Build a list of all indices in quadtree 270 | int QuadTree::getAllIndices(int* indices, int loc) 271 | { 272 | 273 | // Gather indices in current quadrant 274 | for(int i = 0; i < size; i++) indices[loc + i] = index[i]; 275 | loc += size; 276 | 277 | // Gather indices in children 278 | if(!is_leaf) { 279 | loc = northWest->getAllIndices(indices, loc); 280 | loc = northEast->getAllIndices(indices, loc); 281 | loc = southWest->getAllIndices(indices, loc); 282 | loc = southEast->getAllIndices(indices, loc); 283 | } 284 | return loc; 285 | } 286 | 287 | 288 | int QuadTree::getDepth() { 289 | if(is_leaf) return 1; 290 | return 1 + max(max(northWest->getDepth(), 291 | northEast->getDepth()), 292 | max(southWest->getDepth(), 293 | southEast->getDepth())); 294 | 295 | } 296 | 297 | 298 | // Compute non-edge forces using Barnes-Hut algorithm 299 | void QuadTree::computeNonEdgeForces(int point_index, double theta, double neg_f[], double* sum_Q) 300 | { 301 | 302 | // Make sure that we spend no time on empty nodes or self-interactions 303 | if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return; 304 | 305 | // Compute distance between point and center-of-mass 306 | double D = .0; 307 | int ind = point_index * QT_NO_DIMS; 308 | for(int d = 0; d < QT_NO_DIMS; d++) buff[d] = data[ind + d]; 309 | for(int d = 0; d < QT_NO_DIMS; d++) buff[d] -= center_of_mass[d]; 310 | for(int d = 0; d < QT_NO_DIMS; d++) D += buff[d] * buff[d]; 311 | 312 | // Check whether we can use this node as a "summary" 313 | if(is_leaf || max(boundary.hh, boundary.hw) / sqrt(D) < theta) { 314 | 315 | // Compute and add t-SNE force between point and current node 316 | double Q = 1.0 / (1.0 + D); 317 | *sum_Q += cum_size * Q; 318 | double mult = cum_size * Q * Q; 319 | for(int d = 0; d < QT_NO_DIMS; d++) neg_f[d] += mult * buff[d]; 320 | } 321 | else { 322 | 323 | // Recursively apply Barnes-Hut to children 324 | northWest->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); 325 | northEast->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); 326 | southWest->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); 327 | southEast->computeNonEdgeForces(point_index, theta, neg_f, sum_Q); 328 | } 329 | } 330 | 331 | 332 | // Computes edge forces 333 | void QuadTree::computeEdgeForces(int* row_P, int* col_P, double* val_P, int N, double* pos_f) 334 | { 335 | 336 | // Loop over all edges in the graph 337 | int ind1, ind2; 338 | double D; 339 | for(int n = 0; n < N; n++) { 340 | ind1 = n * QT_NO_DIMS; 341 | for(int i = row_P[n]; i < row_P[n + 1]; i++) { 342 | 343 | // Compute pairwise distance and Q-value 344 | D = .0; 345 | ind2 = col_P[i] * QT_NO_DIMS; 346 | for(int d = 0; d < QT_NO_DIMS; d++) buff[d] = data[ind1 + d]; 347 | for(int d = 0; d < QT_NO_DIMS; d++) buff[d] -= data[ind2 + d]; 348 | for(int d = 0; d < QT_NO_DIMS; d++) D += buff[d] * buff[d]; 349 | D = val_P[i] / (1.0 + D); 350 | 351 | // Sum positive force 352 | for(int d = 0; d < QT_NO_DIMS; d++) pos_f[ind1 + d] += D * buff[d]; 353 | } 354 | } 355 | } 356 | 357 | 358 | // Print out tree 359 | void QuadTree::print() 360 | { 361 | if(cum_size == 0) { 362 | printf("Empty node\n"); 363 | return; 364 | } 365 | 366 | if(is_leaf) { 367 | printf("Leaf node; data = ["); 368 | for(int i = 0; i < size; i++) { 369 | double* point = data + index[i] * QT_NO_DIMS; 370 | for(int d = 0; d < QT_NO_DIMS; d++) printf("%f, ", point[d]); 371 | printf(" (index = %d)", index[i]); 372 | if(i < size - 1) printf("\n"); 373 | else printf("]\n"); 374 | } 375 | } 376 | else { 377 | printf("Intersection node with center-of-mass = ["); 378 | for(int d = 0; d < QT_NO_DIMS; d++) printf("%f, ", center_of_mass[d]); 379 | printf("]; children are:\n"); 380 | northEast->print(); 381 | northWest->print(); 382 | southEast->print(); 383 | southWest->print(); 384 | } 385 | } 386 | 387 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/quadtree.h: -------------------------------------------------------------------------------- 1 | /* 2 | * quadtree.h 3 | * Header file for a quadtree. 4 | * 5 | * Created by Laurens van der Maaten. 6 | * Copyright 2012, Delft University of Technology. All rights reserved. 7 | * 8 | */ 9 | 10 | #ifndef QUADTREE_H 11 | #define QUADTREE_H 12 | 13 | using namespace std; 14 | 15 | static inline double min(double x, double y) { return (x <= y ? x : y); } 16 | static inline double max(double x, double y) { return (x <= y ? y : x); } 17 | static inline double abs(double x) { return (x < .0 ? -x : x); } 18 | 19 | class Cell { 20 | 21 | public: 22 | double x; 23 | double y; 24 | double hw; 25 | double hh; 26 | bool containsPoint(double point[]); 27 | }; 28 | 29 | 30 | class QuadTree 31 | { 32 | 33 | // Fixed constants 34 | static const int QT_NO_DIMS = 2; 35 | static const int QT_NODE_CAPACITY = 1; 36 | 37 | // A buffer we use when doing force computations 38 | double buff[QT_NO_DIMS]; 39 | 40 | // Properties of this node in the tree 41 | QuadTree* parent; 42 | bool is_leaf; 43 | int size; 44 | int cum_size; 45 | 46 | // Axis-aligned bounding box stored as a center with half-dimensions to represent the boundaries of this quad tree 47 | Cell boundary; 48 | 49 | // Indices in this quad tree node, corresponding center-of-mass, and list of all children 50 | double* data; 51 | double center_of_mass[QT_NO_DIMS]; 52 | int index[QT_NODE_CAPACITY]; 53 | 54 | // Children 55 | QuadTree* northWest; 56 | QuadTree* northEast; 57 | QuadTree* southWest; 58 | QuadTree* southEast; 59 | 60 | public: 61 | QuadTree(double* inp_data, int N); 62 | QuadTree(double* inp_data, double inp_x, double inp_y, double inp_hw, double inp_hh); 63 | QuadTree(double* inp_data, int N, double inp_x, double inp_y, double inp_hw, double inp_hh); 64 | QuadTree(QuadTree* inp_parent, double* inp_data, int N, double inp_x, double inp_y, double inp_hw, double inp_hh); 65 | QuadTree(QuadTree* inp_parent, double* inp_data, double inp_x, double inp_y, double inp_hw, double inp_hh); 66 | ~QuadTree(); 67 | void setData(double* inp_data); 68 | QuadTree* getParent(); 69 | void construct(Cell boundary); 70 | bool insert(int new_index); 71 | void subdivide(); 72 | bool isCorrect(); 73 | void rebuildTree(); 74 | void getAllIndices(int* indices); 75 | int getDepth(); 76 | void computeNonEdgeForces(int point_index, double theta, double neg_f[], double* sum_Q); 77 | void computeEdgeForces(int* row_P, int* col_P, double* val_P, int N, double* pos_f); 78 | void print(); 79 | 80 | private: 81 | void init(QuadTree* inp_parent, double* inp_data, double inp_x, double inp_y, double inp_hw, double inp_hh); 82 | void fill(int N); 83 | int getAllIndices(int* indices, int loc); 84 | bool isChild(int test_index, int start, int end); 85 | }; 86 | 87 | #endif 88 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/tsne.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * tsne.cpp 3 | * Implementation of both standard and Barnes-Hut-SNE. 4 | * 5 | * Created by Laurens van der Maaten. 6 | * Copyright 2012, Delft University of Technology. All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "quadtree.h" 17 | #include "vptree.h" 18 | #include "tsne.h" 19 | 20 | extern "C" { 21 | #include 22 | } 23 | 24 | 25 | using namespace std; 26 | 27 | // Perform t-SNE 28 | void TSNE::run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, unsigned int seed) { 29 | // Initalize the pseudorandom number generator 30 | srand(seed); 31 | 32 | // Determine whether we are using an exact algorithm 33 | if(N - 1 < 3 * perplexity) { printf("Perplexity too large for the number of data points!\n"); exit(1); } 34 | printf("Using no_dims = %d, perplexity = %f, theta = %f, seed=%d\n", no_dims, perplexity, theta, seed); 35 | bool exact = (theta == .0) ? true : false; 36 | 37 | // Set learning parameters 38 | float total_time = .0; 39 | clock_t start, end; 40 | int max_iter = 1000, stop_lying_iter = 250, mom_switch_iter = 250; 41 | double momentum = .5, final_momentum = .8; 42 | double eta = 200.0; 43 | 44 | // Allocate some memory 45 | double* dY = (double*) malloc(N * no_dims * sizeof(double)); 46 | double* uY = (double*) malloc(N * no_dims * sizeof(double)); 47 | double* gains = (double*) malloc(N * no_dims * sizeof(double)); 48 | if(dY == NULL || uY == NULL || gains == NULL) { printf("Memory allocation failed!\n"); exit(1); } 49 | for(int i = 0; i < N * no_dims; i++) uY[i] = .0; 50 | for(int i = 0; i < N * no_dims; i++) gains[i] = 1.0; 51 | 52 | // Normalize input data (to prevent numerical problems) 53 | printf("Computing input similarities...\n"); 54 | 55 | start = clock(); 56 | zeroMean(X, N, D); 57 | double max_X = .0; 58 | for(int i = 0; i < N * D; i++) { 59 | if(X[i] > max_X) max_X = X[i]; 60 | } 61 | 62 | for(int i = 0; i < N * D; i++) X[i] /= max_X; 63 | 64 | // Compute input similarities for exact t-SNE 65 | double* P; int* row_P; int* col_P; double* val_P; 66 | if(exact) { 67 | 68 | // Compute similarities 69 | P = (double*) malloc(N * N * sizeof(double)); 70 | if(P == NULL) { printf("Memory allocation failed!\n"); exit(1); } 71 | computeGaussianPerplexity(X, N, D, P, perplexity); 72 | 73 | // Symmetrize input similarities 74 | printf("Symmetrizing...\n"); 75 | for(int n = 0; n < N; n++) { 76 | for(int m = n + 1; m < N; m++) { 77 | P[n * N + m] += P[m * N + n]; 78 | P[m * N + n] = P[n * N + m]; 79 | } 80 | } 81 | double sum_P = .0; 82 | for(int i = 0; i < N * N; i++) sum_P += P[i]; 83 | for(int i = 0; i < N * N; i++) P[i] /= sum_P; 84 | } 85 | 86 | // Compute input similarities for approximate t-SNE 87 | else { 88 | 89 | // Compute asymmetric pairwise input similarities 90 | computeGaussianPerplexity(X, N, D, &row_P, &col_P, &val_P, perplexity, (int) (3 * perplexity)); 91 | 92 | // Symmetrize input similarities 93 | symmetrizeMatrix(&row_P, &col_P, &val_P, N); 94 | double sum_P = .0; 95 | for(int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; 96 | for(int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; 97 | } 98 | end = clock(); 99 | 100 | // Lie about the P-values 101 | if(exact) { for(int i = 0; i < N * N; i++) P[i] *= 12.0; } 102 | else { for(int i = 0; i < row_P[N]; i++) val_P[i] *= 12.0; } 103 | 104 | // Initialize solution (randomly) 105 | for(int i = 0; i < N * no_dims; i++) Y[i] = randn() * .0001; 106 | 107 | // Perform main training loop 108 | if(exact) printf("Done in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); 109 | else printf("Done in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); 110 | start = clock(); 111 | for(int iter = 0; iter < max_iter; iter++) { 112 | 113 | // Compute (approximate) gradient 114 | if(exact) computeExactGradient(P, Y, N, no_dims, dY); 115 | else computeGradient(P, row_P, col_P, val_P, Y, N, no_dims, dY, theta); 116 | 117 | // Update gains 118 | for(int i = 0; i < N * no_dims; i++) gains[i] = (sign(dY[i]) != sign(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); 119 | for(int i = 0; i < N * no_dims; i++) if(gains[i] < .01) gains[i] = .01; 120 | 121 | // Perform gradient update (with momentum and gains) 122 | for(int i = 0; i < N * no_dims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; 123 | for(int i = 0; i < N * no_dims; i++) Y[i] = Y[i] + uY[i]; 124 | 125 | // Make solution zero-mean 126 | zeroMean(Y, N, no_dims); 127 | 128 | // Stop lying about the P-values after a while, and switch momentum 129 | if(iter == stop_lying_iter) { 130 | if(exact) { for(int i = 0; i < N * N; i++) P[i] /= 12.0; } 131 | else { for(int i = 0; i < row_P[N]; i++) val_P[i] /= 12.0; } 132 | } 133 | if(iter == mom_switch_iter) momentum = final_momentum; 134 | 135 | // Print out progress 136 | if(iter > 0 && iter % 50 == 0 || iter == max_iter - 1) { 137 | end = clock(); 138 | double C = .0; 139 | if(exact) C = evaluateError(P, Y, N); 140 | else C = evaluateError(row_P, col_P, val_P, Y, N, theta); // doing approximate computation here! 141 | if(iter == 0) 142 | printf("Iteration %d: error is %f\n", iter + 1, C); 143 | else { 144 | total_time += (float) (end - start) / CLOCKS_PER_SEC; 145 | printf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter, C, (float) (end - start) / CLOCKS_PER_SEC); 146 | } 147 | start = clock(); 148 | } 149 | } 150 | end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC; 151 | 152 | // Clean up memory 153 | free(dY); 154 | free(uY); 155 | free(gains); 156 | if(exact) free(P); 157 | else { 158 | free(row_P); row_P = NULL; 159 | free(col_P); col_P = NULL; 160 | free(val_P); val_P = NULL; 161 | } 162 | printf("Fitting performed in %4.2f seconds.\n", total_time); 163 | } 164 | 165 | 166 | // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm) 167 | void TSNE::computeGradient(double* P, int* inp_row_P, int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta) 168 | { 169 | 170 | // Construct quadtree on current map 171 | QuadTree* tree = new QuadTree(Y, N); 172 | 173 | // Compute all terms required for t-SNE gradient 174 | double sum_Q = .0; 175 | double* pos_f = (double*) calloc(N * D, sizeof(double)); 176 | double* neg_f = (double*) calloc(N * D, sizeof(double)); 177 | if(pos_f == NULL || neg_f == NULL) { printf("Memory allocation failed!\n"); exit(1); } 178 | tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f); 179 | for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * D, &sum_Q); 180 | 181 | // Compute final t-SNE gradient 182 | for(int i = 0; i < N * D; i++) { 183 | dC[i] = pos_f[i] - (neg_f[i] / sum_Q); 184 | } 185 | free(pos_f); 186 | free(neg_f); 187 | delete tree; 188 | } 189 | 190 | // Compute gradient of the t-SNE cost function (exact) 191 | void TSNE::computeExactGradient(double* P, double* Y, int N, int D, double* dC) { 192 | 193 | // Make sure the current gradient contains zeros 194 | for(int i = 0; i < N * D; i++) dC[i] = 0.0; 195 | 196 | // Compute the squared Euclidean distance matrix 197 | double* DD = (double*) malloc(N * N * sizeof(double)); 198 | if(DD == NULL) { printf("Memory allocation failed!\n"); exit(1); } 199 | computeSquaredEuclideanDistance(Y, N, D, DD); 200 | 201 | // Compute Q-matrix and normalization sum 202 | double* Q = (double*) malloc(N * N * sizeof(double)); 203 | if(Q == NULL) { printf("Memory allocation failed!\n"); exit(1); } 204 | double sum_Q = .0; 205 | for(int n = 0; n < N; n++) { 206 | for(int m = 0; m < N; m++) { 207 | if(n != m) { 208 | Q[n * N + m] = 1 / (1 + DD[n * N + m]); 209 | sum_Q += Q[n * N + m]; 210 | } 211 | } 212 | } 213 | 214 | // Perform the computation of the gradient 215 | for(int n = 0; n < N; n++) { 216 | for(int m = 0; m < N; m++) { 217 | if(n != m) { 218 | double mult = (P[n * N + m] - (Q[n * N + m] / sum_Q)) * Q[n * N + m]; 219 | for(int d = 0; d < D; d++) { 220 | dC[n * D + d] += (Y[n * D + d] - Y[m * D + d]) * mult; 221 | } 222 | } 223 | } 224 | } 225 | 226 | // Free memory 227 | free(DD); DD = NULL; 228 | free(Q); Q = NULL; 229 | } 230 | 231 | 232 | // Evaluate t-SNE cost function (exactly) 233 | double TSNE::evaluateError(double* P, double* Y, int N) { 234 | 235 | // Compute the squared Euclidean distance matrix 236 | double* DD = (double*) malloc(N * N * sizeof(double)); 237 | double* Q = (double*) malloc(N * N * sizeof(double)); 238 | if(DD == NULL || Q == NULL) { printf("Memory allocation failed!\n"); exit(1); } 239 | computeSquaredEuclideanDistance(Y, N, 2, DD); 240 | 241 | // Compute Q-matrix and normalization sum 242 | double sum_Q = DBL_MIN; 243 | for(int n = 0; n < N; n++) { 244 | for(int m = 0; m < N; m++) { 245 | if(n != m) { 246 | Q[n * N + m] = 1 / (1 + DD[n * N + m]); 247 | sum_Q += Q[n * N + m]; 248 | } 249 | else Q[n * N + m] = DBL_MIN; 250 | } 251 | } 252 | for(int i = 0; i < N * N; i++) Q[i] /= sum_Q; 253 | 254 | // Sum t-SNE error 255 | double C = .0; 256 | for(int n = 0; n < N; n++) { 257 | for(int m = 0; m < N; m++) { 258 | C += P[n * N + m] * log((P[n * N + m] + 1e-9) / (Q[n * N + m] + 1e-9)); 259 | } 260 | } 261 | 262 | // Clean up memory 263 | free(DD); 264 | free(Q); 265 | return C; 266 | } 267 | 268 | // Evaluate t-SNE cost function (approximately) 269 | double TSNE::evaluateError(int* row_P, int* col_P, double* val_P, double* Y, int N, double theta) 270 | { 271 | 272 | // Get estimate of normalization term 273 | const int QT_NO_DIMS = 2; 274 | QuadTree* tree = new QuadTree(Y, N); 275 | double buff[QT_NO_DIMS] = {.0, .0}; 276 | double sum_Q = .0; 277 | for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, buff, &sum_Q); 278 | 279 | // Loop over all edges to compute t-SNE error 280 | int ind1, ind2; 281 | double C = .0, Q; 282 | for(int n = 0; n < N; n++) { 283 | ind1 = n * QT_NO_DIMS; 284 | for(int i = row_P[n]; i < row_P[n + 1]; i++) { 285 | Q = .0; 286 | ind2 = col_P[i] * QT_NO_DIMS; 287 | for(int d = 0; d < QT_NO_DIMS; d++) buff[d] = Y[ind1 + d]; 288 | for(int d = 0; d < QT_NO_DIMS; d++) buff[d] -= Y[ind2 + d]; 289 | for(int d = 0; d < QT_NO_DIMS; d++) Q += buff[d] * buff[d]; 290 | Q = (1.0 / (1.0 + Q)) / sum_Q; 291 | C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); 292 | } 293 | } 294 | return C; 295 | } 296 | 297 | 298 | // Compute input similarities with a fixed perplexity 299 | void TSNE::computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity) { 300 | 301 | // Compute the squared Euclidean distance matrix 302 | double* DD = (double*) malloc(N * N * sizeof(double)); 303 | if(DD == NULL) { printf("Memory allocation failed!\n"); exit(1); } 304 | computeSquaredEuclideanDistance(X, N, D, DD); 305 | 306 | // Compute the Gaussian kernel row by row 307 | for(int n = 0; n < N; n++) { 308 | 309 | // Initialize some variables 310 | bool found = false; 311 | double beta = 1.0; 312 | double min_beta = -DBL_MAX; 313 | double max_beta = DBL_MAX; 314 | double tol = 1e-5; 315 | double sum_P; 316 | 317 | // Iterate until we found a good perplexity 318 | int iter = 0; 319 | while(!found && iter < 200) { 320 | 321 | // Compute Gaussian kernel row 322 | for(int m = 0; m < N; m++) P[n * N + m] = exp(-beta * DD[n * N + m]); 323 | P[n * N + n] = DBL_MIN; 324 | 325 | // Compute entropy of current row 326 | sum_P = DBL_MIN; 327 | for(int m = 0; m < N; m++) sum_P += P[n * N + m]; 328 | double H = 0.0; 329 | for(int m = 0; m < N; m++) H += beta * (DD[n * N + m] * P[n * N + m]); 330 | H = (H / sum_P) + log(sum_P); 331 | 332 | // Evaluate whether the entropy is within the tolerance level 333 | double Hdiff = H - log(perplexity); 334 | if(Hdiff < tol && -Hdiff < tol) { 335 | found = true; 336 | } 337 | else { 338 | if(Hdiff > 0) { 339 | min_beta = beta; 340 | if(max_beta == DBL_MAX || max_beta == -DBL_MAX) 341 | beta *= 2.0; 342 | else 343 | beta = (beta + max_beta) / 2.0; 344 | } 345 | else { 346 | max_beta = beta; 347 | if(min_beta == -DBL_MAX || min_beta == DBL_MAX) 348 | beta /= 2.0; 349 | else 350 | beta = (beta + min_beta) / 2.0; 351 | } 352 | } 353 | 354 | // Update iteration counter 355 | iter++; 356 | } 357 | 358 | // Row normalize P 359 | for(int m = 0; m < N; m++) P[n * N + m] /= sum_P; 360 | } 361 | 362 | // Clean up memory 363 | free(DD); DD = NULL; 364 | } 365 | 366 | 367 | // Compute input similarities with a fixed perplexity using ball trees (this function allocates memory another function should free) 368 | void TSNE::computeGaussianPerplexity(double* X, int N, int D, int** _row_P, int** _col_P, double** _val_P, double perplexity, int K) { 369 | 370 | if(perplexity > K) printf("Perplexity should be lower than K!\n"); 371 | 372 | // Allocate the memory we need 373 | *_row_P = (int*) malloc((N + 1) * sizeof(int)); 374 | *_col_P = (int*) calloc(N * K, sizeof(int)); 375 | *_val_P = (double*) calloc(N * K, sizeof(double)); 376 | if(*_row_P == NULL || *_col_P == NULL || *_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } 377 | int* row_P = *_row_P; 378 | int* col_P = *_col_P; 379 | double* val_P = *_val_P; 380 | double* cur_P = (double*) malloc((N - 1) * sizeof(double)); 381 | if(cur_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } 382 | row_P[0] = 0; 383 | for(int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + K; 384 | 385 | // Build ball tree on data set 386 | VpTree* tree = new VpTree(); 387 | vector obj_X(N, DataPoint(D, -1, X)); 388 | for(int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D); 389 | tree->create(obj_X); 390 | 391 | // Loop over all points to find nearest neighbors 392 | printf("Building tree...\n"); 393 | vector indices; 394 | vector distances; 395 | for(int n = 0; n < N; n++) { 396 | 397 | if(n % 10000 == 0) printf(" - point %d of %d\n", n, N); 398 | 399 | // Find nearest neighbors 400 | indices.clear(); 401 | distances.clear(); 402 | tree->search(obj_X[n], K + 1, &indices, &distances); 403 | 404 | // Initialize some variables for binary search 405 | bool found = false; 406 | double beta = 1.0; 407 | double min_beta = -DBL_MAX; 408 | double max_beta = DBL_MAX; 409 | double tol = 1e-5; 410 | 411 | // Iterate until we found a good perplexity 412 | int iter = 0; double sum_P; 413 | while(!found && iter < 200) { 414 | 415 | // Compute Gaussian kernel row 416 | for(int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m + 1]); 417 | 418 | // Compute entropy of current row 419 | sum_P = DBL_MIN; 420 | for(int m = 0; m < K; m++) sum_P += cur_P[m]; 421 | double H = .0; 422 | for(int m = 0; m < K; m++) H += beta * (distances[m + 1] * cur_P[m]); 423 | H = (H / sum_P) + log(sum_P); 424 | 425 | // Evaluate whether the entropy is within the tolerance level 426 | double Hdiff = H - log(perplexity); 427 | if(Hdiff < tol && -Hdiff < tol) { 428 | found = true; 429 | } 430 | else { 431 | if(Hdiff > 0) { 432 | min_beta = beta; 433 | if(max_beta == DBL_MAX || max_beta == -DBL_MAX) 434 | beta *= 2.0; 435 | else 436 | beta = (beta + max_beta) / 2.0; 437 | } 438 | else { 439 | max_beta = beta; 440 | if(min_beta == -DBL_MAX || min_beta == DBL_MAX) 441 | beta /= 2.0; 442 | else 443 | beta = (beta + min_beta) / 2.0; 444 | } 445 | } 446 | 447 | // Update iteration counter 448 | iter++; 449 | } 450 | 451 | // Row-normalize current row of P and store in matrix 452 | for(int m = 0; m < K; m++) cur_P[m] /= sum_P; 453 | for(int m = 0; m < K; m++) { 454 | col_P[row_P[n] + m] = indices[m + 1].index(); 455 | val_P[row_P[n] + m] = cur_P[m]; 456 | } 457 | } 458 | 459 | // Clean up memory 460 | obj_X.clear(); 461 | free(cur_P); 462 | delete tree; 463 | } 464 | 465 | 466 | // Compute input similarities with a fixed perplexity (this function allocates memory another function should free) 467 | void TSNE::computeGaussianPerplexity(double* X, int N, int D, int** _row_P, int** _col_P, double** _val_P, double perplexity, double threshold) { 468 | 469 | // Allocate some memory we need for computations 470 | double* buff = (double*) malloc(D * sizeof(double)); 471 | double* DD = (double*) malloc(N * sizeof(double)); 472 | double* cur_P = (double*) malloc(N * sizeof(double)); 473 | if(buff == NULL || DD == NULL || cur_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } 474 | 475 | // Compute the Gaussian kernel row by row (to find number of elements in sparse P) 476 | int total_count = 0; 477 | for(int n = 0; n < N; n++) { 478 | 479 | // Compute the squared Euclidean distance matrix 480 | for(int m = 0; m < N; m++) { 481 | for(int d = 0; d < D; d++) buff[d] = X[n * D + d]; 482 | for(int d = 0; d < D; d++) buff[d] -= X[m * D + d]; 483 | DD[m] = .0; 484 | for(int d = 0; d < D; d++) DD[m] += buff[d] * buff[d]; 485 | } 486 | 487 | // Initialize some variables 488 | bool found = false; 489 | double beta = 1.0; 490 | double min_beta = -DBL_MAX; 491 | double max_beta = DBL_MAX; 492 | double tol = 1e-5; 493 | 494 | // Iterate until we found a good perplexity 495 | int iter = 0; double sum_P; 496 | while(!found && iter < 200) { 497 | 498 | // Compute Gaussian kernel row 499 | for(int m = 0; m < N; m++) cur_P[m] = exp(-beta * DD[m]); 500 | cur_P[n] = DBL_MIN; 501 | 502 | // Compute entropy of current row 503 | sum_P = DBL_MIN; 504 | for(int m = 0; m < N; m++) sum_P += cur_P[m]; 505 | double H = 0.0; 506 | for(int m = 0; m < N; m++) H += beta * (DD[m] * cur_P[m]); 507 | H = (H / sum_P) + log(sum_P); 508 | 509 | // Evaluate whether the entropy is within the tolerance level 510 | double Hdiff = H - log(perplexity); 511 | if(Hdiff < tol && -Hdiff < tol) { 512 | found = true; 513 | } 514 | else { 515 | if(Hdiff > 0) { 516 | min_beta = beta; 517 | if(max_beta == DBL_MAX || max_beta == -DBL_MAX) 518 | beta *= 2.0; 519 | else 520 | beta = (beta + max_beta) / 2.0; 521 | } 522 | else { 523 | max_beta = beta; 524 | if(min_beta == -DBL_MAX || min_beta == DBL_MAX) 525 | beta /= 2.0; 526 | else 527 | beta = (beta + min_beta) / 2.0; 528 | } 529 | } 530 | 531 | // Update iteration counter 532 | iter++; 533 | } 534 | 535 | // Row-normalize and threshold current row of P 536 | for(int m = 0; m < N; m++) cur_P[m] /= sum_P; 537 | for(int m = 0; m < N; m++) { 538 | if(cur_P[m] > threshold / (double) N) total_count++; 539 | } 540 | } 541 | 542 | // Allocate the memory we need 543 | *_row_P = (int*) malloc((N + 1) * sizeof(int)); 544 | *_col_P = (int*) malloc(total_count * sizeof(int)); 545 | *_val_P = (double*) malloc(total_count * sizeof(double)); 546 | int* row_P = *_row_P; 547 | int* col_P = *_col_P; 548 | double* val_P = *_val_P; 549 | if(row_P == NULL || col_P == NULL || val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } 550 | row_P[0] = 0; 551 | 552 | // Compute the Gaussian kernel row by row (this time, store the results) 553 | int count = 0; 554 | for(int n = 0; n < N; n++) { 555 | 556 | // Compute the squared Euclidean distance matrix 557 | for(int m = 0; m < N; m++) { 558 | for(int d = 0; d < D; d++) buff[d] = X[n * D + d]; 559 | for(int d = 0; d < D; d++) buff[d] -= X[m * D + d]; 560 | DD[m] = .0; 561 | for(int d = 0; d < D; d++) DD[m] += buff[d] * buff[d]; 562 | } 563 | 564 | // Initialize some variables 565 | bool found = false; 566 | double beta = 1.0; 567 | double min_beta = -DBL_MAX; 568 | double max_beta = DBL_MAX; 569 | double tol = 1e-5; 570 | 571 | // Iterate until we found a good perplexity 572 | int iter = 0; double sum_P; 573 | while(!found && iter < 200) { 574 | 575 | // Compute Gaussian kernel row 576 | for(int m = 0; m < N; m++) cur_P[m] = exp(-beta * DD[m]); 577 | cur_P[n] = DBL_MIN; 578 | 579 | // Compute entropy of current row 580 | sum_P = DBL_MIN; 581 | for(int m = 0; m < N; m++) sum_P += cur_P[m]; 582 | double H = 0.0; 583 | for(int m = 0; m < N; m++) H += beta * (DD[m] * cur_P[m]); 584 | H = (H / sum_P) + log(sum_P); 585 | 586 | // Evaluate whether the entropy is within the tolerance level 587 | double Hdiff = H - log(perplexity); 588 | if(Hdiff < tol && -Hdiff < tol) { 589 | found = true; 590 | } 591 | else { 592 | if(Hdiff > 0) { 593 | min_beta = beta; 594 | if(max_beta == DBL_MAX || max_beta == -DBL_MAX) 595 | beta *= 2.0; 596 | else 597 | beta = (beta + max_beta) / 2.0; 598 | } 599 | else { 600 | max_beta = beta; 601 | if(min_beta == -DBL_MAX || min_beta == DBL_MAX) 602 | beta /= 2.0; 603 | else 604 | beta = (beta + min_beta) / 2.0; 605 | } 606 | } 607 | 608 | // Update iteration counter 609 | iter++; 610 | } 611 | 612 | // Row-normalize and threshold current row of P 613 | for(int m = 0; m < N; m++) cur_P[m] /= sum_P; 614 | for(int m = 0; m < N; m++) { 615 | if(cur_P[m] > threshold / (double) N) { 616 | col_P[count] = m; 617 | val_P[count] = cur_P[m]; 618 | count++; 619 | } 620 | } 621 | row_P[n + 1] = count; 622 | } 623 | 624 | // Clean up memory 625 | free(DD); DD = NULL; 626 | free(buff); buff = NULL; 627 | free(cur_P); cur_P = NULL; 628 | } 629 | 630 | 631 | void TSNE::symmetrizeMatrix(int** _row_P, int** _col_P, double** _val_P, int N) { 632 | 633 | // Get sparse matrix 634 | int* row_P = *_row_P; 635 | int* col_P = *_col_P; 636 | double* val_P = *_val_P; 637 | 638 | // Count number of elements and row counts of symmetric matrix 639 | int* row_counts = (int*) calloc(N, sizeof(int)); 640 | if(row_counts == NULL) { printf("Memory allocation failed!\n"); exit(1); } 641 | for(int n = 0; n < N; n++) { 642 | for(int i = row_P[n]; i < row_P[n + 1]; i++) { 643 | 644 | // Check whether element (col_P[i], n) is present 645 | bool present = false; 646 | for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { 647 | if(col_P[m] == n) present = true; 648 | } 649 | if(present) row_counts[n]++; 650 | else { 651 | row_counts[n]++; 652 | row_counts[col_P[i]]++; 653 | } 654 | } 655 | } 656 | int no_elem = 0; 657 | for(int n = 0; n < N; n++) no_elem += row_counts[n]; 658 | 659 | // Allocate memory for symmetrized matrix 660 | int* sym_row_P = (int*) malloc((N + 1) * sizeof(int)); 661 | int* sym_col_P = (int*) malloc(no_elem * sizeof(int)); 662 | double* sym_val_P = (double*) malloc(no_elem * sizeof(double)); 663 | if(sym_row_P == NULL || sym_col_P == NULL || sym_val_P == NULL) { printf("Memory allocation failed!\n"); exit(1); } 664 | 665 | // Construct new row indices for symmetric matrix 666 | sym_row_P[0] = 0; 667 | for(int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + row_counts[n]; 668 | 669 | // Fill the result matrix 670 | int* offset = (int*) calloc(N, sizeof(int)); 671 | if(offset == NULL) { printf("Memory allocation failed!\n"); exit(1); } 672 | for(int n = 0; n < N; n++) { 673 | for(int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i]) 674 | 675 | // Check whether element (col_P[i], n) is present 676 | bool present = false; 677 | for(int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { 678 | if(col_P[m] == n) { 679 | present = true; 680 | if(n <= col_P[i]) { // make sure we do not add elements twice 681 | sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; 682 | sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; 683 | sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m]; 684 | sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m]; 685 | } 686 | } 687 | } 688 | 689 | // If (col_P[i], n) is not present, there is no addition involved 690 | if(!present) { 691 | sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; 692 | sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; 693 | sym_val_P[sym_row_P[n] + offset[n]] = val_P[i]; 694 | sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i]; 695 | } 696 | 697 | // Update offsets 698 | if(!present || (present && n <= col_P[i])) { 699 | offset[n]++; 700 | if(col_P[i] != n) offset[col_P[i]]++; 701 | } 702 | } 703 | } 704 | 705 | // Divide the result by two 706 | for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0; 707 | 708 | // Return symmetrized matrices 709 | free(*_row_P); *_row_P = sym_row_P; 710 | free(*_col_P); *_col_P = sym_col_P; 711 | free(*_val_P); *_val_P = sym_val_P; 712 | 713 | // Free up some memery 714 | free(offset); offset = NULL; 715 | free(row_counts); row_counts = NULL; 716 | } 717 | 718 | // Compute squared Euclidean distance matrix (using BLAS) 719 | void TSNE::computeSquaredEuclideanDistance(double* X, int N, int D, double* DD) { 720 | double* dataSums = (double*) calloc(N, sizeof(double)); 721 | if(dataSums == NULL) { printf("Memory allocation failed!\n"); exit(1); } 722 | for(int n = 0; n < N; n++) { 723 | for(int d = 0; d < D; d++) { 724 | dataSums[n] += (X[n * D + d] * X[n * D + d]); 725 | } 726 | } 727 | for(int n = 0; n < N; n++) { 728 | for(int m = 0; m < N; m++) { 729 | DD[n * N + m] = dataSums[n] + dataSums[m]; 730 | } 731 | } 732 | cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, D, -2.0, X, D, X, D, 1.0, DD, N); 733 | free(dataSums); dataSums = NULL; 734 | } 735 | 736 | 737 | // Makes data zero-mean 738 | void TSNE::zeroMean(double* X, int N, int D) { 739 | 740 | // Compute data mean 741 | double* mean = (double*) calloc(D, sizeof(double)); 742 | if(mean == NULL) { printf("Memory allocation failed!\n"); exit(1); } 743 | for(int n = 0; n < N; n++) { 744 | for(int d = 0; d < D; d++) { 745 | mean[d] += X[n * D + d]; 746 | } 747 | } 748 | for(int d = 0; d < D; d++) { 749 | mean[d] /= (double) N; 750 | } 751 | 752 | // Subtract data mean 753 | for(int n = 0; n < N; n++) { 754 | for(int d = 0; d < D; d++) { 755 | X[n * D + d] -= mean[d]; 756 | } 757 | } 758 | free(mean); mean = NULL; 759 | } 760 | 761 | 762 | // Generates a Gaussian random number 763 | double TSNE::randn() { 764 | double x, y, radius; 765 | do { 766 | x = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; 767 | y = 2 * (rand() / ((double) RAND_MAX + 1)) - 1; 768 | radius = (x * x) + (y * y); 769 | } while((radius >= 1.0) || (radius == 0.0)); 770 | radius = sqrt(-2 * log(radius) / radius); 771 | x *= radius; 772 | y *= radius; 773 | return x; 774 | } 775 | 776 | // Function that loads data from a t-SNE file 777 | // Note: this function does a malloc that should be freed elsewhere 778 | bool TSNE::load_data(double** data, int* n, int* d, double* theta, double* perplexity) { 779 | 780 | // Open file, read first 2 integers, allocate memory, and read the data 781 | FILE *h; 782 | if((h = fopen("data.dat", "r+b")) == NULL) { 783 | printf("Error: could not open data file.\n"); 784 | return false; 785 | } 786 | fread(n, sizeof(int), 1, h); // number of datapoints 787 | fread(d, sizeof(int), 1, h); // original dimensionality 788 | fread(theta, sizeof(double), 1, h); // gradient accuracy 789 | fread(perplexity, sizeof(double), 1, h); // perplexity 790 | *data = (double*) calloc(*d * *n, sizeof(double)); 791 | if(*data == NULL) { printf("Memory allocation failed!\n"); exit(1); } 792 | fread(*data, sizeof(double), *n * *d, h); // the data 793 | fclose(h); 794 | printf("Read the %i x %i data matrix successfully!\n", *n, *d); 795 | return true; 796 | } 797 | 798 | // Function that saves map to a t-SNE file 799 | void TSNE::save_data(double* data, int* landmarks, double* costs, int n, int d) { 800 | 801 | // Open file, write first 2 integers and then the data 802 | FILE *h; 803 | if((h = fopen("result.dat", "w+b")) == NULL) { 804 | printf("Error: could not open data file.\n"); 805 | return; 806 | } 807 | fwrite(&n, sizeof(int), 1, h); 808 | fwrite(&d, sizeof(int), 1, h); 809 | fwrite(data, sizeof(double), n * d, h); 810 | fwrite(landmarks, sizeof(int), n, h); 811 | fwrite(costs, sizeof(double), n, h); 812 | fclose(h); 813 | printf("Wrote the %i x %i data matrix successfully!\n", n, d); 814 | } 815 | 816 | 817 | // Function that runs the Barnes-Hut implementation of t-SNE 818 | //int main() { 819 | // 820 | // // Define some variables 821 | // int origN, N, D, no_dims = 2, *landmarks; 822 | // double perc_landmarks; 823 | // double perplexity, theta, *data; 824 | // TSNE* tsne = new TSNE(); 825 | // 826 | // // Read the parameters and the dataset 827 | // if(tsne->load_data(&data, &origN, &D, &theta, &perplexity)) { 828 | // 829 | // // Make dummy landmarks 830 | // N = origN; 831 | // int* landmarks = (int*) malloc(N * sizeof(int)); 832 | // if(landmarks == NULL) { printf("Memory allocation failed!\n"); exit(1); } 833 | // for(int n = 0; n < N; n++) landmarks[n] = n; 834 | // 835 | // // Now fire up the SNE implementation 836 | // double* Y = (double*) malloc(N * no_dims * sizeof(double)); 837 | // double* costs = (double*) calloc(N, sizeof(double)); 838 | // if(Y == NULL || costs == NULL) { printf("Memory allocation failed!\n"); exit(1); } 839 | // tsne->run(data, N, D, Y, no_dims, perplexity, theta); 840 | // 841 | // // Save the results 842 | // tsne->save_data(Y, landmarks, costs, N, no_dims); 843 | // 844 | // // Clean up the memory 845 | // free(data); data = NULL; 846 | // free(Y); Y = NULL; 847 | // free(costs); costs = NULL; 848 | // free(landmarks); landmarks = NULL; 849 | // } 850 | // delete(tsne); 851 | //} 852 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/tsne.h: -------------------------------------------------------------------------------- 1 | /* 2 | * tsne.h 3 | * Header file for t-SNE. 4 | * 5 | * Created by Laurens van der Maaten. 6 | * Copyright 2012, Delft University of Technology. All rights reserved. 7 | * 8 | */ 9 | 10 | 11 | #ifndef TSNE_H 12 | #define TSNE_H 13 | 14 | 15 | static inline double sign(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); } 16 | 17 | 18 | class TSNE 19 | { 20 | public: 21 | void run(double* X, int N, int D, double* Y, int no_dims, double perplexity, double theta, unsigned int seed); 22 | bool load_data(double** data, int* n, int* d, double* theta, double* perplexity); 23 | void save_data(double* data, int* landmarks, double* costs, int n, int d); 24 | 25 | void symmetrizeMatrix(int** row_P, int** col_P, double** val_P, int N); // should be static?! 26 | 27 | 28 | private: 29 | void computeGradient(double* P, int* inp_row_P, int* inp_col_P, double* inp_val_P, double* Y, int N, int D, double* dC, double theta); 30 | void computeExactGradient(double* P, double* Y, int N, int D, double* dC); 31 | double evaluateError(double* P, double* Y, int N); 32 | double evaluateError(int* row_P, int* col_P, double* val_P, double* Y, int N, double theta); 33 | void zeroMean(double* X, int N, int D); 34 | void computeGaussianPerplexity(double* X, int N, int D, double* P, double perplexity); 35 | void computeGaussianPerplexity(double* X, int N, int D, int** _row_P, int** _col_P, double** _val_P, double perplexity, int K); 36 | void computeGaussianPerplexity(double* X, int N, int D, int** _row_P, int** _col_P, double** _val_P, double perplexity, double threshold); 37 | void computeSquaredEuclideanDistance(double* X, int N, int D, double* DD); 38 | double randn(); 39 | }; 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /tsne/bh_sne_src/vptree.h: -------------------------------------------------------------------------------- 1 | /* 2 | * vptree.h 3 | * Implementation of a vantage-point tree. 4 | * 5 | * Created by Laurens van der Maaten. 6 | * Copyright 2012, Delft University of Technology. All rights reserved. 7 | * 8 | */ 9 | 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | #ifndef VPTREE_H 20 | #define VPTREE_H 21 | 22 | class DataPoint 23 | { 24 | int _D; 25 | int _ind; 26 | double* _x; 27 | 28 | public: 29 | DataPoint() { 30 | _D = 1; 31 | _ind = -1; 32 | _x = NULL; 33 | } 34 | DataPoint(int D, int ind, double* x) { 35 | _D = D; 36 | _ind = ind; 37 | _x = (double*) malloc(_D * sizeof(double)); 38 | for(int d = 0; d < _D; d++) _x[d] = x[d]; 39 | } 40 | DataPoint(const DataPoint& other) { // this makes a deep copy -- should not free anything 41 | if(this != &other) { 42 | _D = other.dimensionality(); 43 | _ind = other.index(); 44 | _x = (double*) malloc(_D * sizeof(double)); 45 | for(int d = 0; d < _D; d++) _x[d] = other.x(d); 46 | } 47 | } 48 | ~DataPoint() { if(_x != NULL) free(_x); } 49 | DataPoint& operator= (const DataPoint& other) { // asignment should free old object 50 | if(this != &other) { 51 | if(_x != NULL) free(_x); 52 | _D = other.dimensionality(); 53 | _ind = other.index(); 54 | _x = (double*) malloc(_D * sizeof(double)); 55 | for(int d = 0; d < _D; d++) _x[d] = other.x(d); 56 | } 57 | return *this; 58 | } 59 | int index() const { return _ind; } 60 | int dimensionality() const { return _D; } 61 | double x(int d) const { return _x[d]; } 62 | }; 63 | 64 | 65 | double euclidean_distance(const DataPoint &t1, const DataPoint &t2) { 66 | double dd = .0; 67 | for(int d = 0; d < t1.dimensionality(); d++) dd += (t1.x(d) - t2.x(d)) * (t1.x(d) - t2.x(d)); 68 | return dd; 69 | } 70 | 71 | 72 | template 73 | class VpTree 74 | { 75 | public: 76 | 77 | // Default constructor 78 | VpTree() : _root(0) {} 79 | 80 | // Destructor 81 | ~VpTree() { 82 | delete _root; 83 | } 84 | 85 | // Function to create a new VpTree from data 86 | void create(const std::vector& items) { 87 | delete _root; 88 | _items = items; 89 | _root = buildFromPoints(0, items.size()); 90 | } 91 | 92 | // Function that uses the tree to find the k nearest neighbors of target 93 | void search(const T& target, int k, std::vector* results, std::vector* distances) 94 | { 95 | 96 | // Use a priority queue to store intermediate results on 97 | std::priority_queue heap; 98 | 99 | // Variable that tracks the distance to the farthest point in our results 100 | _tau = DBL_MAX; 101 | 102 | // Perform the searcg 103 | search(_root, target, k, heap); 104 | 105 | // Gather final results 106 | results->clear(); distances->clear(); 107 | while(!heap.empty()) { 108 | results->push_back(_items[heap.top().index]); 109 | distances->push_back(heap.top().dist); 110 | heap.pop(); 111 | } 112 | 113 | // Results are in reverse order 114 | std::reverse(results->begin(), results->end()); 115 | std::reverse(distances->begin(), distances->end()); 116 | } 117 | 118 | private: 119 | std::vector _items; 120 | double _tau; 121 | 122 | // Single node of a VP tree (has a point and radius; left children are closer to point than the radius) 123 | struct Node 124 | { 125 | int index; // index of point in node 126 | double threshold; // radius(?) 127 | Node* left; // points closer by than threshold 128 | Node* right; // points farther away than threshold 129 | 130 | Node() : 131 | index(0), threshold(0.), left(0), right(0) {} 132 | 133 | ~Node() { // destructor 134 | delete left; 135 | delete right; 136 | } 137 | }* _root; 138 | 139 | 140 | // An item on the intermediate result queue 141 | struct HeapItem { 142 | HeapItem( int index, double dist) : 143 | index(index), dist(dist) {} 144 | int index; 145 | double dist; 146 | bool operator<(const HeapItem& o) const { 147 | return dist < o.dist; 148 | } 149 | }; 150 | 151 | // Distance comparator for use in std::nth_element 152 | struct DistanceComparator 153 | { 154 | const T& item; 155 | DistanceComparator(const T& item) : item(item) {} 156 | bool operator()(const T& a, const T& b) { 157 | return distance(item, a) < distance(item, b); 158 | } 159 | }; 160 | 161 | // Function that (recursively) fills the tree 162 | Node* buildFromPoints( int lower, int upper ) 163 | { 164 | if (upper == lower) { // indicates that we're done here! 165 | return NULL; 166 | } 167 | 168 | // Lower index is center of current node 169 | Node* node = new Node(); 170 | node->index = lower; 171 | 172 | if (upper - lower > 1) { // if we did not arrive at leaf yet 173 | 174 | // Choose an arbitrary point and move it to the start 175 | int i = (int) ((double)rand() / RAND_MAX * (upper - lower - 1)) + lower; 176 | std::swap(_items[lower], _items[i]); 177 | 178 | // Partition around the median distance 179 | int median = (upper + lower) / 2; 180 | std::nth_element(_items.begin() + lower + 1, 181 | _items.begin() + median, 182 | _items.begin() + upper, 183 | DistanceComparator(_items[lower])); 184 | 185 | // Threshold of the new node will be the distance to the median 186 | node->threshold = distance(_items[lower], _items[median]); 187 | 188 | // Recursively build tree 189 | node->index = lower; 190 | node->left = buildFromPoints(lower + 1, median); 191 | node->right = buildFromPoints(median, upper); 192 | } 193 | 194 | // Return result 195 | return node; 196 | } 197 | 198 | // Helper function that searches the tree 199 | void search(Node* node, const T& target, int k, std::priority_queue& heap) 200 | { 201 | if(node == NULL) return; // indicates that we're done here 202 | 203 | // Compute distance between target and current node 204 | double dist = distance(_items[node->index], target); 205 | 206 | // If current node within radius tau 207 | if(dist < _tau) { 208 | if(heap.size() == k) heap.pop(); // remove furthest node from result list (if we already have k results) 209 | heap.push(HeapItem(node->index, dist)); // add current node to result list 210 | if(heap.size() == k) _tau = heap.top().dist; // update value of tau (farthest point in result list) 211 | } 212 | 213 | // Return if we arrived at a leaf 214 | if(node->left == NULL && node->right == NULL) { 215 | return; 216 | } 217 | 218 | // If the target lies within the radius of ball 219 | if(dist < node->threshold) { 220 | if(dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first 221 | search(node->left, target, k, heap); 222 | } 223 | 224 | if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child 225 | search(node->right, target, k, heap); 226 | } 227 | 228 | // If the target lies outsize the radius of the ball 229 | } else { 230 | if(dist + _tau >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first 231 | search(node->right, target, k, heap); 232 | } 233 | 234 | if (dist - _tau <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child 235 | search(node->left, target, k, heap); 236 | } 237 | } 238 | } 239 | }; 240 | 241 | #endif 242 | -------------------------------------------------------------------------------- /tsne/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexisbcook/tsne/3aeafd511bd31747b0e1aef1dffd077ee2c258d4/tsne/tests/__init__.py -------------------------------------------------------------------------------- /tsne/tests/test_iris.py: -------------------------------------------------------------------------------- 1 | 2 | def test_iris(): 3 | from tsne import bh_sne 4 | from sklearn.datasets import load_iris 5 | 6 | iris = load_iris() 7 | 8 | X = iris.data 9 | y = iris.target 10 | 11 | X_2d = bh_sne(X) 12 | -------------------------------------------------------------------------------- /tsne/tests/test_seed.py: -------------------------------------------------------------------------------- 1 | 2 | def test_seed(): 3 | from tsne import bh_sne 4 | from sklearn.datasets import load_iris 5 | import numpy as np 6 | 7 | iris = load_iris() 8 | 9 | X = iris.data 10 | y = iris.target 11 | 12 | t1 = bh_sne(X, random_state=np.random.RandomState(0), copy_data=True) 13 | t2 = bh_sne(X, random_state=np.random.RandomState(0), copy_data=True) 14 | assert np.all(t1 == t2) 15 | -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 1 | 2 | # Version: 0.15 3 | 4 | """ 5 | The Versioneer 6 | ============== 7 | 8 | * like a rocketeer, but for versions! 9 | * https://github.com/warner/python-versioneer 10 | * Brian Warner 11 | * License: Public Domain 12 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy 13 | * [![Latest Version] 14 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 15 | ](https://pypi.python.org/pypi/versioneer/) 16 | * [![Build Status] 17 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 18 | ](https://travis-ci.org/warner/python-versioneer) 19 | 20 | This is a tool for managing a recorded version number in distutils-based 21 | python projects. The goal is to remove the tedious and error-prone "update 22 | the embedded version string" step from your release process. Making a new 23 | release should be as easy as recording a new tag in your version-control 24 | system, and maybe making new tarballs. 25 | 26 | 27 | ## Quick Install 28 | 29 | * `pip install versioneer` to somewhere to your $PATH 30 | * add a `[versioneer]` section to your setup.cfg (see below) 31 | * run `versioneer install` in your source tree, commit the results 32 | 33 | ## Version Identifiers 34 | 35 | Source trees come from a variety of places: 36 | 37 | * a version-control system checkout (mostly used by developers) 38 | * a nightly tarball, produced by build automation 39 | * a snapshot tarball, produced by a web-based VCS browser, like github's 40 | "tarball from tag" feature 41 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 42 | 43 | Within each source tree, the version identifier (either a string or a number, 44 | this tool is format-agnostic) can come from a variety of places: 45 | 46 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows 47 | about recent "tags" and an absolute revision-id 48 | * the name of the directory into which the tarball was unpacked 49 | * an expanded VCS keyword ($Id$, etc) 50 | * a `_version.py` created by some earlier build step 51 | 52 | For released software, the version identifier is closely related to a VCS 53 | tag. Some projects use tag names that include more than just the version 54 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 55 | needs to strip the tag prefix to extract the version identifier. For 56 | unreleased software (between tags), the version identifier should provide 57 | enough information to help developers recreate the same tree, while also 58 | giving them an idea of roughly how old the tree is (after version 1.2, before 59 | version 1.3). Many VCS systems can report a description that captures this, 60 | for example `git describe --tags --dirty --always` reports things like 61 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 62 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 63 | uncommitted changes. 64 | 65 | The version identifier is used for multiple purposes: 66 | 67 | * to allow the module to self-identify its version: `myproject.__version__` 68 | * to choose a name and prefix for a 'setup.py sdist' tarball 69 | 70 | ## Theory of Operation 71 | 72 | Versioneer works by adding a special `_version.py` file into your source 73 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 74 | dynamically ask the VCS tool for version information at import time. 75 | 76 | `_version.py` also contains `$Revision$` markers, and the installation 77 | process marks `_version.py` to have this marker rewritten with a tag name 78 | during the `git archive` command. As a result, generated tarballs will 79 | contain enough information to get the proper version. 80 | 81 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 82 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 83 | that configures it. This overrides several distutils/setuptools commands to 84 | compute the version when invoked, and changes `setup.py build` and `setup.py 85 | sdist` to replace `_version.py` with a small static file that contains just 86 | the generated version data. 87 | 88 | ## Installation 89 | 90 | First, decide on values for the following configuration variables: 91 | 92 | * `VCS`: the version control system you use. Currently accepts "git". 93 | 94 | * `style`: the style of version string to be produced. See "Styles" below for 95 | details. Defaults to "pep440", which looks like 96 | `TAG[+DISTANCE.gSHORTHASH[.dirty]]`. 97 | 98 | * `versionfile_source`: 99 | 100 | A project-relative pathname into which the generated version strings should 101 | be written. This is usually a `_version.py` next to your project's main 102 | `__init__.py` file, so it can be imported at runtime. If your project uses 103 | `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. 104 | This file should be checked in to your VCS as usual: the copy created below 105 | by `setup.py setup_versioneer` will include code that parses expanded VCS 106 | keywords in generated tarballs. The 'build' and 'sdist' commands will 107 | replace it with a copy that has just the calculated version string. 108 | 109 | This must be set even if your project does not have any modules (and will 110 | therefore never import `_version.py`), since "setup.py sdist" -based trees 111 | still need somewhere to record the pre-calculated version strings. Anywhere 112 | in the source tree should do. If there is a `__init__.py` next to your 113 | `_version.py`, the `setup.py setup_versioneer` command (described below) 114 | will append some `__version__`-setting assignments, if they aren't already 115 | present. 116 | 117 | * `versionfile_build`: 118 | 119 | Like `versionfile_source`, but relative to the build directory instead of 120 | the source directory. These will differ when your setup.py uses 121 | 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, 122 | then you will probably have `versionfile_build='myproject/_version.py'` and 123 | `versionfile_source='src/myproject/_version.py'`. 124 | 125 | If this is set to None, then `setup.py build` will not attempt to rewrite 126 | any `_version.py` in the built tree. If your project does not have any 127 | libraries (e.g. if it only builds a script), then you should use 128 | `versionfile_build = None` and override `distutils.command.build_scripts` 129 | to explicitly insert a copy of `versioneer.get_version()` into your 130 | generated script. 131 | 132 | * `tag_prefix`: 133 | 134 | a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. 135 | If your tags look like 'myproject-1.2.0', then you should use 136 | tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this 137 | should be an empty string. 138 | 139 | * `parentdir_prefix`: 140 | 141 | a optional string, frequently the same as tag_prefix, which appears at the 142 | start of all unpacked tarball filenames. If your tarball unpacks into 143 | 'myproject-1.2.0', this should be 'myproject-'. To disable this feature, 144 | just omit the field from your `setup.cfg`. 145 | 146 | This tool provides one script, named `versioneer`. That script has one mode, 147 | "install", which writes a copy of `versioneer.py` into the current directory 148 | and runs `versioneer.py setup` to finish the installation. 149 | 150 | To versioneer-enable your project: 151 | 152 | * 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and 153 | populating it with the configuration values you decided earlier (note that 154 | the option names are not case-sensitive): 155 | 156 | ```` 157 | [versioneer] 158 | VCS = git 159 | style = pep440 160 | versionfile_source = src/myproject/_version.py 161 | versionfile_build = myproject/_version.py 162 | tag_prefix = "" 163 | parentdir_prefix = myproject- 164 | ```` 165 | 166 | * 2: Run `versioneer install`. This will do the following: 167 | 168 | * copy `versioneer.py` into the top of your source tree 169 | * create `_version.py` in the right place (`versionfile_source`) 170 | * modify your `__init__.py` (if one exists next to `_version.py`) to define 171 | `__version__` (by calling a function from `_version.py`) 172 | * modify your `MANIFEST.in` to include both `versioneer.py` and the 173 | generated `_version.py` in sdist tarballs 174 | 175 | `versioneer install` will complain about any problems it finds with your 176 | `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all 177 | the problems. 178 | 179 | * 3: add a `import versioneer` to your setup.py, and add the following 180 | arguments to the setup() call: 181 | 182 | version=versioneer.get_version(), 183 | cmdclass=versioneer.get_cmdclass(), 184 | 185 | * 4: commit these changes to your VCS. To make sure you won't forget, 186 | `versioneer install` will mark everything it touched for addition using 187 | `git add`. Don't forget to add `setup.py` and `setup.cfg` too. 188 | 189 | ## Post-Installation Usage 190 | 191 | Once established, all uses of your tree from a VCS checkout should get the 192 | current version string. All generated tarballs should include an embedded 193 | version string (so users who unpack them will not need a VCS tool installed). 194 | 195 | If you distribute your project through PyPI, then the release process should 196 | boil down to two steps: 197 | 198 | * 1: git tag 1.0 199 | * 2: python setup.py register sdist upload 200 | 201 | If you distribute it through github (i.e. users use github to generate 202 | tarballs with `git archive`), the process is: 203 | 204 | * 1: git tag 1.0 205 | * 2: git push; git push --tags 206 | 207 | Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at 208 | least one tag in its history. 209 | 210 | ## Version-String Flavors 211 | 212 | Code which uses Versioneer can learn about its version string at runtime by 213 | importing `_version` from your main `__init__.py` file and running the 214 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 215 | import the top-level `versioneer.py` and run `get_versions()`. 216 | 217 | Both functions return a dictionary with different flavors of version 218 | information: 219 | 220 | * `['version']`: A condensed version string, rendered using the selected 221 | style. This is the most commonly used value for the project's version 222 | string. The default "pep440" style yields strings like `0.11`, 223 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 224 | below for alternative styles. 225 | 226 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 227 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 228 | 229 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 230 | this is only accurate if run in a VCS checkout, otherwise it is likely to 231 | be False or None 232 | 233 | * `['error']`: if the version string could not be computed, this will be set 234 | to a string describing the problem, otherwise it will be None. It may be 235 | useful to throw an exception in setup.py if this is set, to avoid e.g. 236 | creating tarballs with a version string of "unknown". 237 | 238 | Some variants are more useful than others. Including `full-revisionid` in a 239 | bug report should allow developers to reconstruct the exact code being tested 240 | (or indicate the presence of local changes that should be shared with the 241 | developers). `version` is suitable for display in an "about" box or a CLI 242 | `--version` output: it can be easily compared against release notes and lists 243 | of bugs fixed in various releases. 244 | 245 | The installer adds the following text to your `__init__.py` to place a basic 246 | version in `YOURPROJECT.__version__`: 247 | 248 | from ._version import get_versions 249 | __version__ = get_versions()['version'] 250 | del get_versions 251 | 252 | ## Styles 253 | 254 | The setup.cfg `style=` configuration controls how the VCS information is 255 | rendered into a version string. 256 | 257 | The default style, "pep440", produces a PEP440-compliant string, equal to the 258 | un-prefixed tag name for actual releases, and containing an additional "local 259 | version" section with more detail for in-between builds. For Git, this is 260 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 261 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 262 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 263 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 264 | software (exactly equal to a known tag), the identifier will only contain the 265 | stripped tag, e.g. "0.11". 266 | 267 | Other styles are available. See details.md in the Versioneer source tree for 268 | descriptions. 269 | 270 | ## Debugging 271 | 272 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 273 | to return a version of "0+unknown". To investigate the problem, run `setup.py 274 | version`, which will run the version-lookup code in a verbose mode, and will 275 | display the full contents of `get_versions()` (including the `error` string, 276 | which may help identify what went wrong). 277 | 278 | ## Updating Versioneer 279 | 280 | To upgrade your project to a new release of Versioneer, do the following: 281 | 282 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 283 | * edit `setup.cfg`, if necessary, to include any new configuration settings 284 | indicated by the release notes 285 | * re-run `versioneer install` in your source tree, to replace 286 | `SRC/_version.py` 287 | * commit any changed files 288 | 289 | ### Upgrading to 0.15 290 | 291 | Starting with this version, Versioneer is configured with a `[versioneer]` 292 | section in your `setup.cfg` file. Earlier versions required the `setup.py` to 293 | set attributes on the `versioneer` module immediately after import. The new 294 | version will refuse to run (raising an exception during import) until you 295 | have provided the necessary `setup.cfg` section. 296 | 297 | In addition, the Versioneer package provides an executable named 298 | `versioneer`, and the installation process is driven by running `versioneer 299 | install`. In 0.14 and earlier, the executable was named 300 | `versioneer-installer` and was run without an argument. 301 | 302 | ### Upgrading to 0.14 303 | 304 | 0.14 changes the format of the version string. 0.13 and earlier used 305 | hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a 306 | plus-separated "local version" section strings, with dot-separated 307 | components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old 308 | format, but should be ok with the new one. 309 | 310 | ### Upgrading from 0.11 to 0.12 311 | 312 | Nothing special. 313 | 314 | ### Upgrading from 0.10 to 0.11 315 | 316 | You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running 317 | `setup.py setup_versioneer`. This will enable the use of additional 318 | version-control systems (SVN, etc) in the future. 319 | 320 | ## Future Directions 321 | 322 | This tool is designed to make it easily extended to other version-control 323 | systems: all VCS-specific components are in separate directories like 324 | src/git/ . The top-level `versioneer.py` script is assembled from these 325 | components by running make-versioneer.py . In the future, make-versioneer.py 326 | will take a VCS name as an argument, and will construct a version of 327 | `versioneer.py` that is specific to the given VCS. It might also take the 328 | configuration arguments that are currently provided manually during 329 | installation by editing setup.py . Alternatively, it might go the other 330 | direction and include code from all supported VCS systems, reducing the 331 | number of intermediate scripts. 332 | 333 | 334 | ## License 335 | 336 | To make Versioneer easier to embed, all its code is hereby released into the 337 | public domain. The `_version.py` that it creates is also in the public 338 | domain. 339 | 340 | """ 341 | 342 | from __future__ import print_function 343 | try: 344 | import configparser 345 | except ImportError: 346 | import ConfigParser as configparser 347 | import errno 348 | import json 349 | import os 350 | import re 351 | import subprocess 352 | import sys 353 | 354 | 355 | class VersioneerConfig: 356 | pass 357 | 358 | 359 | def get_root(): 360 | # we require that all commands are run from the project root, i.e. the 361 | # directory that contains setup.py, setup.cfg, and versioneer.py . 362 | root = os.path.realpath(os.path.abspath(os.getcwd())) 363 | setup_py = os.path.join(root, "setup.py") 364 | versioneer_py = os.path.join(root, "versioneer.py") 365 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 366 | # allow 'python path/to/setup.py COMMAND' 367 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 368 | setup_py = os.path.join(root, "setup.py") 369 | versioneer_py = os.path.join(root, "versioneer.py") 370 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 371 | err = ("Versioneer was unable to run the project root directory. " 372 | "Versioneer requires setup.py to be executed from " 373 | "its immediate directory (like 'python setup.py COMMAND'), " 374 | "or in a way that lets it use sys.argv[0] to find the root " 375 | "(like 'python path/to/setup.py COMMAND').") 376 | raise VersioneerBadRootError(err) 377 | try: 378 | # Certain runtime workflows (setup.py install/develop in a setuptools 379 | # tree) execute all dependencies in a single python process, so 380 | # "versioneer" may be imported multiple times, and python's shared 381 | # module-import table will cache the first one. So we can't use 382 | # os.path.dirname(__file__), as that will find whichever 383 | # versioneer.py was first imported, even in later projects. 384 | me = os.path.realpath(os.path.abspath(__file__)) 385 | if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]: 386 | print("Warning: build in %s is using versioneer.py from %s" 387 | % (os.path.dirname(me), versioneer_py)) 388 | except NameError: 389 | pass 390 | return root 391 | 392 | 393 | def get_config_from_root(root): 394 | # This might raise EnvironmentError (if setup.cfg is missing), or 395 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 396 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 397 | # the top of versioneer.py for instructions on writing your setup.cfg . 398 | setup_cfg = os.path.join(root, "setup.cfg") 399 | parser = configparser.SafeConfigParser() 400 | with open(setup_cfg, "r") as f: 401 | parser.readfp(f) 402 | VCS = parser.get("versioneer", "VCS") # mandatory 403 | 404 | def get(parser, name): 405 | if parser.has_option("versioneer", name): 406 | return parser.get("versioneer", name) 407 | return None 408 | cfg = VersioneerConfig() 409 | cfg.VCS = VCS 410 | cfg.style = get(parser, "style") or "" 411 | cfg.versionfile_source = get(parser, "versionfile_source") 412 | cfg.versionfile_build = get(parser, "versionfile_build") 413 | cfg.tag_prefix = get(parser, "tag_prefix") 414 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 415 | cfg.verbose = get(parser, "verbose") 416 | return cfg 417 | 418 | 419 | class NotThisMethod(Exception): 420 | pass 421 | 422 | # these dictionaries contain VCS-specific tools 423 | LONG_VERSION_PY = {} 424 | HANDLERS = {} 425 | 426 | 427 | def register_vcs_handler(vcs, method): # decorator 428 | def decorate(f): 429 | if vcs not in HANDLERS: 430 | HANDLERS[vcs] = {} 431 | HANDLERS[vcs][method] = f 432 | return f 433 | return decorate 434 | 435 | 436 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 437 | assert isinstance(commands, list) 438 | p = None 439 | for c in commands: 440 | try: 441 | dispcmd = str([c] + args) 442 | # remember shell=False, so use git.cmd on windows, not just git 443 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 444 | stderr=(subprocess.PIPE if hide_stderr 445 | else None)) 446 | break 447 | except EnvironmentError: 448 | e = sys.exc_info()[1] 449 | if e.errno == errno.ENOENT: 450 | continue 451 | if verbose: 452 | print("unable to run %s" % dispcmd) 453 | print(e) 454 | return None 455 | else: 456 | if verbose: 457 | print("unable to find command, tried %s" % (commands,)) 458 | return None 459 | stdout = p.communicate()[0].strip() 460 | if sys.version_info[0] >= 3: 461 | stdout = stdout.decode() 462 | if p.returncode != 0: 463 | if verbose: 464 | print("unable to run %s (error)" % dispcmd) 465 | return None 466 | return stdout 467 | LONG_VERSION_PY['git'] = ''' 468 | # This file helps to compute a version number in source trees obtained from 469 | # git-archive tarball (such as those provided by githubs download-from-tag 470 | # feature). Distribution tarballs (built by setup.py sdist) and build 471 | # directories (produced by setup.py build) will contain a much shorter file 472 | # that just contains the computed version number. 473 | 474 | # This file is released into the public domain. Generated by 475 | # versioneer-0.15 (https://github.com/warner/python-versioneer) 476 | 477 | import errno 478 | import os 479 | import re 480 | import subprocess 481 | import sys 482 | 483 | 484 | def get_keywords(): 485 | # these strings will be replaced by git during git-archive. 486 | # setup.py/versioneer.py will grep for the variable names, so they must 487 | # each be defined on a line of their own. _version.py will just call 488 | # get_keywords(). 489 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 490 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 491 | keywords = {"refnames": git_refnames, "full": git_full} 492 | return keywords 493 | 494 | 495 | class VersioneerConfig: 496 | pass 497 | 498 | 499 | def get_config(): 500 | # these strings are filled in when 'setup.py versioneer' creates 501 | # _version.py 502 | cfg = VersioneerConfig() 503 | cfg.VCS = "git" 504 | cfg.style = "%(STYLE)s" 505 | cfg.tag_prefix = "%(TAG_PREFIX)s" 506 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 507 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 508 | cfg.verbose = False 509 | return cfg 510 | 511 | 512 | class NotThisMethod(Exception): 513 | pass 514 | 515 | 516 | LONG_VERSION_PY = {} 517 | HANDLERS = {} 518 | 519 | 520 | def register_vcs_handler(vcs, method): # decorator 521 | def decorate(f): 522 | if vcs not in HANDLERS: 523 | HANDLERS[vcs] = {} 524 | HANDLERS[vcs][method] = f 525 | return f 526 | return decorate 527 | 528 | 529 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 530 | assert isinstance(commands, list) 531 | p = None 532 | for c in commands: 533 | try: 534 | dispcmd = str([c] + args) 535 | # remember shell=False, so use git.cmd on windows, not just git 536 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 537 | stderr=(subprocess.PIPE if hide_stderr 538 | else None)) 539 | break 540 | except EnvironmentError: 541 | e = sys.exc_info()[1] 542 | if e.errno == errno.ENOENT: 543 | continue 544 | if verbose: 545 | print("unable to run %%s" %% dispcmd) 546 | print(e) 547 | return None 548 | else: 549 | if verbose: 550 | print("unable to find command, tried %%s" %% (commands,)) 551 | return None 552 | stdout = p.communicate()[0].strip() 553 | if sys.version_info[0] >= 3: 554 | stdout = stdout.decode() 555 | if p.returncode != 0: 556 | if verbose: 557 | print("unable to run %%s (error)" %% dispcmd) 558 | return None 559 | return stdout 560 | 561 | 562 | def versions_from_parentdir(parentdir_prefix, root, verbose): 563 | # Source tarballs conventionally unpack into a directory that includes 564 | # both the project name and a version string. 565 | dirname = os.path.basename(root) 566 | if not dirname.startswith(parentdir_prefix): 567 | if verbose: 568 | print("guessing rootdir is '%%s', but '%%s' doesn't start with " 569 | "prefix '%%s'" %% (root, dirname, parentdir_prefix)) 570 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 571 | return {"version": dirname[len(parentdir_prefix):], 572 | "full-revisionid": None, 573 | "dirty": False, "error": None} 574 | 575 | 576 | @register_vcs_handler("git", "get_keywords") 577 | def git_get_keywords(versionfile_abs): 578 | # the code embedded in _version.py can just fetch the value of these 579 | # keywords. When used from setup.py, we don't want to import _version.py, 580 | # so we do it with a regexp instead. This function is not used from 581 | # _version.py. 582 | keywords = {} 583 | try: 584 | f = open(versionfile_abs, "r") 585 | for line in f.readlines(): 586 | if line.strip().startswith("git_refnames ="): 587 | mo = re.search(r'=\s*"(.*)"', line) 588 | if mo: 589 | keywords["refnames"] = mo.group(1) 590 | if line.strip().startswith("git_full ="): 591 | mo = re.search(r'=\s*"(.*)"', line) 592 | if mo: 593 | keywords["full"] = mo.group(1) 594 | f.close() 595 | except EnvironmentError: 596 | pass 597 | return keywords 598 | 599 | 600 | @register_vcs_handler("git", "keywords") 601 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 602 | if not keywords: 603 | raise NotThisMethod("no keywords at all, weird") 604 | refnames = keywords["refnames"].strip() 605 | if refnames.startswith("$Format"): 606 | if verbose: 607 | print("keywords are unexpanded, not using") 608 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 609 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 610 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 611 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 612 | TAG = "tag: " 613 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 614 | if not tags: 615 | # Either we're using git < 1.8.3, or there really are no tags. We use 616 | # a heuristic: assume all version tags have a digit. The old git %%d 617 | # expansion behaves like git log --decorate=short and strips out the 618 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 619 | # between branches and tags. By ignoring refnames without digits, we 620 | # filter out many common branch names like "release" and 621 | # "stabilization", as well as "HEAD" and "master". 622 | tags = set([r for r in refs if re.search(r'\d', r)]) 623 | if verbose: 624 | print("discarding '%%s', no digits" %% ",".join(refs-tags)) 625 | if verbose: 626 | print("likely tags: %%s" %% ",".join(sorted(tags))) 627 | for ref in sorted(tags): 628 | # sorting will prefer e.g. "2.0" over "2.0rc1" 629 | if ref.startswith(tag_prefix): 630 | r = ref[len(tag_prefix):] 631 | if verbose: 632 | print("picking %%s" %% r) 633 | return {"version": r, 634 | "full-revisionid": keywords["full"].strip(), 635 | "dirty": False, "error": None 636 | } 637 | # no suitable tags, so version is "0+unknown", but full hex is still there 638 | if verbose: 639 | print("no suitable tags, using unknown + full revision id") 640 | return {"version": "0+unknown", 641 | "full-revisionid": keywords["full"].strip(), 642 | "dirty": False, "error": "no suitable tags"} 643 | 644 | 645 | @register_vcs_handler("git", "pieces_from_vcs") 646 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 647 | # this runs 'git' from the root of the source tree. This only gets called 648 | # if the git-archive 'subst' keywords were *not* expanded, and 649 | # _version.py hasn't already been rewritten with a short version string, 650 | # meaning we're inside a checked out source tree. 651 | 652 | if not os.path.exists(os.path.join(root, ".git")): 653 | if verbose: 654 | print("no .git in %%s" %% root) 655 | raise NotThisMethod("no .git directory") 656 | 657 | GITS = ["git"] 658 | if sys.platform == "win32": 659 | GITS = ["git.cmd", "git.exe"] 660 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 661 | # if there are no tags, this yields HEX[-dirty] (no NUM) 662 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 663 | "--always", "--long"], 664 | cwd=root) 665 | # --long was added in git-1.5.5 666 | if describe_out is None: 667 | raise NotThisMethod("'git describe' failed") 668 | describe_out = describe_out.strip() 669 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 670 | if full_out is None: 671 | raise NotThisMethod("'git rev-parse' failed") 672 | full_out = full_out.strip() 673 | 674 | pieces = {} 675 | pieces["long"] = full_out 676 | pieces["short"] = full_out[:7] # maybe improved later 677 | pieces["error"] = None 678 | 679 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 680 | # TAG might have hyphens. 681 | git_describe = describe_out 682 | 683 | # look for -dirty suffix 684 | dirty = git_describe.endswith("-dirty") 685 | pieces["dirty"] = dirty 686 | if dirty: 687 | git_describe = git_describe[:git_describe.rindex("-dirty")] 688 | 689 | # now we have TAG-NUM-gHEX or HEX 690 | 691 | if "-" in git_describe: 692 | # TAG-NUM-gHEX 693 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 694 | if not mo: 695 | # unparseable. Maybe git-describe is misbehaving? 696 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 697 | %% describe_out) 698 | return pieces 699 | 700 | # tag 701 | full_tag = mo.group(1) 702 | if not full_tag.startswith(tag_prefix): 703 | if verbose: 704 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 705 | print(fmt %% (full_tag, tag_prefix)) 706 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 707 | %% (full_tag, tag_prefix)) 708 | return pieces 709 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 710 | 711 | # distance: number of commits since tag 712 | pieces["distance"] = int(mo.group(2)) 713 | 714 | # commit: short hex revision ID 715 | pieces["short"] = mo.group(3) 716 | 717 | else: 718 | # HEX: no tags 719 | pieces["closest-tag"] = None 720 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 721 | cwd=root) 722 | pieces["distance"] = int(count_out) # total number of commits 723 | 724 | return pieces 725 | 726 | 727 | def plus_or_dot(pieces): 728 | if "+" in pieces.get("closest-tag", ""): 729 | return "." 730 | return "+" 731 | 732 | 733 | def render_pep440(pieces): 734 | # now build up version string, with post-release "local version 735 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 736 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 737 | 738 | # exceptions: 739 | # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 740 | 741 | if pieces["closest-tag"]: 742 | rendered = pieces["closest-tag"] 743 | if pieces["distance"] or pieces["dirty"]: 744 | rendered += plus_or_dot(pieces) 745 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 746 | if pieces["dirty"]: 747 | rendered += ".dirty" 748 | else: 749 | # exception #1 750 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 751 | pieces["short"]) 752 | if pieces["dirty"]: 753 | rendered += ".dirty" 754 | return rendered 755 | 756 | 757 | def render_pep440_pre(pieces): 758 | # TAG[.post.devDISTANCE] . No -dirty 759 | 760 | # exceptions: 761 | # 1: no tags. 0.post.devDISTANCE 762 | 763 | if pieces["closest-tag"]: 764 | rendered = pieces["closest-tag"] 765 | if pieces["distance"]: 766 | rendered += ".post.dev%%d" %% pieces["distance"] 767 | else: 768 | # exception #1 769 | rendered = "0.post.dev%%d" %% pieces["distance"] 770 | return rendered 771 | 772 | 773 | def render_pep440_post(pieces): 774 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that 775 | # .dev0 sorts backwards (a dirty tree will appear "older" than the 776 | # corresponding clean one), but you shouldn't be releasing software with 777 | # -dirty anyways. 778 | 779 | # exceptions: 780 | # 1: no tags. 0.postDISTANCE[.dev0] 781 | 782 | if pieces["closest-tag"]: 783 | rendered = pieces["closest-tag"] 784 | if pieces["distance"] or pieces["dirty"]: 785 | rendered += ".post%%d" %% pieces["distance"] 786 | if pieces["dirty"]: 787 | rendered += ".dev0" 788 | rendered += plus_or_dot(pieces) 789 | rendered += "g%%s" %% pieces["short"] 790 | else: 791 | # exception #1 792 | rendered = "0.post%%d" %% pieces["distance"] 793 | if pieces["dirty"]: 794 | rendered += ".dev0" 795 | rendered += "+g%%s" %% pieces["short"] 796 | return rendered 797 | 798 | 799 | def render_pep440_old(pieces): 800 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. 801 | 802 | # exceptions: 803 | # 1: no tags. 0.postDISTANCE[.dev0] 804 | 805 | if pieces["closest-tag"]: 806 | rendered = pieces["closest-tag"] 807 | if pieces["distance"] or pieces["dirty"]: 808 | rendered += ".post%%d" %% pieces["distance"] 809 | if pieces["dirty"]: 810 | rendered += ".dev0" 811 | else: 812 | # exception #1 813 | rendered = "0.post%%d" %% pieces["distance"] 814 | if pieces["dirty"]: 815 | rendered += ".dev0" 816 | return rendered 817 | 818 | 819 | def render_git_describe(pieces): 820 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty 821 | # --always' 822 | 823 | # exceptions: 824 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 825 | 826 | if pieces["closest-tag"]: 827 | rendered = pieces["closest-tag"] 828 | if pieces["distance"]: 829 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 830 | else: 831 | # exception #1 832 | rendered = pieces["short"] 833 | if pieces["dirty"]: 834 | rendered += "-dirty" 835 | return rendered 836 | 837 | 838 | def render_git_describe_long(pieces): 839 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty 840 | # --always -long'. The distance/hash is unconditional. 841 | 842 | # exceptions: 843 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 844 | 845 | if pieces["closest-tag"]: 846 | rendered = pieces["closest-tag"] 847 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 848 | else: 849 | # exception #1 850 | rendered = pieces["short"] 851 | if pieces["dirty"]: 852 | rendered += "-dirty" 853 | return rendered 854 | 855 | 856 | def render(pieces, style): 857 | if pieces["error"]: 858 | return {"version": "unknown", 859 | "full-revisionid": pieces.get("long"), 860 | "dirty": None, 861 | "error": pieces["error"]} 862 | 863 | if not style or style == "default": 864 | style = "pep440" # the default 865 | 866 | if style == "pep440": 867 | rendered = render_pep440(pieces) 868 | elif style == "pep440-pre": 869 | rendered = render_pep440_pre(pieces) 870 | elif style == "pep440-post": 871 | rendered = render_pep440_post(pieces) 872 | elif style == "pep440-old": 873 | rendered = render_pep440_old(pieces) 874 | elif style == "git-describe": 875 | rendered = render_git_describe(pieces) 876 | elif style == "git-describe-long": 877 | rendered = render_git_describe_long(pieces) 878 | else: 879 | raise ValueError("unknown style '%%s'" %% style) 880 | 881 | return {"version": rendered, "full-revisionid": pieces["long"], 882 | "dirty": pieces["dirty"], "error": None} 883 | 884 | 885 | def get_versions(): 886 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 887 | # __file__, we can work backwards from there to the root. Some 888 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 889 | # case we can only use expanded keywords. 890 | 891 | cfg = get_config() 892 | verbose = cfg.verbose 893 | 894 | try: 895 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 896 | verbose) 897 | except NotThisMethod: 898 | pass 899 | 900 | try: 901 | root = os.path.realpath(__file__) 902 | # versionfile_source is the relative path from the top of the source 903 | # tree (where the .git directory might live) to this file. Invert 904 | # this to find the root from __file__. 905 | for i in cfg.versionfile_source.split('/'): 906 | root = os.path.dirname(root) 907 | except NameError: 908 | return {"version": "0+unknown", "full-revisionid": None, 909 | "dirty": None, 910 | "error": "unable to find root of source tree"} 911 | 912 | try: 913 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 914 | return render(pieces, cfg.style) 915 | except NotThisMethod: 916 | pass 917 | 918 | try: 919 | if cfg.parentdir_prefix: 920 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 921 | except NotThisMethod: 922 | pass 923 | 924 | return {"version": "0+unknown", "full-revisionid": None, 925 | "dirty": None, 926 | "error": "unable to compute version"} 927 | ''' 928 | 929 | 930 | @register_vcs_handler("git", "get_keywords") 931 | def git_get_keywords(versionfile_abs): 932 | # the code embedded in _version.py can just fetch the value of these 933 | # keywords. When used from setup.py, we don't want to import _version.py, 934 | # so we do it with a regexp instead. This function is not used from 935 | # _version.py. 936 | keywords = {} 937 | try: 938 | f = open(versionfile_abs, "r") 939 | for line in f.readlines(): 940 | if line.strip().startswith("git_refnames ="): 941 | mo = re.search(r'=\s*"(.*)"', line) 942 | if mo: 943 | keywords["refnames"] = mo.group(1) 944 | if line.strip().startswith("git_full ="): 945 | mo = re.search(r'=\s*"(.*)"', line) 946 | if mo: 947 | keywords["full"] = mo.group(1) 948 | f.close() 949 | except EnvironmentError: 950 | pass 951 | return keywords 952 | 953 | 954 | @register_vcs_handler("git", "keywords") 955 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 956 | if not keywords: 957 | raise NotThisMethod("no keywords at all, weird") 958 | refnames = keywords["refnames"].strip() 959 | if refnames.startswith("$Format"): 960 | if verbose: 961 | print("keywords are unexpanded, not using") 962 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 963 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 964 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 965 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 966 | TAG = "tag: " 967 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 968 | if not tags: 969 | # Either we're using git < 1.8.3, or there really are no tags. We use 970 | # a heuristic: assume all version tags have a digit. The old git %d 971 | # expansion behaves like git log --decorate=short and strips out the 972 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 973 | # between branches and tags. By ignoring refnames without digits, we 974 | # filter out many common branch names like "release" and 975 | # "stabilization", as well as "HEAD" and "master". 976 | tags = set([r for r in refs if re.search(r'\d', r)]) 977 | if verbose: 978 | print("discarding '%s', no digits" % ",".join(refs-tags)) 979 | if verbose: 980 | print("likely tags: %s" % ",".join(sorted(tags))) 981 | for ref in sorted(tags): 982 | # sorting will prefer e.g. "2.0" over "2.0rc1" 983 | if ref.startswith(tag_prefix): 984 | r = ref[len(tag_prefix):] 985 | if verbose: 986 | print("picking %s" % r) 987 | return {"version": r, 988 | "full-revisionid": keywords["full"].strip(), 989 | "dirty": False, "error": None 990 | } 991 | # no suitable tags, so version is "0+unknown", but full hex is still there 992 | if verbose: 993 | print("no suitable tags, using unknown + full revision id") 994 | return {"version": "0+unknown", 995 | "full-revisionid": keywords["full"].strip(), 996 | "dirty": False, "error": "no suitable tags"} 997 | 998 | 999 | @register_vcs_handler("git", "pieces_from_vcs") 1000 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1001 | # this runs 'git' from the root of the source tree. This only gets called 1002 | # if the git-archive 'subst' keywords were *not* expanded, and 1003 | # _version.py hasn't already been rewritten with a short version string, 1004 | # meaning we're inside a checked out source tree. 1005 | 1006 | if not os.path.exists(os.path.join(root, ".git")): 1007 | if verbose: 1008 | print("no .git in %s" % root) 1009 | raise NotThisMethod("no .git directory") 1010 | 1011 | GITS = ["git"] 1012 | if sys.platform == "win32": 1013 | GITS = ["git.cmd", "git.exe"] 1014 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 1015 | # if there are no tags, this yields HEX[-dirty] (no NUM) 1016 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 1017 | "--always", "--long"], 1018 | cwd=root) 1019 | # --long was added in git-1.5.5 1020 | if describe_out is None: 1021 | raise NotThisMethod("'git describe' failed") 1022 | describe_out = describe_out.strip() 1023 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1024 | if full_out is None: 1025 | raise NotThisMethod("'git rev-parse' failed") 1026 | full_out = full_out.strip() 1027 | 1028 | pieces = {} 1029 | pieces["long"] = full_out 1030 | pieces["short"] = full_out[:7] # maybe improved later 1031 | pieces["error"] = None 1032 | 1033 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1034 | # TAG might have hyphens. 1035 | git_describe = describe_out 1036 | 1037 | # look for -dirty suffix 1038 | dirty = git_describe.endswith("-dirty") 1039 | pieces["dirty"] = dirty 1040 | if dirty: 1041 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1042 | 1043 | # now we have TAG-NUM-gHEX or HEX 1044 | 1045 | if "-" in git_describe: 1046 | # TAG-NUM-gHEX 1047 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1048 | if not mo: 1049 | # unparseable. Maybe git-describe is misbehaving? 1050 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1051 | % describe_out) 1052 | return pieces 1053 | 1054 | # tag 1055 | full_tag = mo.group(1) 1056 | if not full_tag.startswith(tag_prefix): 1057 | if verbose: 1058 | fmt = "tag '%s' doesn't start with prefix '%s'" 1059 | print(fmt % (full_tag, tag_prefix)) 1060 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1061 | % (full_tag, tag_prefix)) 1062 | return pieces 1063 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1064 | 1065 | # distance: number of commits since tag 1066 | pieces["distance"] = int(mo.group(2)) 1067 | 1068 | # commit: short hex revision ID 1069 | pieces["short"] = mo.group(3) 1070 | 1071 | else: 1072 | # HEX: no tags 1073 | pieces["closest-tag"] = None 1074 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 1075 | cwd=root) 1076 | pieces["distance"] = int(count_out) # total number of commits 1077 | 1078 | return pieces 1079 | 1080 | 1081 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1082 | GITS = ["git"] 1083 | if sys.platform == "win32": 1084 | GITS = ["git.cmd", "git.exe"] 1085 | files = [manifest_in, versionfile_source] 1086 | if ipy: 1087 | files.append(ipy) 1088 | try: 1089 | me = __file__ 1090 | if me.endswith(".pyc") or me.endswith(".pyo"): 1091 | me = os.path.splitext(me)[0] + ".py" 1092 | versioneer_file = os.path.relpath(me) 1093 | except NameError: 1094 | versioneer_file = "versioneer.py" 1095 | files.append(versioneer_file) 1096 | present = False 1097 | try: 1098 | f = open(".gitattributes", "r") 1099 | for line in f.readlines(): 1100 | if line.strip().startswith(versionfile_source): 1101 | if "export-subst" in line.strip().split()[1:]: 1102 | present = True 1103 | f.close() 1104 | except EnvironmentError: 1105 | pass 1106 | if not present: 1107 | f = open(".gitattributes", "a+") 1108 | f.write("%s export-subst\n" % versionfile_source) 1109 | f.close() 1110 | files.append(".gitattributes") 1111 | run_command(GITS, ["add", "--"] + files) 1112 | 1113 | 1114 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1115 | # Source tarballs conventionally unpack into a directory that includes 1116 | # both the project name and a version string. 1117 | dirname = os.path.basename(root) 1118 | if not dirname.startswith(parentdir_prefix): 1119 | if verbose: 1120 | print("guessing rootdir is '%s', but '%s' doesn't start with " 1121 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 1122 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1123 | return {"version": dirname[len(parentdir_prefix):], 1124 | "full-revisionid": None, 1125 | "dirty": False, "error": None} 1126 | 1127 | SHORT_VERSION_PY = """ 1128 | # This file was generated by 'versioneer.py' (0.15) from 1129 | # revision-control system data, or from the parent directory name of an 1130 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1131 | # of this file. 1132 | 1133 | import json 1134 | import sys 1135 | 1136 | version_json = ''' 1137 | %s 1138 | ''' # END VERSION_JSON 1139 | 1140 | 1141 | def get_versions(): 1142 | return json.loads(version_json) 1143 | """ 1144 | 1145 | 1146 | def versions_from_file(filename): 1147 | try: 1148 | with open(filename) as f: 1149 | contents = f.read() 1150 | except EnvironmentError: 1151 | raise NotThisMethod("unable to read _version.py") 1152 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", 1153 | contents, re.M | re.S) 1154 | if not mo: 1155 | raise NotThisMethod("no version_json in _version.py") 1156 | return json.loads(mo.group(1)) 1157 | 1158 | 1159 | def write_to_version_file(filename, versions): 1160 | os.unlink(filename) 1161 | contents = json.dumps(versions, sort_keys=True, 1162 | indent=1, separators=(",", ": ")) 1163 | with open(filename, "w") as f: 1164 | f.write(SHORT_VERSION_PY % contents) 1165 | 1166 | print("set %s to '%s'" % (filename, versions["version"])) 1167 | 1168 | 1169 | def plus_or_dot(pieces): 1170 | if "+" in pieces.get("closest-tag", ""): 1171 | return "." 1172 | return "+" 1173 | 1174 | 1175 | def render_pep440(pieces): 1176 | # now build up version string, with post-release "local version 1177 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1178 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1179 | 1180 | # exceptions: 1181 | # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1182 | 1183 | if pieces["closest-tag"]: 1184 | rendered = pieces["closest-tag"] 1185 | if pieces["distance"] or pieces["dirty"]: 1186 | rendered += plus_or_dot(pieces) 1187 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1188 | if pieces["dirty"]: 1189 | rendered += ".dirty" 1190 | else: 1191 | # exception #1 1192 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1193 | pieces["short"]) 1194 | if pieces["dirty"]: 1195 | rendered += ".dirty" 1196 | return rendered 1197 | 1198 | 1199 | def render_pep440_pre(pieces): 1200 | # TAG[.post.devDISTANCE] . No -dirty 1201 | 1202 | # exceptions: 1203 | # 1: no tags. 0.post.devDISTANCE 1204 | 1205 | if pieces["closest-tag"]: 1206 | rendered = pieces["closest-tag"] 1207 | if pieces["distance"]: 1208 | rendered += ".post.dev%d" % pieces["distance"] 1209 | else: 1210 | # exception #1 1211 | rendered = "0.post.dev%d" % pieces["distance"] 1212 | return rendered 1213 | 1214 | 1215 | def render_pep440_post(pieces): 1216 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that 1217 | # .dev0 sorts backwards (a dirty tree will appear "older" than the 1218 | # corresponding clean one), but you shouldn't be releasing software with 1219 | # -dirty anyways. 1220 | 1221 | # exceptions: 1222 | # 1: no tags. 0.postDISTANCE[.dev0] 1223 | 1224 | if pieces["closest-tag"]: 1225 | rendered = pieces["closest-tag"] 1226 | if pieces["distance"] or pieces["dirty"]: 1227 | rendered += ".post%d" % pieces["distance"] 1228 | if pieces["dirty"]: 1229 | rendered += ".dev0" 1230 | rendered += plus_or_dot(pieces) 1231 | rendered += "g%s" % pieces["short"] 1232 | else: 1233 | # exception #1 1234 | rendered = "0.post%d" % pieces["distance"] 1235 | if pieces["dirty"]: 1236 | rendered += ".dev0" 1237 | rendered += "+g%s" % pieces["short"] 1238 | return rendered 1239 | 1240 | 1241 | def render_pep440_old(pieces): 1242 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. 1243 | 1244 | # exceptions: 1245 | # 1: no tags. 0.postDISTANCE[.dev0] 1246 | 1247 | if pieces["closest-tag"]: 1248 | rendered = pieces["closest-tag"] 1249 | if pieces["distance"] or pieces["dirty"]: 1250 | rendered += ".post%d" % pieces["distance"] 1251 | if pieces["dirty"]: 1252 | rendered += ".dev0" 1253 | else: 1254 | # exception #1 1255 | rendered = "0.post%d" % pieces["distance"] 1256 | if pieces["dirty"]: 1257 | rendered += ".dev0" 1258 | return rendered 1259 | 1260 | 1261 | def render_git_describe(pieces): 1262 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty 1263 | # --always' 1264 | 1265 | # exceptions: 1266 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1267 | 1268 | if pieces["closest-tag"]: 1269 | rendered = pieces["closest-tag"] 1270 | if pieces["distance"]: 1271 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1272 | else: 1273 | # exception #1 1274 | rendered = pieces["short"] 1275 | if pieces["dirty"]: 1276 | rendered += "-dirty" 1277 | return rendered 1278 | 1279 | 1280 | def render_git_describe_long(pieces): 1281 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty 1282 | # --always -long'. The distance/hash is unconditional. 1283 | 1284 | # exceptions: 1285 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1286 | 1287 | if pieces["closest-tag"]: 1288 | rendered = pieces["closest-tag"] 1289 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1290 | else: 1291 | # exception #1 1292 | rendered = pieces["short"] 1293 | if pieces["dirty"]: 1294 | rendered += "-dirty" 1295 | return rendered 1296 | 1297 | 1298 | def render(pieces, style): 1299 | if pieces["error"]: 1300 | return {"version": "unknown", 1301 | "full-revisionid": pieces.get("long"), 1302 | "dirty": None, 1303 | "error": pieces["error"]} 1304 | 1305 | if not style or style == "default": 1306 | style = "pep440" # the default 1307 | 1308 | if style == "pep440": 1309 | rendered = render_pep440(pieces) 1310 | elif style == "pep440-pre": 1311 | rendered = render_pep440_pre(pieces) 1312 | elif style == "pep440-post": 1313 | rendered = render_pep440_post(pieces) 1314 | elif style == "pep440-old": 1315 | rendered = render_pep440_old(pieces) 1316 | elif style == "git-describe": 1317 | rendered = render_git_describe(pieces) 1318 | elif style == "git-describe-long": 1319 | rendered = render_git_describe_long(pieces) 1320 | else: 1321 | raise ValueError("unknown style '%s'" % style) 1322 | 1323 | return {"version": rendered, "full-revisionid": pieces["long"], 1324 | "dirty": pieces["dirty"], "error": None} 1325 | 1326 | 1327 | class VersioneerBadRootError(Exception): 1328 | pass 1329 | 1330 | 1331 | def get_versions(verbose=False): 1332 | # returns dict with two keys: 'version' and 'full' 1333 | 1334 | if "versioneer" in sys.modules: 1335 | # see the discussion in cmdclass.py:get_cmdclass() 1336 | del sys.modules["versioneer"] 1337 | 1338 | root = get_root() 1339 | cfg = get_config_from_root(root) 1340 | 1341 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1342 | handlers = HANDLERS.get(cfg.VCS) 1343 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1344 | verbose = verbose or cfg.verbose 1345 | assert cfg.versionfile_source is not None, \ 1346 | "please set versioneer.versionfile_source" 1347 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1348 | 1349 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1350 | 1351 | # extract version from first of: _version.py, VCS command (e.g. 'git 1352 | # describe'), parentdir. This is meant to work for developers using a 1353 | # source checkout, for users of a tarball created by 'setup.py sdist', 1354 | # and for users of a tarball/zipball created by 'git archive' or github's 1355 | # download-from-tag feature or the equivalent in other VCSes. 1356 | 1357 | get_keywords_f = handlers.get("get_keywords") 1358 | from_keywords_f = handlers.get("keywords") 1359 | if get_keywords_f and from_keywords_f: 1360 | try: 1361 | keywords = get_keywords_f(versionfile_abs) 1362 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1363 | if verbose: 1364 | print("got version from expanded keyword %s" % ver) 1365 | return ver 1366 | except NotThisMethod: 1367 | pass 1368 | 1369 | try: 1370 | ver = versions_from_file(versionfile_abs) 1371 | if verbose: 1372 | print("got version from file %s %s" % (versionfile_abs, ver)) 1373 | return ver 1374 | except NotThisMethod: 1375 | pass 1376 | 1377 | from_vcs_f = handlers.get("pieces_from_vcs") 1378 | if from_vcs_f: 1379 | try: 1380 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1381 | ver = render(pieces, cfg.style) 1382 | if verbose: 1383 | print("got version from VCS %s" % ver) 1384 | return ver 1385 | except NotThisMethod: 1386 | pass 1387 | 1388 | try: 1389 | if cfg.parentdir_prefix: 1390 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1391 | if verbose: 1392 | print("got version from parentdir %s" % ver) 1393 | return ver 1394 | except NotThisMethod: 1395 | pass 1396 | 1397 | if verbose: 1398 | print("unable to compute version") 1399 | 1400 | return {"version": "0+unknown", "full-revisionid": None, 1401 | "dirty": None, "error": "unable to compute version"} 1402 | 1403 | 1404 | def get_version(): 1405 | return get_versions()["version"] 1406 | 1407 | 1408 | def get_cmdclass(): 1409 | if "versioneer" in sys.modules: 1410 | del sys.modules["versioneer"] 1411 | # this fixes the "python setup.py develop" case (also 'install' and 1412 | # 'easy_install .'), in which subdependencies of the main project are 1413 | # built (using setup.py bdist_egg) in the same python process. Assume 1414 | # a main project A and a dependency B, which use different versions 1415 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1416 | # sys.modules by the time B's setup.py is executed, causing B to run 1417 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1418 | # sandbox that restores sys.modules to it's pre-build state, so the 1419 | # parent is protected against the child's "import versioneer". By 1420 | # removing ourselves from sys.modules here, before the child build 1421 | # happens, we protect the child from the parent's versioneer too. 1422 | # Also see https://github.com/warner/python-versioneer/issues/52 1423 | 1424 | cmds = {} 1425 | 1426 | # we add "version" to both distutils and setuptools 1427 | from distutils.core import Command 1428 | 1429 | class cmd_version(Command): 1430 | description = "report generated version string" 1431 | user_options = [] 1432 | boolean_options = [] 1433 | 1434 | def initialize_options(self): 1435 | pass 1436 | 1437 | def finalize_options(self): 1438 | pass 1439 | 1440 | def run(self): 1441 | vers = get_versions(verbose=True) 1442 | print("Version: %s" % vers["version"]) 1443 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1444 | print(" dirty: %s" % vers.get("dirty")) 1445 | if vers["error"]: 1446 | print(" error: %s" % vers["error"]) 1447 | cmds["version"] = cmd_version 1448 | 1449 | # we override "build_py" in both distutils and setuptools 1450 | # 1451 | # most invocation pathways end up running build_py: 1452 | # distutils/build -> build_py 1453 | # distutils/install -> distutils/build ->.. 1454 | # setuptools/bdist_wheel -> distutils/install ->.. 1455 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1456 | # setuptools/install -> bdist_egg ->.. 1457 | # setuptools/develop -> ? 1458 | 1459 | from distutils.command.build_py import build_py as _build_py 1460 | 1461 | class cmd_build_py(_build_py): 1462 | def run(self): 1463 | root = get_root() 1464 | cfg = get_config_from_root(root) 1465 | versions = get_versions() 1466 | _build_py.run(self) 1467 | # now locate _version.py in the new build/ directory and replace 1468 | # it with an updated value 1469 | if cfg.versionfile_build: 1470 | target_versionfile = os.path.join(self.build_lib, 1471 | cfg.versionfile_build) 1472 | print("UPDATING %s" % target_versionfile) 1473 | write_to_version_file(target_versionfile, versions) 1474 | cmds["build_py"] = cmd_build_py 1475 | 1476 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1477 | from cx_Freeze.dist import build_exe as _build_exe 1478 | 1479 | class cmd_build_exe(_build_exe): 1480 | def run(self): 1481 | root = get_root() 1482 | cfg = get_config_from_root(root) 1483 | versions = get_versions() 1484 | target_versionfile = cfg.versionfile_source 1485 | print("UPDATING %s" % target_versionfile) 1486 | write_to_version_file(target_versionfile, versions) 1487 | 1488 | _build_exe.run(self) 1489 | os.unlink(target_versionfile) 1490 | with open(cfg.versionfile_source, "w") as f: 1491 | LONG = LONG_VERSION_PY[cfg.VCS] 1492 | f.write(LONG % 1493 | {"DOLLAR": "$", 1494 | "STYLE": cfg.style, 1495 | "TAG_PREFIX": cfg.tag_prefix, 1496 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1497 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1498 | }) 1499 | cmds["build_exe"] = cmd_build_exe 1500 | del cmds["build_py"] 1501 | 1502 | # we override different "sdist" commands for both environments 1503 | if "setuptools" in sys.modules: 1504 | from setuptools.command.sdist import sdist as _sdist 1505 | else: 1506 | from distutils.command.sdist import sdist as _sdist 1507 | 1508 | class cmd_sdist(_sdist): 1509 | def run(self): 1510 | versions = get_versions() 1511 | self._versioneer_generated_versions = versions 1512 | # unless we update this, the command will keep using the old 1513 | # version 1514 | self.distribution.metadata.version = versions["version"] 1515 | return _sdist.run(self) 1516 | 1517 | def make_release_tree(self, base_dir, files): 1518 | root = get_root() 1519 | cfg = get_config_from_root(root) 1520 | _sdist.make_release_tree(self, base_dir, files) 1521 | # now locate _version.py in the new base_dir directory 1522 | # (remembering that it may be a hardlink) and replace it with an 1523 | # updated value 1524 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1525 | print("UPDATING %s" % target_versionfile) 1526 | write_to_version_file(target_versionfile, 1527 | self._versioneer_generated_versions) 1528 | cmds["sdist"] = cmd_sdist 1529 | 1530 | return cmds 1531 | 1532 | 1533 | CONFIG_ERROR = """ 1534 | setup.cfg is missing the necessary Versioneer configuration. You need 1535 | a section like: 1536 | 1537 | [versioneer] 1538 | VCS = git 1539 | style = pep440 1540 | versionfile_source = src/myproject/_version.py 1541 | versionfile_build = myproject/_version.py 1542 | tag_prefix = "" 1543 | parentdir_prefix = myproject- 1544 | 1545 | You will also need to edit your setup.py to use the results: 1546 | 1547 | import versioneer 1548 | setup(version=versioneer.get_version(), 1549 | cmdclass=versioneer.get_cmdclass(), ...) 1550 | 1551 | Please read the docstring in ./versioneer.py for configuration instructions, 1552 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1553 | """ 1554 | 1555 | SAMPLE_CONFIG = """ 1556 | # See the docstring in versioneer.py for instructions. Note that you must 1557 | # re-run 'versioneer.py setup' after changing this section, and commit the 1558 | # resulting files. 1559 | 1560 | [versioneer] 1561 | #VCS = git 1562 | #style = pep440 1563 | #versionfile_source = 1564 | #versionfile_build = 1565 | #tag_prefix = 1566 | #parentdir_prefix = 1567 | 1568 | """ 1569 | 1570 | INIT_PY_SNIPPET = """ 1571 | from ._version import get_versions 1572 | __version__ = get_versions()['version'] 1573 | del get_versions 1574 | """ 1575 | 1576 | 1577 | def do_setup(): 1578 | root = get_root() 1579 | try: 1580 | cfg = get_config_from_root(root) 1581 | except (EnvironmentError, configparser.NoSectionError, 1582 | configparser.NoOptionError) as e: 1583 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1584 | print("Adding sample versioneer config to setup.cfg", 1585 | file=sys.stderr) 1586 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1587 | f.write(SAMPLE_CONFIG) 1588 | print(CONFIG_ERROR, file=sys.stderr) 1589 | return 1 1590 | 1591 | print(" creating %s" % cfg.versionfile_source) 1592 | with open(cfg.versionfile_source, "w") as f: 1593 | LONG = LONG_VERSION_PY[cfg.VCS] 1594 | f.write(LONG % {"DOLLAR": "$", 1595 | "STYLE": cfg.style, 1596 | "TAG_PREFIX": cfg.tag_prefix, 1597 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1598 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1599 | }) 1600 | 1601 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), 1602 | "__init__.py") 1603 | if os.path.exists(ipy): 1604 | try: 1605 | with open(ipy, "r") as f: 1606 | old = f.read() 1607 | except EnvironmentError: 1608 | old = "" 1609 | if INIT_PY_SNIPPET not in old: 1610 | print(" appending to %s" % ipy) 1611 | with open(ipy, "a") as f: 1612 | f.write(INIT_PY_SNIPPET) 1613 | else: 1614 | print(" %s unmodified" % ipy) 1615 | else: 1616 | print(" %s doesn't exist, ok" % ipy) 1617 | ipy = None 1618 | 1619 | # Make sure both the top-level "versioneer.py" and versionfile_source 1620 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1621 | # they'll be copied into source distributions. Pip won't be able to 1622 | # install the package without this. 1623 | manifest_in = os.path.join(root, "MANIFEST.in") 1624 | simple_includes = set() 1625 | try: 1626 | with open(manifest_in, "r") as f: 1627 | for line in f: 1628 | if line.startswith("include "): 1629 | for include in line.split()[1:]: 1630 | simple_includes.add(include) 1631 | except EnvironmentError: 1632 | pass 1633 | # That doesn't cover everything MANIFEST.in can do 1634 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1635 | # it might give some false negatives. Appending redundant 'include' 1636 | # lines is safe, though. 1637 | if "versioneer.py" not in simple_includes: 1638 | print(" appending 'versioneer.py' to MANIFEST.in") 1639 | with open(manifest_in, "a") as f: 1640 | f.write("include versioneer.py\n") 1641 | else: 1642 | print(" 'versioneer.py' already in MANIFEST.in") 1643 | if cfg.versionfile_source not in simple_includes: 1644 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 1645 | cfg.versionfile_source) 1646 | with open(manifest_in, "a") as f: 1647 | f.write("include %s\n" % cfg.versionfile_source) 1648 | else: 1649 | print(" versionfile_source already in MANIFEST.in") 1650 | 1651 | # Make VCS-specific changes. For git, this means creating/changing 1652 | # .gitattributes to mark _version.py for export-time keyword 1653 | # substitution. 1654 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1655 | return 0 1656 | 1657 | 1658 | def scan_setup_py(): 1659 | found = set() 1660 | setters = False 1661 | errors = 0 1662 | with open("setup.py", "r") as f: 1663 | for line in f.readlines(): 1664 | if "import versioneer" in line: 1665 | found.add("import") 1666 | if "versioneer.get_cmdclass()" in line: 1667 | found.add("cmdclass") 1668 | if "versioneer.get_version()" in line: 1669 | found.add("get_version") 1670 | if "versioneer.VCS" in line: 1671 | setters = True 1672 | if "versioneer.versionfile_source" in line: 1673 | setters = True 1674 | if len(found) != 3: 1675 | print("") 1676 | print("Your setup.py appears to be missing some important items") 1677 | print("(but I might be wrong). Please make sure it has something") 1678 | print("roughly like the following:") 1679 | print("") 1680 | print(" import versioneer") 1681 | print(" setup( version=versioneer.get_version(),") 1682 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1683 | print("") 1684 | errors += 1 1685 | if setters: 1686 | print("You should remove lines like 'versioneer.VCS = ' and") 1687 | print("'versioneer.versionfile_source = ' . This configuration") 1688 | print("now lives in setup.cfg, and should be removed from setup.py") 1689 | print("") 1690 | errors += 1 1691 | return errors 1692 | 1693 | if __name__ == "__main__": 1694 | cmd = sys.argv[1] 1695 | if cmd == "setup": 1696 | errors = do_setup() 1697 | errors += scan_setup_py() 1698 | if errors: 1699 | sys.exit(1) 1700 | --------------------------------------------------------------------------------