├── salt ├── roots │ ├── salt │ │ ├── top.sls │ │ ├── etc │ │ │ ├── theanorc │ │ │ └── matplotlibrc │ │ ├── bin │ │ │ ├── ipynb.upstart │ │ │ ├── sanity_check.py │ │ │ ├── run_tests │ │ │ └── distribute_setup.py │ │ └── charmer │ │ │ └── init.sls │ └── pillar │ │ ├── top.sls │ │ └── python34.sls └── minion ├── .gitignore ├── TODO ├── notebooks ├── Demo │ ├── cm.csv │ └── graph.html ├── Hello World.ipynb └── Snake Charmer QA.ipynb ├── Vagrantfile ├── FAQ.md ├── CUSTOMIZING.md └── README.md /salt/roots/salt/top.sls: -------------------------------------------------------------------------------- 1 | base: 2 | '*': 3 | - charmer 4 | 5 | -------------------------------------------------------------------------------- /salt/roots/salt/etc/theanorc: -------------------------------------------------------------------------------- 1 | [blas] 2 | ldflags = -lblas -lgfortran 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | *.log 3 | log 4 | data 5 | notebooks/last_commit.txt 6 | 7 | -------------------------------------------------------------------------------- /salt/roots/pillar/top.sls: -------------------------------------------------------------------------------- 1 | base: 2 | 'charmed34': 3 | - python34 4 | 5 | 6 | -------------------------------------------------------------------------------- /salt/minion: -------------------------------------------------------------------------------- 1 | file_client: local 2 | 3 | log_file: /srv/log/minion 4 | 5 | providers: 6 | service: upstart 7 | -------------------------------------------------------------------------------- /salt/roots/salt/bin/ipynb.upstart: -------------------------------------------------------------------------------- 1 | description "Run IPython Notebook server" 2 | 3 | start on filesystem or runlevel [2345] 4 | stop on runlevel [!2345] 5 | 6 | # if you want it 
to automatically restart if it crashes, leave the next line in 7 | respawn 8 | 9 | limit nofile 100000 100000 10 | 11 | {% set pyver = pillar['pyver'] %} 12 | {% set pyver_ints = pyver|replace('.', '') %} 13 | 14 | script 15 | cd /home/vagrant/notebooks 16 | su -c "python{{ pyver }} -m IPython notebook --matplotlib inline --ip='*' --port 88{{ pyver_ints }}" vagrant 17 | end script 18 | 19 | -------------------------------------------------------------------------------- /salt/roots/salt/bin/sanity_check.py: -------------------------------------------------------------------------------- 1 | #! {{ python }} 2 | 3 | import urllib.request 4 | from sys import argv 5 | 6 | {% set pkgs = pillar['pip_pkgs'] %} 7 | {# FIXME #} 8 | {% for pkg in pkgs %} 9 | 10 | {% if pkg['import'] is defined %} 11 | {% set name = pkg['import'] %} 12 | {% else %} 13 | {% set name = pkg['name']|lower %} 14 | {% endif %} 15 | 16 | print("Checking {{ name }} is intact...") 17 | import {{ name }} 18 | try: 19 | print("Imported {{ name }} %s OK" % {{ name }}.__version__) 20 | except AttributeError: 21 | print("Imported {{ name }} OK") 22 | 23 | {% endfor %} 24 | 25 | req = urllib.request.Request('{{ nb_url }}') 26 | response = urllib.request.urlopen(req) 27 | 28 | print("Your VM is up and running: {{ nb_url }}") 29 | 30 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | Add PyToolz, xlrd, dill 2 | 3 | 4 | Try Pyrasite, and Ian Ozsvald's memory profiler 5 | 6 | 7 | Add tests for lifelines 8 | 9 | 10 | Add link to this doc http://nbviewer.ipython.org/github/ptwobrussell/Mining-the-Social-Web-2nd-Edition/blob/master/ipynb/_Appendix%20C%20-%20Python%20%26%20IPython%20Notebook%20Tips.ipynb 11 | 12 | 13 | Standardize output file naming format -- name sympy and pytables tests the same way we do theano 14 | 15 | 16 | Check whether we need to use os.walk for sympy or pytables tests 
17 | 18 | 19 | Install optional backends for matplotlib (latex etc.) 20 | 21 | 22 | .matplotlibrc to use Agg by default 23 | 24 | 25 | Set up ramdisk for theano cache during Salt provisioning: 26 | 27 | mount -t ramfs -o size=200m ramfs /home/vagrant/theano_ram_cache 28 | 29 | Then get Theano to use this during testing: 30 | 31 | THEANO_FLAGS="base_compiledir=/home/vagrant/theano_ram_cache" $PYTHON `which theano-nose` --theano 32 | 33 | * Should we cache Theano artifacts on the host? 34 | 35 | 36 | Need to add documentation on clearing Theano cache 37 | 38 | 39 | Re-enable Theano typed_list tests when this feature is released (https://github.com/andrewclegg/snake-charmer/issues/17) 40 | 41 | 42 | Build ATLAS from source, for speed 43 | 44 | 45 | OpenCV looks like a big project: 46 | 47 | https://help.ubuntu.com/community/OpenCV 48 | 49 | http://opencvlover.blogspot.co.uk/2014/01/installing-opencv-on-ubuntu.html 50 | 51 | http://stackoverflow.com/questions/20953273/install-opencv-for-python-3-3/21212023#21212023 52 | 53 | 54 | -------------------------------------------------------------------------------- /notebooks/Demo/cm.csv: -------------------------------------------------------------------------------- 1 | source,target,value 2 | Russian,French,0.03278688524590164 3 | Russian,Spanish,0.02564102564102564 4 | Russian,Ukrainian,0.22727272727272727 5 | French,German,0.14035087719298245 6 | French,Italian,0.018867924528301886 7 | French,Spanish,0.15384615384615385 8 | French,Dutch,0.1891891891891892 9 | French,Polish,0.08823529411764706 10 | Swedish,French,0.03278688524590164 11 | Swedish,Italian,0.03773584905660377 12 | Swedish,Spanish,0.02564102564102564 13 | Swedish,Dutch,0.08108108108108109 14 | German,French,0.09836065573770492 15 | German,Swedish,0.01639344262295082 16 | German,Italian,0.07547169811320754 17 | German,Spanish,0.10256410256410256 18 | German,Dutch,0.21621621621621623 19 | German,Polish,0.029411764705882353 20 | 
Italian,French,0.03278688524590164 21 | Italian,Swedish,0.01639344262295082 22 | Italian,German,0.15789473684210525 23 | Italian,Spanish,0.05128205128205128 24 | Italian,Dutch,0.08108108108108109 25 | Italian,Portuguese,0.1 26 | Spanish,French,0.08196721311475409 27 | Spanish,German,0.05263157894736842 28 | Spanish,Dutch,0.10810810810810811 29 | Spanish,Portuguese,0.2 30 | Dutch,French,0.08196721311475409 31 | Dutch,German,0.15789473684210525 32 | Dutch,Spanish,0.05128205128205128 33 | Dutch,Polish,0.08823529411764706 34 | Polish,French,0.06557377049180328 35 | Polish,German,0.17543859649122806 36 | Polish,Italian,0.018867924528301886 37 | Polish,Dutch,0.08108108108108109 38 | Polish,Japanese,0.2 39 | Ukrainian,Russian,0.06451612903225806 40 | Chinese,Dutch,0.10810810810810811 41 | Chinese,Japanese,0.4 42 | Portuguese,German,0.017543859649122806 43 | Portuguese,Spanish,0.07692307692307693 44 | Arabic,Chinese,0.09090909090909091 45 | Arabic,Farsi,0.4 46 | Farsi,Chinese,0.09090909090909091 47 | Japanese,Dutch,0.05405405405405406 48 | Hebrew,Dutch,0.05405405405405406 49 | Catalan,Spanish,0.02564102564102564 50 | Catalan,Dutch,0.02702702702702703 51 | Hungarian,Dutch,0.05405405405405406 52 | Bulgarian,Russian,0.016129032258064516 53 | Bulgarian,Ukrainian,0.09090909090909091 54 | Finnish,French,0.03278688524590164 55 | Finnish,Dutch,0.02702702702702703 56 | Turkish,German,0.03508771929824561 57 | Czech,Spanish,0.02564102564102564 58 | Norwegian,Dutch,0.02702702702702703 59 | Esperanto,French,0.01639344262295082 60 | Esperanto,Dutch,0.02702702702702703 61 | Indonesian,German,0.017543859649122806 62 | -------------------------------------------------------------------------------- /notebooks/Hello World.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:b385549ffbada96541e76ca5768491391e29f674ef8a86908276e3c39308fa79" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | 
"worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Welcome to Snake Charmer" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Run this for basic system information" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "collapsed": false, 28 | "input": [ 29 | "import sys\n", 30 | "%load_ext rmagic\n", 31 | "%load_ext octavemagic\n", 32 | "print('Python version %s\\n' % (sys.version))\n", 33 | "!cat /proc/version; echo; cat /etc/*-release; echo; free -m; echo\n", 34 | "!echo -n \"Last Github \"; cat ~/notebooks/last_commit.txt; echo\n", 35 | "%R -o rVer rVer = R.version$version.string\n", 36 | "print(rVer[0])\n", 37 | "%octave disp(['Octave v' (v = ver())(1).Version]);" 38 | ], 39 | "language": "python", 40 | "metadata": {}, 41 | "outputs": [] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Run this for a list of installed Python packages" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "collapsed": false, 53 | "input": [ 54 | "# Same as \"pip list\" for current interpreter's pip. Std. 
library excluded.\n", 55 | "from pip.util import get_installed_distributions\n", 56 | "print('\\n'.join(str(x)\n", 57 | " for x in sorted(get_installed_distributions(),\n", 58 | " key=lambda dist: dist.project_name.lower())))" 59 | ], 60 | "language": "python", 61 | "metadata": {}, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Run this for a list of installed Ubuntu packages" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "collapsed": false, 74 | "input": [ 75 | "!dpkg -l" 76 | ], 77 | "language": "python", 78 | "metadata": {}, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "## Run this for a graph" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "collapsed": false, 91 | "input": [ 92 | "import numpy as np\n", 93 | "import prettyplotlib as ppl\n", 94 | "np.random.seed(12)\n", 95 | "fig, ax = ppl.plt.subplots(1)\n", 96 | "ppl.hist(ax, np.random.randn(1000))" 97 | ], 98 | "language": "python", 99 | "metadata": {}, 100 | "outputs": [] 101 | } 102 | ], 103 | "metadata": {} 104 | } 105 | ] 106 | } -------------------------------------------------------------------------------- /salt/roots/salt/bin/run_tests: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PYTHON=$1 4 | DIR=$2 5 | 6 | rm -rf $DIR 7 | mkdir $DIR 8 | cd $DIR # child processes inherit CWD by default 9 | 10 | 11 | # PyTables, SymPy and PyMC are currently slowest -- and Theano too if we take 12 | # it off FAST_COMPILE. 
13 | 14 | 15 | # Put nicely-behaved well-distributed tests that use all cores evenly first -- 16 | # just coincidence that both of these need root 17 | 18 | 19 | # Numba 20 | 21 | sudo $PYTHON -c "import numba; numba.testing.multitest()" > numba.out 2>&1 22 | 23 | # IPython.parallel 24 | 25 | sudo $PYTHON `which iptest3` --all show parallel > ipython.parallel.out 2>&1 26 | 27 | 28 | # Then just bring the others in slowly, with progressively lower priority 29 | 30 | 31 | # PyTables 32 | 33 | nice -n -14 $PYTHON /usr/local/lib/$PYTHON/dist-packages/tables/tests/test_all.py --heavy > pytables.out 2>&1 & 34 | 35 | 36 | # SymPy 37 | 38 | sleep 30 39 | 40 | nice -n -13 nosetests --exe sympy > sympy.out 2>&1 & 41 | 42 | 43 | # PyMC 44 | 45 | sleep 30 46 | 47 | nice -n -12 $PYTHON -c "import pymc; pymc.test('full')" > pymc.out 2>&1 & 48 | 49 | 50 | # Theano 51 | 52 | sleep 30 53 | 54 | export OLDFLAGS=$THEANO_FLAGS 55 | 56 | export THEANO_FLAGS='mode=FAST_COMPILE' # Although maybe it's a better test with full optimizations included? 
57 | 58 | nice -n -11 $PYTHON `which theano-nose` --batch=100 --theano > theano.out 2>&1 & 59 | 60 | export THEANO_FLAGS=$OLDFLAGS 61 | 62 | 63 | # Matplotlib 64 | 65 | sleep 30 66 | 67 | nice -n -10 $PYTHON -c "import matplotlib; matplotlib.test()" > matplotlib.out 2>&1 & 68 | 69 | 70 | # NumPy 71 | 72 | sleep 30 73 | 74 | nice -n -9 $PYTHON -c "import numpy; numpy.test('full')" > numpy.out 2>&1 & 75 | 76 | 77 | # SciPy 78 | 79 | sleep 30 80 | 81 | nice -n -8 $PYTHON -c "import scipy; scipy.test('full')" > scipy.out 2>&1 & 82 | 83 | 84 | # IPython 85 | 86 | nice -n -7 $PYTHON `which iptest3` --all show html kernel.inprocess autoreload terminal core kernel lib qt nbformat testing nbconvert config extensions utils > ipython.OTHERS.out 2>&1 & 87 | 88 | 89 | # Pandas 90 | 91 | sleep 30 92 | 93 | nice -n -6 $PYTHON /usr/local/bin/nosetests pandas > pandas.out 2>&1 & 94 | 95 | 96 | # scikit-learn 97 | 98 | sleep 30 99 | 100 | nice -n -5 $PYTHON /usr/local/bin/nosetests sklearn --exe > scikit-learn.out 2>&1 & 101 | 102 | 103 | # Joblib 104 | 105 | sleep 30 106 | 107 | nice -n -4 $PYTHON /usr/local/bin/nosetests joblib > joblib.out 2>&1 & 108 | 109 | 110 | # Pillow 111 | 112 | sleep 30 113 | 114 | (ln -s /srv/cache/src/Pillow/Images . 
&& nice -n -3 $PYTHON /srv/cache/src/Pillow/selftest.py --installed) > pillow.out 2>&1 & 115 | 116 | 117 | # NLTK 118 | 119 | sleep 30 120 | 121 | nice -n -2 $PYTHON /usr/local/lib/$PYTHON/dist-packages/nltk/test/runtests.py > nltk.out 2>&1 & 122 | 123 | 124 | # gensim 125 | 126 | sleep 30 127 | 128 | (cd /srv/cache/src/gensim && nice -n -1 $PYTHON setup.py test) > gensim.out 2>&1 & 129 | 130 | 131 | # Cython 132 | 133 | sleep 30 134 | 135 | (cd /srv/cache/src/Cython/ && nice -n 0 $PYTHON runtests.py -vv) > cython.out 2>&1 & 136 | 137 | 138 | # Bottleneck 139 | 140 | sleep 30 141 | 142 | nice -n 1 $PYTHON -c "import bottleneck as bn; bn.test()" > bottleneck.out 2>&1 & 143 | 144 | 145 | # Statsmodels 146 | 147 | sleep 30 148 | 149 | nice -n 2 $PYTHON -c "import statsmodels as sm; sm.test()" > statsmodels.out 2>&1 & 150 | 151 | 152 | # Numexpr 153 | 154 | sleep 30 155 | 156 | nice -n 3 $PYTHON /usr/local/bin/nosetests numexpr > numexpr.out 2>&1 & 157 | 158 | 159 | # Patsy 160 | 161 | sleep 30 162 | 163 | nice -n 4 $PYTHON /usr/local/bin/nosetests patsy > patsy.out 2>&1 & 164 | 165 | 166 | wait 167 | 168 | -------------------------------------------------------------------------------- /notebooks/Demo/graph.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 41 |
42 | 189 | 190 |